def gzopen_without_timestamps(name, mode="r", fileobj=None, **kwargs):
    """Open a ``.tar.gz`` archive forcing ``mtime=0`` in the gzip header.

    Overridden so the gzip stream does not embed ``time.time()``, which
    would make the md5 of otherwise identical archives differ. The stock
    ``tarfile`` gzopen cannot be used because it does not forward these
    arguments to the ``GzipFile`` constructor.
    """
    level = int(os.getenv("CONAN_COMPRESSION_LEVEL", 9))
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        gz_stream = gzip.GzipFile(name, mode, level, fileobj, mtime=0)
    except OSError:
        if mode == 'r' and fileobj is not None:
            raise tarfile.ReadError("not a gzip file")
        raise

    # GNU_FORMAT is forced: Python 3.8 changed the default tar format,
    # which produced different bytes (and hashes) for identical tgzs.
    try:
        archive = tarfile.TarFile.taropen(name, mode, gz_stream,
                                          format=tarfile.GNU_FORMAT, **kwargs)
    except IOError:
        gz_stream.close()
        if mode == 'r':
            raise tarfile.ReadError("not a gzip file")
        raise
    except Exception:
        gz_stream.close()
        raise
    archive._extfileobj = False
    return archive
def check_tarfile(self):
    """Verify that ``self.tar_archive`` exists and is a valid tar file.

    Raises:
        tarfile.ReadError: the path is missing, or is not a tar archive.
    """
    try:
        valid = tarfile.is_tarfile(self.tar_archive)
    except FileNotFoundError:
        # The path does not exist at all.
        raise tarfile.ReadError(self.tar_archive + ' is not found')
    if not valid:
        # The file exists but is not a tar archive.
        raise tarfile.ReadError(self.tar_archive + ' is not a tar archive')
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open an lzma/xz compressed tar archive for reading or writing.

    Appending is not allowed. Backported from the Python 3.6 ``tarfile``
    module.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    lzma_stream = lzma.LZMAFile(fileobj or name, mode, preset=preset)
    try:
        archive = cls.taropen(name, mode, lzma_stream, **kwargs)
    except (lzma.LZMAError, EOFError):
        lzma_stream.close()
        if mode != 'r':
            raise
        raise tarfile.ReadError("not an lzma file")
    except BaseException:
        lzma_stream.close()
        raise
    archive._extfileobj = False
    return archive
def extract(self, path: pathlib.Path, data_dir: pathlib.Path, file_list_file: pathlib.Path) -> None:
    """Extract the tar archive at *path* into *data_dir*.

    Also writes metadata describing the archive's members (member type,
    plus size for regular files) as JSON to *file_list_file*.

    :param path: Path to the tar archive.
    :param data_dir: Path to the data dir to extract data files to.
    :param file_list_file: Path to the file that stores the list of files in
        the downloaded dataset.
    :raises tarfile.ReadError: The tar archive was unable to be read.
    """
    try:
        archive = tarfile.open(path)
    except tarfile.ReadError as e:
        raise tarfile.ReadError(f'Failed to unarchive tar file "{path}"\ncaused by:\n{e}')

    with archive:
        entries: Dict[str, Dict[str, int]] = {}
        for member in archive.getmembers():
            entry = {'type': int(member.type)}
            if member.isreg():
                # Sizes are recorded for regular files only.
                entry['size'] = member.size
            entries[member.name] = entry

        metadata: Dict[str, Union[str, Dict[str, Dict[str, int]]]] = {
            'type': 'application/x-tar',
            'contents': entries,
        }
        with open(file_list_file, mode='w') as f:
            # We do not specify 'utf-8' here to match the default encoding
            # used by the OS, which also likely uses this encoding for
            # accessing the filesystem.
            json.dump(metadata, f, indent=2)
        archive.extractall(path=data_dir)
def lzmaopen(cls, name=None, mode="r", fileobj=None, compressformat=lzma.FORMAT_XZ, compresslevel=9, **kwargs): """Open lzma/xz compressed tar archive name for reading or writing. Appending is not allowed. """ if len(mode) > 1 or mode not in "rw": raise ValueError("mode must be 'r' or 'w'.") try: import lzma except ImportError: raise tarfile.CompressionError("lzma module is not available") if fileobj is not None: fileobj = _LZMAProxy(fileobj, mode) else: options = {"format": compressformat, "level": compresslevel} fileobj = lzma.LZMAFile(name, mode, format=1) try: t = cls.taropen(name, mode, fileobj, **kwargs) except IOError: raise tarfile.ReadError("not a lzma file") t._extfileobj = False return t
def decompress(self):
    """Unpack the downloaded GeoLite2 tarball and install the database.

    Extracts ``self.__file`` (GeoLite2-City.tar.gz) inside ``self.__path``,
    copies ``self.__db`` out of the dated root folder
    (GeoLite2-City_aaaammdd) into ``self.__path``, then removes the
    extracted folder and the tarball. Relies on ``os.chdir`` for relative
    paths, so the process working directory changes during execution.

    :raises FileNotFoundError: a required file or folder is missing.
    :raises tarfile.ReadError: the tarball could not be opened.
    """
    try:
        # Set pointer to current directory
        os.chdir(self.__path)
        # Unzip the file (GeoLite2-City.tar.gz) to get the database
        with tarfile.open(self.__file, 'r') as tar_file:
            tar_file.extractall(self.__path)
            # Get root file to get database (GeoLite2-City_aaaammdd)
            folder = tar_file.getnames()[0]
            # Changing to the root folder (GeoLite2-City_aaaammdd)
            os.chdir(os.path.join(self.__path, folder))
            # copying the database to the assets directory
            shutil.copy(self.__db, self.__path)
            os.chdir(self.__path)
            # Removing the folder (GeoLite2-City_aaaammdd)
            shutil.rmtree(folder)
            # NOTE(review): redundant — the with-block already closes it.
            tar_file.close()
        os.remove(self.__file)
    except FileNotFoundError:
        raise FileNotFoundError(
            "El sistema no puede encontrar el archivo especificado")
    except tarfile.ReadError:
        raise tarfile.ReadError(
            "El archivo no se pudo abrir correctamente")
def __init__(self, name=None, mode='r', mtime=None):
    """Open *name* as a tar archive, transparently converting a zip.

    If *name* is not a tar file and the mode is a read mode, a zip
    archive is accepted: it is unpacked into a temp dir, repacked as a
    tar into a temp file, and that tar is opened instead.

    :param name: path to the archive on disk.
    :param mode: tarfile mode; non-read modes always open as tar.
    :param mtime: optional timestamp to expose; defaults to ``time.time()``.
    :raises tarfile.ReadError: *name* is neither a tar nor a zip archive.
    """
    if not mode.startswith('r') or tarfile.is_tarfile(name):
        self.__tar = tarfile.TarFile(name=name, mode=mode)
    else:
        # Convert the zip to a tar.
        if not zipfile.is_zipfile(name):
            raise tarfile.ReadError()
        tmp_dir = tmp_name = tmp_fo = zip_archive = None
        try:
            tmp_dir = tempfile.mkdtemp()
            tmp_fd, tmp_name = tempfile.mkstemp()
            # BUG FIX: must be binary mode; the original opened with 'w'
            # (text), and writing tar bytes to it raised TypeError.
            tmp_fo = os.fdopen(tmp_fd, 'wb')
            # Renamed from ``zip`` to avoid shadowing the builtin.
            zip_archive = zipfile.ZipFile(name)
            zip_archive.extractall(tmp_dir)
            tar = tarfile.TarFile(fileobj=tmp_fo, mode='w')
            tar.add(tmp_dir, arcname='')
            tar.close()
            # BUG FIX: flush buffered writes before reopening by name.
            tmp_fo.flush()
            self.__tar = tarfile.TarFile(name=tmp_name, mode=mode)
        finally:
            # Guard each cleanup step: any line above may have failed
            # before the corresponding resource existed.
            if zip_archive is not None:
                zip_archive.close()
            if tmp_fo is not None:
                tmp_fo.close()
            if tmp_name is not None:
                os.unlink(tmp_name)
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir)
    if mtime:
        self.mtime = mtime
    else:
        self.mtime = time.time()
def zstdopen(cls, name, mode="r", fileobj=None, cctx=None, dctx=None, **kwargs):  # type: ignore
    """Open zstd compressed tar archive name for reading.

    The whole decompressed payload is buffered in memory so the resulting
    TarFile is seekable. Writing/appending is not allowed.

    :raises ValueError: mode is not 'r'.
    :raises tarfile.ReadError: the stream is not valid zstd data.
    """
    # BUG FIX: ``("r")`` is just the string "r" (substring test), so
    # e.g. mode="" slipped through; a one-element tuple was intended.
    if mode not in ("r",):
        raise ValueError("mode must be 'r'")
    try:
        zobj = zstandard.open(fileobj or name, mode + "b", cctx=cctx, dctx=dctx)
        with zobj:
            data = zobj.read()
    except (zstandard.ZstdError, EOFError) as e:
        raise tarfile.ReadError("not a zstd file") from e
    fileobj = io.BytesIO(data)
    t = cls.taropen(name, mode, fileobj, **kwargs)
    t._extfileobj = False
    return t
def zstdopen(cls, name, mode="r", fileobj=None, level=9, **kwargs):
    """Open a zstd compressed tar archive for reading or writing.

    Appending is not allowed.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    zstd_stream = ZstdFile(fileobj or name, mode, level=level)
    try:
        archive = cls.taropen(name, mode, zstd_stream, **kwargs)
    except (OSError, EOFError):
        zstd_stream.close()
        if mode != 'r':
            raise
        raise tarfile.ReadError("not a zstd file")
    except BaseException:
        zstd_stream.close()
        raise
    archive._extfileobj = False
    return archive
def download(self, check: bool = True) -> None:
    """Download, extract, and remove the dataset archive.

    A directory write lock is held for the duration.

    :param check: Check that the data files are not already present in
        :attr:`._data_dir` (via :meth:`.is_downloaded`) and raise if they
        are. Pass ``False`` to remove this safeguard and allow subsequent
        calls to overwrite previously downloaded data files.
    :raises RuntimeError: The dataset was previously downloaded, as
        indicated by :meth:`.is_downloaded` returning ``True``.
    :raises NotADirectoryError: :attr:`Dataset._data_dir` points to an
        existing file that is not a directory.
    :raises OSError: The SHA512 checksum of a downloaded dataset doesn't
        match the expected checksum.
    :raises tarfile.ReadError: The tar archive was unable to be read.
    :raises exceptions.DirectoryLockAcquisitionError: Failed to acquire
        the directory lock.
    """
    if check and self.is_downloaded():
        raise RuntimeError(
            f'{self.__class__.__name__}.download() was previously called. To overwrite existing '
            f'data files, rerun {self.__class__.__name__}.download() with ``check`` set to '
            f'``False``.')

    download_url = self._schema['download_url']
    download_file_name = pathlib.Path(os.path.basename(download_url))
    with self._lock.locking_with_exception(write=True):
        archive_fp = self._pydax_dir / download_file_name
        response = requests.get(download_url, stream=True)
        archive_fp.write_bytes(response.content)

        computed_hash = hashlib.sha512(archive_fp.read_bytes()).hexdigest()
        actual_hash = self._schema['sha512sum']
        if actual_hash != computed_hash:
            raise OSError(
                f'{archive_fp} has a SHA512 checksum of: ({computed_hash}) '
                f'which is different from the expected SHA512 checksum of: ({actual_hash}) '
                f'the file may by corrupted.')

        # Supports tar archives only for now
        try:
            archive = tarfile.open(archive_fp)
        except tarfile.ReadError as e:
            raise tarfile.ReadError(f'Failed to unarchive "{archive_fp}"\ncaused by:\n{e}')

        with archive:
            manifest = {}
            for member in archive.getmembers():
                entry = {'type': int(member.type)}
                if member.isreg():
                    # For regular files, we also save its size
                    entry['size'] = member.size
                manifest[member.name] = entry
            with open(self._file_list_file, mode='w') as f:
                # We do not specify 'utf-8' here to match the default
                # encoding used by the OS, which also likely uses this
                # encoding for accessing the filesystem.
                json.dump(manifest, f, indent=2)
            archive.extractall(path=self._data_dir)
        os.remove(archive_fp)
def test_tape_empty_when_tarfile_ReadError_with_msg_then_drive_is_empty(self, mock_rewind_tape, mock_tarfile_open):
    """An 'empty file' ReadError from tarfile.open means the tape is empty."""
    mock_tarfile_open.side_effect = tarfile.ReadError('empty file')
    self.assertTrue(tape_empty("some_drive"))
    mock_tarfile_open.assert_called_once()
    mock_rewind_tape.assert_called_once_with("some_drive")
def test_tape_empty_when_tarfile_ReadError_and_unknown_message_then_raise_exception(self, mock_tarfile_open):
    """A ReadError whose message is not recognized must propagate."""
    err = tarfile.ReadError()
    err.message = 'some other message'
    mock_tarfile_open.side_effect = err
    with self.assertRaises(tarfile.ReadError):
        tape_empty("some_drive")
    mock_tarfile_open.assert_called_once()
def open(cls, name=None, mode="r", fileobj=None, **kwargs):
    """Open an lzma compressed tar archive for reading or writing.

    :param name: archive path, used when *fileobj* is not given.
    :param fileobj: optional binary file object to read/write instead of
        opening *name*.
    :raises tarfile.ReadError: the stream is not valid lzma data.
    """
    # BUG FIX: Python 3's lzma.LZMAFile has no ``fileobj`` keyword — the
    # original call raised TypeError. The file object (or the name, as a
    # fallback) is the first positional argument.
    fileobj = lzma.LZMAFile(fileobj or name, mode)
    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except lzma.LZMAError:
        fileobj.close()
        raise tarfile.ReadError("not an lzma file")
    t._extfileobj = False
    return t
def test_upload_archive_failed(self):
    """upload_archive must raise DumpInvalidException when a member cannot
    be extracted, and must not leave the target directory behind."""
    broken_tar = MagicMock()
    broken_tar.extract.side_effect = tarfile.ReadError()
    broken_tar.__iter__.return_value = [MagicMock()]
    scratch_dir = tempfile.mkdtemp()
    self.assertRaises(DumpInvalidException, self.uploader.upload_archive,
                      scratch_dir, broken_tar, '/test',
                      schema.artist_relation_schema, self.uploader.process_json)
    self.assertFalse(utils.path_exists('/test'))
def _load(self, tarfilepath):
    """Load an artifact archive and return ``(data, type, provenance, uuid)``.

    :raises tarfile.ReadError: *tarfilepath* is not a readable tar archive.
    """
    if not tarfile.is_tarfile(tarfilepath):
        raise tarfile.ReadError("%r is not a readable tar archive file"
                                % tarfilepath)

    root_dir = self._get_root_dir(tarfilepath)
    with tarfile.open(tarfilepath, mode='r') as tar:
        # Metadata lives at the archive root; data under the data dir.
        type_, uuid_, provenance = self._load_metadata(
            ArtifactDataReader(tar, root_dir))
        reader = ArtifactDataReader(
            tar, os.path.join(root_dir, self._data_dir))
        data = type_().load(reader)
        return data, type_, provenance, uuid_
def xzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.

    Appending is not allowed. Uses ``backports.lzma``.

    NOTE(review): *compresslevel* is accepted but never passed to
    LZMAFile — confirm whether it should map to ``preset``.

    :raises tarfile.CompressionError: backports.lzma is unavailable.
    :raises tarfile.ReadError: the stream is not valid lzma (mode 'r').
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")
    try:
        from backports import lzma
        lzma.LZMAFile
    except (ImportError, AttributeError):
        raise tarfile.CompressionError("lzma module is not available")
    try:
        fileobj = lzma.LZMAFile(fileobj or name, mode)
    except (OSError, IOError):
        if mode == 'r':
            raise tarfile.ReadError("not an lzma file")
        raise
    try:
        # Probe the stream up front so a bad file fails early.
        fileobj.peek()
    except (lzma.LZMAError, EOFError):
        # BUG FIX: close the freshly opened LZMAFile before bailing out;
        # the original leaked it here.
        fileobj.close()
        raise tarfile.ReadError("not an lzma file")
    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        fileobj.close()
        if mode == 'r':
            raise tarfile.ReadError("not an lzma file")
        raise
    except:
        fileobj.close()
        raise
    t._extfileobj = False
    return t
def extract_all(self):
    """Extract all non-GFF members of the tar archive into ``self.path``.

    Opening is retried up to 3 times (observed transient, speed-related
    read failures). Members whose name contains 'gff' or that already
    exist on disk are skipped.

    Raises:
        tarfile.ReadError: the archive could not be opened after 3 attempts.
    """
    self.logger.debug("Reading header from %s/%s ...", self.path, self.tarfilename)
    archive_path = os.path.join(self.path, self.tarfilename)
    tfile = None
    attempts = 0
    # Our little retry loop. Implemented due to speed-related writing errors.
    # TODO Replace / update with "tenacity" module.
    while attempts < 3:
        try:
            tfile = tarfile.open(archive_path, 'r')
            break
        except tarfile.ReadError as error:
            self.logger.warning('ReadError encountered when opening tar file.')
            self.logger.warning('Sleeping for 2 seconds and trying again.')
            self.logger.warning(error)
            attempts += 1
            time.sleep(2)
    if tfile is None:
        # BUG FIX: the original format string rendered the literal text
        # ' + "/" + ' inside the message instead of joining the paths.
        raise tarfile.ReadError('Tar file could not be read after 3 attempts: %s/%s'
                                % (self.path, self.tarfilename))
    try:
        members_to_extract = []
        for member in tfile.getmembers():
            lower_name = member.name.lower()
            if 'gff' in lower_name:
                self.logger.info('Skipping GFF file extraction for %s', member.name)
                continue
            if os.path.exists(os.path.join(self.path, member.name)):
                self.logger.info('%s/%s already exists, not extracting.',
                                 self.path, member.name)
            else:
                self.logger.info("Extracting (%s->%s/%s)",
                                 member.name, self.path, member.name)
                members_to_extract.append(member)
        if members_to_extract:
            tfile.extractall(self.path, members_to_extract)
    finally:
        # BUG FIX: the original never closed the archive.
        tfile.close()
def download(self) -> None:
    """Downloads, extracts, and removes dataset archive.

    It adds a directory write lock during execution.

    :raises NotADirectory: :attr:`Dataset._data_dir` (passed in via
        :meth:`.__init__()`) points to an existing file that is not a
        directory.
    :raises OSError: The SHA512 checksum of a downloaded dataset doesn't
        match the expected checksum.
    :raises tarfile.ReadError: The tar archive was unable to be read.
    :raises exceptions.DirectoryLockAcquisitionError: Failed to acquire
        the directory lock.
    """
    download_url = self._schema['download_url']
    download_file_name = pathlib.Path(os.path.basename(download_url))
    with self._lock.locking_with_exception(write=True):
        archive_fp = self._pydax_dir / download_file_name
        # NOTE(review): stream=True yet the whole body is buffered via
        # .content, and the HTTP status is never checked — confirm intent.
        response = requests.get(download_url, stream=True)
        archive_fp.write_bytes(response.content)
        # Verify integrity against the schema's expected SHA512.
        computed_hash = hashlib.sha512(archive_fp.read_bytes()).hexdigest()
        actual_hash = self._schema['sha512sum']
        if not actual_hash == computed_hash:
            raise OSError(
                f'{archive_fp} has a SHA512 checksum of: ({computed_hash}) \
which is different from the expected SHA512 checksum of: ({actual_hash}) \
the file may by corrupted.')
        # Supports tar archives only for now
        try:
            tar = tarfile.open(archive_fp)
        except tarfile.ReadError as e:
            raise tarfile.ReadError(
                f'Failed to unarchive "{archive_fp}"\ncaused by:\n{e}')
        with tar:
            # Record every member (type; size for regular files) so the
            # file list can be consulted without re-reading the archive.
            members = {}
            for member in tar.getmembers():
                members[member.name] = {'type': int(member.type)}
                if member.isreg():  # For regular files, we also save its size
                    members[member.name]['size'] = member.size
            with open(self._file_list_file, mode='w') as f:
                # We do not specify 'utf-8' here to match the default encoding used by the OS, which also likely
                # uses this encoding for accessing the filesystem.
                json.dump(members, f, indent=2)
            tar.extractall(path=self._data_dir)
        os.remove(archive_fp)
def lzmaopen(cls, name=None, mode="r", fileobj=None, compresslevel=None, **kwargs):
    """Open an lzma/xz compressed tar archive for reading or writing.

    Appending is not allowed. Falls back to ``backports.lzma`` on older
    interpreters, and to the build configuration's compression level when
    *compresslevel* is not given.
    """
    try:
        import lzma
    except ImportError:
        try:
            from backports import lzma
        except ImportError:
            raise tarfile.CompressionError("Lzma module is not available")

    if not compresslevel:
        compresslevel = ctx.config.values.build.compressionlevel

    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    if fileobj is not None:
        fileobj = _LZMAProxy(fileobj, mode)
    elif 'w' in mode:
        fileobj = lzma.LZMAFile(name, mode, preset=compresslevel)
    else:
        fileobj = lzma.LZMAFile(name, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        raise tarfile.ReadError(
            _(" \"{}\" is not a lzma file.").format(name))
    t._extfileobj = False
    return t
def _open(self):
    """Open the output tar.gz archive for writing.

    Raises:
        ReadError: the output file could not be created for writing.
        ValueError: the configured extension is not a supported
            compression type.
    """
    # Guard clause: only '.tgz'/'tgz' (case-insensitive) is supported.
    if not re.match(r'^\.?tgz$', self.ext, re.IGNORECASE):
        logger.error('[' + self.arcname + '] Invalid compression type ' + self.ext)
        raise ValueError('Invalid compression type ' + self.ext)
    try:
        self.arcfile = tarfile.open(self.arcname + '.tar.gz', 'w:gz')
    except Exception as e:
        logger.exception('[' + self.arcname +
                         '] Unable to create output file for writing ' + str(e))
        raise tarfile.ReadError(
            'Unable to create output file for writing')
def test_download_results_corrupted_compression_readerror(
        self, get_mock, tarfile_mock):
    """A tarfile.ReadError while unpacking the downloaded archive must
    produce empty results rather than raising."""
    response = Mock()
    status_code_prop = PropertyMock(return_value=200)
    type(response).status_code = status_code_prop
    content_prop = PropertyMock(return_value=self.tarfile.read())
    type(response).content = content_prop
    type(response).url = PropertyMock(
        return_value="http://foo.bar.com/file.tar.xz")
    type(response).headers = PropertyMock(
        return_value={"Content-Type": "application/x-tar"})
    get_mock.return_value = response
    tarfile_mock.side_effect = tarfile.ReadError()

    results = self.plugin._download_results(RESULT_DICT)

    status_code_prop.assert_called_once_with()
    self.assertEqual(content_prop.call_count, 2)
    self.assertEqual(self.plugin.tradefed_results_url, RESULT_URL)
    self.assertIsNone(results.test_results)
    self.assertIsNone(results.tradefed_stdout)
    self.assertIsNone(results.tradefed_logcat)
def callback(self, ch, method, properties, body):
    """AMQP consumer callback: rsync the log archive named in *body*,
    extract it, split each contained file, and ack/reject the message.

    :param ch: channel the message arrived on.
    :param method: delivery metadata (delivery_tag used for ack/reject).
    :param properties: message properties (unused).
    :param body: UTF-8 encoded path of the archive to fetch.
    """
    file_path = body.decode('utf-8')
    # SECURITY: file_path comes from the queue and is interpolated into a
    # shell command — a crafted message can inject commands. Prefer
    # subprocess.call(["rsync", file_path, "../src-logs/"]) with no shell.
    rsync_statement = "rsync " + file_path + " ../src-logs/"
    print(rsync_statement)
    try:
        code = subprocess.call(rsync_statement, shell=True)
        if code == 0:
            file_name = file_path.split('/')[-1]
            new_file_path = "../src-logs/" + file_name
            try:
                if tarfile.is_tarfile(new_file_path):
                    tar_file = tarfile.open(new_file_path, "r:gz")
                    for tarinfo in tar_file:
                        print(tarinfo.name, " is ", tarinfo.size, " bytes")
                        tar_file.extract(tarinfo, path='../src-logs/')
                        self.split_file("../src-logs/" + tarinfo.name)
                    ch.basic_ack(delivery_tag=method.delivery_tag)
                    os.remove(new_file_path)
            # BUG FIX: ReadError subclasses TarError, so it must be the
            # first clause — in the original the ReadError handler was
            # unreachable because TarError was listed first.
            except tarfile.ReadError:
                print("read tarfile error")
                raise tarfile.ReadError("read tarfile error")
            except tarfile.TarError:
                print("tarfile error")
                raise tarfile.TarError('tarfile tar error')
        else:
            print("cann't async to file_path")
            ch.basic_reject(delivery_tag=method.delivery_tag)
            return
    except (OSError, pika.exceptions.ConnectionClosed, zlib.error) as err:
        # NOTE(review): tarinfo/new_file_path may be unbound here if the
        # failure happened before extraction started — confirm intent.
        os.remove(new_file_path)
        f = open("../src-logs/" + tarinfo.name, 'w')
        f.close()
        print(err)
        self.split_file("../src-logs/" + tarinfo.name)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return
def __next__(self):
    """A copy and modification of the next() method in tarfile module.

    Returns the next TarInfo member, or None at end of archive. The only
    modification (marked "Modify here" below) re-raises invalid headers
    as ReadError when ``errorlevel`` > 0 instead of silently skipping.

    The copy is from tarfile.py of CPython @102457:95df96aa2f5a

    # Copyright (C) 2002 Lars Gustäbel <*****@*****.**>
    # All rights reserved.
    #
    # Permission is hereby granted, free of charge, to any person
    # obtaining a copy of this software and associated documentation
    # files (the "Software"), to deal in the Software without
    # restriction, including without limitation the rights to use,
    # copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the
    # Software is furnished to do so, subject to the following
    # conditions:
    #
    # The above copyright notice and this permission notice shall be
    # included in all copies or substantial portions of the Software.
    """
    self._check("ra")
    if self.firstmember is not None:
        # A member was already read ahead; hand it out once.
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer to the expected start of the next header.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise tarfile.ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except tarfile.EOFHeaderError as e:
            # All-zero block: skip it when ignore_zeros, else treat as EOF.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += tarfile.BLOCKSIZE
                continue
        except tarfile.InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += tarfile.BLOCKSIZE
                continue
            # Modify here, to raise exceptions if errorlevel is bigger than 0.
            elif self.errorlevel > 0:
                raise tarfile.ReadError(str(e))
        except tarfile.EmptyHeaderError:
            if self.offset == 0:
                raise tarfile.ReadError("empty file")
        except tarfile.TruncatedHeaderError as e:
            if self.offset == 0:
                raise tarfile.ReadError(str(e))
        except tarfile.SubsequentHeaderError as e:
            raise tarfile.ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # No member found: the archive is fully loaded.
        self._loaded = True

    return tarinfo
def tariterator(fileobj, check_sorted=False, keys=base_plus_ext, decode=True, source=None, lcase=True, filename=None):
    """Iterate over samples from a tar archive, either locally or given by URL.

    Tar archives are assumed to be sorted by file name. For each basename,
    reads all the files with different extensions and returns a dictionary
    with the extension as key and the file contents as value.

    :param fileobj: open (possibly piped) file object for the tar stream
    :param bool check_sorted: verify that file names are sorted
    :param keys: callable splitting a member name into (prefix, suffix)
    :param decode: True/False/str or callable selecting the sample decoder
    :param source: value stored under ``__source__`` in each sample
    :param bool lcase: lowercase the suffix before using it as a dict key
    :param filename: display name for diagnostics (derived if None)
    :returns: iterator over samples
    """
    if filename is None:
        filename = getattr(fileobj, "pipe_cmd", getattr(fileobj, "name", "?"))
    # Normalize the ``decode`` argument into a callable.
    if decode is True:
        decode = utils.autodecode
    elif decode is False:
        decode = trivial_decode
    elif isinstance(decode, str):
        decode = utils.autodecoder(decode)
    current_count = 0
    current_prefix = None
    current_sample = None
    try:
        # Streaming mode ("r|*"): members must be consumed in order.
        stream = tarfile.open(fileobj=fileobj, mode="r|*")
    except tarfile.ReadError:
        info = getattr(fileobj, "pipe_cmd", getattr(fileobj, "name", "?"))
        raise tarfile.ReadError("{}: empty file".format(info))
    for tarinfo in stream:
        if not tarinfo.isreg():
            continue
        fname = tarinfo.name
        if fname is None:
            warnings.warn("tarinfo.name is None")
            continue
        prefix, suffix = keys(fname)
        if prefix is None:
            warnings.warn("prefix is None for: %s" % (tarinfo.name,))
            continue
        if prefix != current_prefix:
            # New basename: emit the finished sample, then start a new one.
            # NOTE(review): with check_sorted=True the very first
            # comparison is against None and raises TypeError on
            # Python 3 — confirm whether a guard is intended.
            if check_sorted and prefix <= current_prefix:
                raise ValueError("[%s] -> [%s]: tar file does not contain sorted keys (%s)" % \
                    (current_prefix, prefix, filename))
            if valid_sample(current_sample):
                decoded = maybe_decode(current_sample, decode, current_count,
                                       info="file {}".format(filename))
                if decoded is not None:
                    yield decoded
            current_prefix = prefix
            current_sample = dict(__key__=prefix, __source__=source)
        try:
            data = stream.extractfile(tarinfo).read()
        except tarfile.ReadError as e:
            # Unreadable member: mark the sample bad but keep iterating.
            print("tarfile.ReadError at", current_count)
            print("file:", tarinfo.name)
            print("source:", filename)
            print(e)
            current_sample["__bad__"] = True
        else:
            if lcase:
                suffix = suffix.lower()
            current_sample[suffix] = data
        current_count += 1
    # Flush the final sample after the stream is exhausted.
    if valid_sample(current_sample):
        decoded = maybe_decode(current_sample, decode, current_count)
        if decoded is not None:
            yield decoded
    # Best-effort release of the stream object.
    try:
        del stream
    except:
        pass
def test_check_resource_invalid_archive(self, tarfile_open):
    """check_resource() returns False when the keytab is not a valid tar."""
    self.resource_get.return_value = 'targzfile'
    tarfile_open.side_effect = tarfile.ReadError()
    result = kerberos_keytab_utils.check_resource()
    self.assertFalse(result)