def __init__(self, filename, fileobj=None, data=None, pool_path=None):
    """Load an apt package, pulling the control file out of the .deb archive.

    :param filename: path of the .deb (may be None when fileobj is given).
    :param fileobj: optional already-open file object for the archive.
    :param data: optional pre-extracted control data; when provided, the
        archive is not opened at all.
    :param pool_path: optional manual pool path override.
    """
    self.name = filename
    self._pool_path = pool_path  # Used for manual pool path overrides
    # Compare against None explicitly: an empty-but-present control blob
    # must not trigger a redundant (and possibly failing) archive parse.
    if data is None:
        if fileobj:
            self.ar = arpy.Archive(filename or getattr(fileobj, 'name', None), fileobj)
        else:
            self.ar = arpy.Archive(filename)
        self.ar.read_all_headers()
        self.control_tar = tarfile.open(
            'control.tar.gz', 'r:gz',
            fileobj=self.ar.archived_files['control.tar.gz'])
        data = self.control_tar.extractfile('./control')
    super(AptPackage, self).__init__(data)
def extract(self, input_path, output_path):
    """
    Extracts data from a DEB file.

    :param input_path: A String of the file path of the DEB to extract.
    :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
    :return: A Boolean on whether the extraction succeeded or failed.
    """
    try:
        root_ar = arpy.Archive(input_path)
        root_ar.read_all_headers()
        try:
            data_bin = root_ar.archived_files[b'data.tar.gz']
            data_tar = tarfile.open(fileobj=data_bin)
            data_tar.extractall(output_path)
        except Exception:
            try:
                data_theos_bin = root_ar.archived_files[b'data.tar.lzma']
                data_theos_bin.seekable = lambda: True
                data_theos_tar = tarfile.open(fileobj=data_theos_bin, mode='r:xz')
                data_theos_tar.extractall(
                    output_path
                )  # This is an actual Python/lzma implementation bug from the looks of it.
            except Exception:
                # Fall back to data.tar.xz, which some packaging tools emit
                # instead of gz/lzma; same seekable monkey-patch applies.
                data_theos_bin = root_ar.archived_files[b'data.tar.xz']
                data_theos_bin.seekable = lambda: True
                data_theos_tar = tarfile.open(fileobj=data_theos_bin, mode='r:xz')
                data_theos_tar.extractall(output_path)
        control_bin = root_ar.archived_files[b'control.tar.gz']
        control_tar = tarfile.open(fileobj=control_bin)
        control_tar.extractall(output_path)
        return True
    except Exception:
        # Any failure (bad archive, missing members) reports as False.
        return False
def test_empty_ar(self):
    """An empty archive exposes no files and no headers."""
    archive_path = os.path.join(os.path.dirname(__file__), 'empty.ar')
    ar = arpy.Archive(archive_path)
    ar.read_all_headers()
    self.assertEqual(list(ar.archived_files.keys()), [])
    self.assertEqual(len(ar.headers), 0)
    ar.close()
def __init__(self, *args, **kwargs):
    """Load a static ar archive by loading each member as a child object.

    Raises CLEError when the optional `arpy` dependency is not installed.
    """
    if arpy is None:
        raise CLEError(
            "run `pip install arpy==1.1.1` to load archive files")
    super().__init__(*args, **kwargs)
    # hack: we are using a loader internal method in a non-kosher way which will cause our children to be
    # marked as the main binary if we are also the main binary
    # work around this by setting ourself here:
    if self.loader.main_object is None:
        self.loader.main_object = self
    ar = arpy.Archive(fileobj=self._binary_stream)
    ar.read_all_headers()
    # Each archived member becomes an isolated child object of this archive.
    for name, stream in ar.archived_files.items():
        child = self.loader._load_object_isolated(stream)
        # arpy keys are bytes; decode for a printable binary name.
        child.binary = child.binary_basename = name.decode()
        child.parent_object = self
        self.child_objects.append(child)
    if self.child_objects:
        # Adopt the architecture of the first member for the whole archive.
        self.arch = self.child_objects[0].arch
    else:
        l.warning("Loaded empty static archive?")
    # The archive container itself contributes no mapped memory and is
    # treated as position-independent.
    self.has_memory = False
    self.pic = True
    # hack pt. 2
    if self.loader.main_object is self:
        self.loader.main_object = None
def openar(filename, fileobj):
    """Context-manager helper: open an arpy archive, read all of its
    headers, and yield it, guaranteeing the archive is closed even when
    the consumer's with-body raises.

    :param filename: path of the archive (may be None when fileobj is given).
    :param fileobj: optional already-open file object for the archive.
    """
    ar = arpy.Archive(filename=filename, fileobj=fileobj)
    try:
        ar.read_all_headers()
        yield ar
    finally:
        # Without the finally, an exception in the caller's body would
        # leak the underlying file handle.
        ar.close()
def test_windows(self):
    """Windows-style archives carry no uid/gid in their headers."""
    path = os.path.join(os.path.dirname(__file__), 'windows.ar')
    ar = arpy.Archive(path)
    header = ar.read_next_header()
    self.assertIsNone(header.gid)
    self.assertIsNone(header.uid)
    ar.close()
def test_single_name(self):
    """A normal archive with one member lists exactly that member."""
    archive_path = os.path.join(os.path.dirname(__file__), 'normal.ar')
    ar = arpy.Archive(archive_path)
    ar.read_all_headers()
    self.assertEqual(list(ar.archived_files.keys()), [b'short'])
    self.assertEqual(len(ar.headers), 1)
    ar.close()
def test_iteration(self):
    """Iterating an archive yields each member once, then StopIteration."""
    ar = arpy.Archive(os.path.join(os.path.dirname(__file__), 'normal.ar'))
    ar_iterator = iter(ar)
    # Use the builtin next() rather than the bound .next() method: the
    # latter is a Python-2-only iterator idiom, while builtin next()
    # works on both Python 2 and Python 3 iterators.
    short = next(ar_iterator)
    self.assertEqual(b'short', short.header.name)
    self.assertRaises(StopIteration, next, ar_iterator)
    ar.close()
def test_fileobj(self):
    """Archives can be parsed out of an in-memory file object."""
    # Read the fixture through a context manager so the OS-level file
    # handle is not leaked (the original left the open() result dangling).
    with open(os.path.join(os.path.dirname(__file__), 'normal.ar'), "rb") as f:
        data = f.read()
    ar = arpy.Archive(fileobj=io.BytesIO(data))
    ar.read_all_headers()
    self.assertEqual([b'short'], list(ar.archived_files.keys()))
    self.assertEqual(1, len(ar.headers))
    ar.close()
def test_single_name(self):
    """BSD long-name members overflow into a separate name record."""
    path = os.path.join(os.path.dirname(__file__), 'bsd_single_name.ar')
    ar = arpy.Archive(path)
    ar.read_all_headers()
    long_name = (b'a_very_long_name_for_the_gnu_type_header'
                 b'_so_it_can_overflow_the_standard_name_length')
    self.assertEqual(list(ar.archived_files.keys()), [long_name])
    # Two headers: the name record plus the member itself.
    self.assertEqual(len(ar.headers), 2)
    ar.close()
def test_archive_contents(self):
    """Member file contents are readable and match what was archived."""
    ar = arpy.Archive(os.path.join(os.path.dirname(__file__), 'contents.ar'))
    ar.read_all_headers()
    expected = {
        b'file1': b'test_in_file_1\n',
        b'file2': b'test_in_file_2\n',
    }
    for member_name, contents in expected.items():
        self.assertEqual(contents, ar.archived_files[member_name].read())
    ar.close()
def test_symbols(self):
    """The GNU symbol table header precedes the normal members."""
    ar = arpy.Archive(os.path.join(os.path.dirname(__file__), 'sym.ar'))
    symbol_header = ar.read_next_header()
    self.assertEqual(symbol_header.type, arpy.HEADER_GNU_SYMBOLS)
    self.assertEqual(symbol_header.size, 4)
    member_header = ar.read_next_header()
    self.assertEqual(member_header.type, arpy.HEADER_NORMAL)
    self.assertEqual(member_header.size, 0)
    self.assertEqual(member_header.name, b"a.o")
    ar.close()
def test_mixed_names(self):
    """Short and BSD-overflow names can coexist in a single archive."""
    path = os.path.join(os.path.dirname(__file__), 'bsd_mixed.ar')
    ar = arpy.Archive(path)
    ar.read_all_headers()
    long_name = (b'a_very_long_name_for_the_gnu_type_header'
                 b'_so_it_can_overflow_the_standard_name_length')
    self.assertEqual(sorted(ar.archived_files.keys()), [long_name, b'short'])
    self.assertEqual(len(ar.headers), 3)
    ar.close()
def test_stream_skip_file(self):
    # make sure skipping contents is possible without seeking
    ar = arpy.Archive(fileobj=self.big_archive)
    first = ar.next()
    self.assertEqual(first.header.name, b'file1')
    # file1's contents are never read; advancing must still land on file2.
    second = ar.next()
    self.assertEqual(second.header.name, b'file2')
    self.assertEqual(second.read(), b'xx')
    ar.close()
def test_stream_read(self):
    # make sure all contents can be read without seeking
    ar = arpy.Archive(fileobj=self.big_archive)
    expectations = (
        (b'file1', b' ' * 5000),
        (b'file2', b'xx'),
    )
    for expected_name, expected_body in expectations:
        member = ar.next()
        body = member.read()
        self.assertEqual(member.header.name, expected_name)
        self.assertEqual(body, expected_body)
    ar.close()
def _get_tar(self):
    """Context-manager helper: yield a TarFile over this deb's data tarball.

    The deb is an ar archive; the data member (whatever its compression
    suffix) is located by substring match on b"data.tar".
    """
    with open(self._get_mirror_file(), "rb") as deb_file:
        arpy_archive = arpy.Archive(fileobj=deb_file)
        arpy_archive.read_all_headers()
        data_tar_arpy = [
            v for k, v in arpy_archive.archived_files.items()
            if b"data.tar" in k
        ][0]
        # ArchiveFileData is not enough like a file object for tarfile to use.
        # Monkey-patching a seekable method makes it close enough for TarFile to open.
        data_tar_arpy.seekable = lambda *args: True
        tar = tarfile.open(fileobj=data_tar_arpy, mode="r:*")
        try:
            yield tar
        finally:
            # Close the TarFile even if the consumer raises, so it is
            # released before the underlying deb file handle is closed.
            tar.close()
def control_extract(self, input_path, output_path):
    """
    Extracts only the Control file(s) from a DEB

    :param input_path: A String of the file path of the DEB to extract.
    :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
    :return: A Boolean on whether the extraction succeeded or failed.
    """
    try:
        root_ar = arpy.Archive(input_path)
        root_ar.read_all_headers()
        control_bin = root_ar.archived_files[b'control.tar.gz']
        # Use the TarFile context manager so the tarball is always closed,
        # even if extractall fails partway.
        with tarfile.open(fileobj=control_bin) as control_tar:
            control_tar.extractall(output_path)
        return True
    except Exception:
        # Best-effort API: any failure is reported as False.
        return False
def _get_control_tgz_from_ipk(ipk, ipk_path):
    """
    Get the "control.tar.gz" file from within an ipk.

    Args:
    * ipk file-like: an open ipk file.
    * ipk_path path-like: the path to the ipk (for error messages).
    """
    # ipk files are "ar" archives
    archive = arpy.Archive(fileobj=ipk)
    archive.read_all_headers()
    control_tgz_fname = b"control.tar.gz"
    # Abort the build if the expected member is absent.
    if control_tgz_fname not in archive.archived_files:
        bb.fatal('Failed to find "{}" file in app file "{}"'.format(
            control_tgz_fname, ipk_path))
    return archive.archived_files[control_tgz_fname]
def __init__(self, filename):
    """Build a signature for every member of an ar archive.

    :param filename: path to the archive file to process.
    """
    # Attempt to open it
    archive = arpy.Archive(filename)
    archive.read_all_headers()
    self.name = os.path.basename(filename)  # TODO: this is gross
    # Log the actual filename (previously this logged the literal word
    # "filename"), using lazy %-style logging arguments.
    logger.debug("Loaded archive for %s", filename)
    # TODO: Archive some metadata here!
    self.files = {}
    for fname, arfile in archive.archived_files.items():
        # Pre-mark the member so a failed signature still leaves its key.
        self.files[fname] = None
        try:
            logger.debug("Processing %s:%s", filename, fname)
            proj = angr.Project(arfile)
            self.files[fname] = BDSignature(proj, fname=fname)
        except Exception:
            # The `as e` binding was unused; logger.exception already
            # records the active traceback.
            logger.exception("Failed to create signature for %s:%s",
                             filename, fname)
def _extract_data_tar(self, root_ar, output_path):
    """Try each known data tarball variant in order; True on first success."""
    # (member name, explicit tarfile mode; None means auto-detect).
    # lzma/xz members need a fake seekable() so tarfile accepts them, and
    # both open as 'r:xz' to work around a Python/lzma implementation bug.
    candidates = (
        (b'data.tar.gz', None),
        (b'data.tar.lzma', 'r:xz'),
        (b'data.tar.xz', 'r:xz'),
    )
    for member_name, mode in candidates:
        try:
            data_bin = root_ar.archived_files[member_name]
            if mode is None:
                data_tar = tarfile.open(fileobj=data_bin)
            else:
                data_bin.seekable = lambda: True
                data_tar = tarfile.open(fileobj=data_bin, mode=mode)
            data_tar.extractall(output_path)
            return True
        except Exception:
            continue
    return False

def extract(self, input_path, output_path):
    """
    Extracts data from a DEB file.

    :param input_path: A String of the file path of the DEB to extract.
    :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
    :return: A Boolean on whether the extraction succeeded or failed.
    """
    try:
        root_ar = arpy.Archive(input_path)
        root_ar.read_all_headers()
        # The copy-pasted nested try chains are replaced by one loop over
        # the candidate data tarball names.
        if not self._extract_data_tar(root_ar, output_path):
            print(
                "\033[91m- DEB Extraction Error -\n"
                "The DEB file inserted for one of your packages is invalid. Please report this as a bug "
                "and attach the DEB file at \"" + output_path + "\".\033[0m")
        control_bin = root_ar.archived_files[b'control.tar.gz']
        control_tar = tarfile.open(fileobj=control_bin)
        control_tar.extractall(output_path)
        return True
    except Exception:
        return False
def _readControl(self):
    """Return the raw bytes of the './control' member from this deb's
    control tarball, accepting either control.tar.xz or control.tar.gz.

    Raises ValueError when neither control tarball variant is present.
    """
    ar = arpy.Archive(self.filename)
    ar.read_all_headers()
    if b'control.tar.xz' in ar.archived_files:
        # Passing the archive member object as `filename` relies on
        # LZMAFile accepting a file object in that position.
        tar = LZMAFile(filename=ar.archived_files[b'control.tar.xz'])
        # NOTE: this requires https://github.com/viraptor/arpy/pull/5
    elif b'control.tar.gz' in ar.archived_files:
        tar = GzipFile(fileobj=ar.archived_files[b'control.tar.gz'])
    else:
        raise ValueError('Unable to find control file')
    raw = TarFile(fileobj=tar)
    control = raw.extractfile('./control').read()
    # Close innermost-first: tar member, decompressor, then the archive.
    raw.close()
    tar.close()
    ar.close()
    return control
def main(argv):
    """Hash the contents of the given ar archives and print a short digest.

    :param argv: full process argv; argv[1:] is parsed for flags/archives.
    """
    p = ArgumentParser(prog=basename(argv[0]),
                       description="Hash the contents of archives")
    p.add_argument("--debug", action="store_true",
                   help="Enable debugging output")
    p.add_argument("archives", metavar="ARCHIVES", nargs="*")
    args = p.parse_args(argv[1:])

    root_logger = logging.getLogger()
    logging.basicConfig()
    root_logger.setLevel(logging.DEBUG if args.debug else logging.INFO)

    # Named total_hash (was `hash`) to avoid shadowing the builtin hash().
    total_hash = hashlib.sha256()
    for archive_filename in args.archives:
        with open(archive_filename, "rb") as archive_file:
            archive = arpy.Archive(fileobj=archive_file)
            log.debug("opened archive %r", archive_filename)
            for arfile in archive:
                # The member name participates in the overall digest.
                total_hash.update(arfile.header.name)
                nbytes = 0
                filehash = hashlib.sha256()
                while True:
                    buf = arfile.read(32768)
                    if not buf:
                        break
                    total_hash.update(buf)
                    filehash.update(buf)
                    nbytes += len(buf)
                log.debug("hashed %s/%s %r %s bytes", archive_filename,
                          arfile.header.name.decode("utf-8"),
                          filehash.hexdigest(), nbytes)
    # 128 bits of entropy is enough for anyone
    digest = total_hash.digest()[:16]
    log.debug("digest %r", digest)
    print(b64encode(digest, b"@_").decode("ascii").rstrip("="))
def __init__(self, filename=None, fileobj=None):
    """Open an ar archive from an existing file object or a path.

    :param filename: path to open (used only when fileobj is None).
    :param fileobj: an already-open binary file object.
    """
    # Explicit None test instead of `fileobj or open(...)`: a falsy but
    # valid file object must not be silently replaced by re-opening the path.
    if fileobj is None:
        fileobj = open(filename, "rb")
    self.file = fileobj
    self.ar = arpy.Archive(fileobj=self.file)
    self.ar.read_all_headers()
def test_bad_table_reference(self):
    """A name-table offset past the table's end raises ArchiveFormatError."""
    malformed = (b'!<arch>\n// 0 `\n'
                 b'/9 1297730011 1000 1000 100644 0 `\n')
    ar = arpy.Archive(fileobj=io.BytesIO(malformed))
    self.assertRaises(arpy.ArchiveFormatError, ar.read_all_headers)
def test_bad_table_size(self):
    """A name table whose declared size exceeds the data raises an error."""
    malformed = b'!<arch>\n// 10 `\n'
    ar = arpy.Archive(fileobj=io.BytesIO(malformed))
    self.assertRaises(arpy.ArchiveFormatError, ar.read_all_headers)
def test_bad_file_size(self):
    """Reading a member whose declared size exceeds the data raises an error."""
    truncated = b'!<arch>\nfile1/ 1364071329 1000 100 100644 15 `\nabc'
    ar = arpy.Archive(fileobj=io.BytesIO(truncated))
    ar.read_all_headers()
    member = ar.archived_files[b'file1']
    self.assertRaises(arpy.ArchiveAccessError, member.read)
def test_bad_file_header_nums(self):
    """Non-numeric header fields raise ArchiveFormatError."""
    malformed = b'!<arch>\nfile1/ aaaa071329 1000 100 100644 15 `\n'
    ar = arpy.Archive(fileobj=io.BytesIO(malformed))
    self.assertRaises(arpy.ArchiveFormatError, ar.read_all_headers)
def test_bad_file_header_short(self):
    """A header truncated mid-field raises ArchiveFormatError."""
    truncated = b'!<arch>\nfile1/ 1364071329 1000'
    ar = arpy.Archive(fileobj=io.BytesIO(truncated))
    self.assertRaises(arpy.ArchiveFormatError, ar.read_all_headers)
def setUp(self):
    """Open the fixture archive and keep its first member for the tests."""
    fixture = os.path.join(os.path.dirname(__file__), 'contents.ar')
    self.ar = arpy.Archive(fixture)
    self.ar.read_all_headers()
    self.f1 = self.ar.archived_files[b'file1']
def test_header_description(self):
    """repr() of a header identifies it as an ArchiveFileHeader."""
    ar = arpy.Archive(os.path.join(os.path.dirname(__file__), 'normal.ar'))
    header = ar.read_next_header()
    description = repr(header)
    self.assertTrue(description.startswith('<ArchiveFileHeader'))
    ar.close()