Ejemplo n.º 1
0
 def __init__(self, filename, fileobj=None, data=None, pool_path=None):
     """Initialise the package, reading control data from the archive if needed.

     When ``data`` is falsy, the ar archive is opened (from ``fileobj`` when
     one is given, otherwise from ``filename``), its ``control.tar.gz``
     member is opened as a gzip tarball, and the ``./control`` member of
     that tarball becomes ``data``. ``data`` is then passed to the parent
     initialiser.
     """
     self.name = filename
     self._pool_path = pool_path  # Used for manual pool path overrides
     if not data:
         if not fileobj:
             self.ar = arpy.Archive(filename)
         else:
             # Prefer the explicit filename; otherwise use the file object's name, if any.
             archive_name = filename or getattr(fileobj, 'name', None)
             self.ar = arpy.Archive(archive_name, fileobj)
         self.ar.read_all_headers()
         control_member = self.ar.archived_files['control.tar.gz']
         self.control_tar = tarfile.open('control.tar.gz', 'r:gz', fileobj=control_member)
         data = self.control_tar.extractfile('./control')
     super(AptPackage, self).__init__(data)
Ejemplo n.º 2
0
    def extract(self, input_path, output_path):
        """
        Extracts data from a DEB file.

        The data payload is unpacked first (``data.tar.gz``, falling back to ``data.tar.lzma`` if
        anything goes wrong with the gzip payload), followed by ``control.tar.gz``. The ar archive
        and every tarball opened here are closed before returning.

        :param input_path: A String of the file path of the DEB to extract.
        :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
        :return: A Boolean on whether the extraction succeeded or failed.
        """
        try:
            root_ar = arpy.Archive(input_path)
            try:
                root_ar.read_all_headers()
                try:
                    data_bin = root_ar.archived_files[b'data.tar.gz']
                    with tarfile.open(fileobj=data_bin) as data_tar:
                        data_tar.extractall(output_path)
                except Exception:
                    # Any failure with the gzip payload falls back to the LZMA payload;
                    # a failure here propagates to the outer handler and yields False.
                    data_theos_bin = root_ar.archived_files[b'data.tar.lzma']
                    # Patch in seekable(); the original author attributes the need for
                    # this to a Python/lzma implementation bug.
                    data_theos_bin.seekable = lambda: True
                    with tarfile.open(fileobj=data_theos_bin,
                                      mode='r:xz') as data_theos_tar:
                        data_theos_tar.extractall(output_path)

                control_bin = root_ar.archived_files[b'control.tar.gz']
                with tarfile.open(fileobj=control_bin) as control_tar:
                    control_tar.extractall(output_path)
                return True
            finally:
                # Release the underlying file handle on success and on failure alike.
                root_ar.close()
        except Exception:
            return False
Ejemplo n.º 3
0
	def test_empty_ar(self):
		"""empty.ar exposes no archived files and no headers."""
		fixture = os.path.join(os.path.dirname(__file__), 'empty.ar')
		archive = arpy.Archive(fixture)
		archive.read_all_headers()
		member_names = list(archive.archived_files.keys())
		self.assertEqual([], member_names)
		self.assertEqual(0, len(archive.headers))
		archive.close()
Ejemplo n.º 4
0
    def __init__(self, *args, **kwargs):
        """Load every member of the ar archive in ``self._binary_stream`` as a child object.

        Raises ``CLEError`` when ``arpy`` is unavailable. Each member is loaded through
        the loader's ``_load_object_isolated`` and appended to ``self.child_objects``;
        ``self.arch`` is taken from the first child when there is one.
        """
        if arpy is None:
            raise CLEError(
                "run `pip install arpy==1.1.1` to load archive files")
        super().__init__(*args, **kwargs)

        # HACK: a loader-internal method is used below in a way that would mark our
        # children as the main binary whenever we are the main binary ourselves.
        # Claim the main-object slot for the duration of the load to avoid that.
        if self.loader.main_object is None:
            self.loader.main_object = self

        archive = arpy.Archive(fileobj=self._binary_stream)
        archive.read_all_headers()
        for member_name, member_stream in archive.archived_files.items():
            member = self.loader._load_object_isolated(member_stream)
            member.binary = member.binary_basename = member_name.decode()
            member.parent_object = self
            self.child_objects.append(member)

        if not self.child_objects:
            l.warning("Loaded empty static archive?")
        else:
            self.arch = self.child_objects[0].arch
        self.has_memory = False
        self.pic = True

        # HACK, part 2: give the main-object slot back if we are still holding it.
        if self.loader.main_object is self:
            self.loader.main_object = None
Ejemplo n.º 5
0
def openar(filename, fileobj):
    """
    Open an ar archive, read all of its headers, and yield it.

    This is a generator that yields the ``arpy.Archive`` exactly once. The archive is
    closed when the generator is resumed, closed early, or has an exception thrown
    into it, and also when reading the headers fails.

    :param filename: passed to ``arpy.Archive`` as ``filename``.
    :param fileobj: passed to ``arpy.Archive`` as ``fileobj``.
    """
    ar = arpy.Archive(filename=filename, fileobj=fileobj)
    try:
        ar.read_all_headers()
        yield ar
    finally:
        # Runs even if the consumer raised, so the archive is never leaked.
        ar.close()
Ejemplo n.º 6
0
 def test_windows(self):
     """The first header of windows.ar has neither a gid nor a uid."""
     fixture = os.path.join(os.path.dirname(__file__), 'windows.ar')
     archive = arpy.Archive(fixture)
     first_header = archive.read_next_header()
     self.assertIsNone(first_header.gid)
     self.assertIsNone(first_header.uid)
     archive.close()
Ejemplo n.º 7
0
	def test_single_name(self):
		"""normal.ar holds exactly one member, named b'short', and one header."""
		fixture = os.path.join(os.path.dirname(__file__), 'normal.ar')
		archive = arpy.Archive(fixture)
		archive.read_all_headers()
		member_names = list(archive.archived_files.keys())
		self.assertEqual([b'short'], member_names)
		self.assertEqual(1, len(archive.headers))
		archive.close()
Ejemplo n.º 8
0
	def test_iteration(self):
		"""Iterating normal.ar yields the b'short' member and then raises StopIteration."""
		fixture = os.path.join(os.path.dirname(__file__), 'normal.ar')
		archive = arpy.Archive(fixture)
		members = iter(archive)
		first_member = members.next()
		self.assertEqual(b'short', first_member.header.name)
		self.assertRaises(StopIteration, members.next)
		archive.close()
Ejemplo n.º 9
0
	def test_fileobj(self):
		"""An archive read from an in-memory file object lists its single b'short' member."""
		path = os.path.join(os.path.dirname(__file__), 'normal.ar')
		# Read the fixture inside a context manager so its file handle is closed
		# deterministically instead of being left to the garbage collector.
		with open(path, "rb") as fixture:
			data = fixture.read()
		ar = arpy.Archive(fileobj=io.BytesIO(data))
		ar.read_all_headers()
		self.assertEqual([b'short'],
				list(ar.archived_files.keys()))
		self.assertEqual(1, len(ar.headers))
		ar.close()
Ejemplo n.º 10
0
 def test_single_name(self):
     """bsd_single_name.ar exposes one long-named member and two headers."""
     expected_name = b'a_very_long_name_for_the_gnu_type_header_so_it_can_overflow_the_standard_name_length'
     fixture = os.path.join(os.path.dirname(__file__), 'bsd_single_name.ar')
     archive = arpy.Archive(fixture)
     archive.read_all_headers()
     member_names = list(archive.archived_files.keys())
     self.assertEqual([expected_name], member_names)
     self.assertEqual(2, len(archive.headers))
     archive.close()
Ejemplo n.º 11
0
 def test_archive_contents(self):
     """The members file1 and file2 of contents.ar read back their expected bytes."""
     fixture = os.path.join(os.path.dirname(__file__), 'contents.ar')
     archive = arpy.Archive(fixture)
     archive.read_all_headers()
     first_payload = archive.archived_files[b'file1'].read()
     second_payload = archive.archived_files[b'file2'].read()
     self.assertEqual(b'test_in_file_1\n', first_payload)
     self.assertEqual(b'test_in_file_2\n', second_payload)
     archive.close()
Ejemplo n.º 12
0
	def test_symbols(self):
		"""sym.ar starts with a 4-byte GNU symbols header followed by an empty a.o."""
		fixture = os.path.join(os.path.dirname(__file__), 'sym.ar')
		archive = arpy.Archive(fixture)
		symbols_header = archive.read_next_header()
		self.assertEqual(arpy.HEADER_GNU_SYMBOLS, symbols_header.type)
		self.assertEqual(4, symbols_header.size)
		object_header = archive.read_next_header()
		self.assertEqual(arpy.HEADER_NORMAL, object_header.type)
		self.assertEqual(0, object_header.size)
		self.assertEqual(b"a.o", object_header.name)
		archive.close()
Ejemplo n.º 13
0
 def test_mixed_names(self):
     """bsd_mixed.ar exposes one long and one short member name across three headers."""
     expected_names = [
         b'a_very_long_name_for_the_gnu_type_header_so_it_can_overflow_the_standard_name_length',
         b'short',
     ]
     fixture = os.path.join(os.path.dirname(__file__), 'bsd_mixed.ar')
     archive = arpy.Archive(fixture)
     archive.read_all_headers()
     member_names = sorted(archive.archived_files.keys())
     self.assertEqual(expected_names, member_names)
     self.assertEqual(3, len(archive.headers))
     archive.close()
Ejemplo n.º 14
0
 def test_stream_skip_file(self):
     """The second member can be read after the first was fetched but never read."""
     # make sure skipping contents is possible without seeking
     archive = arpy.Archive(fileobj=self.big_archive)
     skipped_member = archive.next()
     self.assertEqual(b'file1', skipped_member.header.name)
     second_member = archive.next()
     payload = second_member.read()
     self.assertEqual(b'file2', second_member.header.name)
     self.assertEqual(b'xx', payload)
     archive.close()
Ejemplo n.º 15
0
 def test_stream_read(self):
     """Both members of the big archive can be fetched and read in order."""
     # make sure all contents can be read without seeking
     archive = arpy.Archive(fileobj=self.big_archive)

     first_member = archive.next()
     first_payload = first_member.read()
     self.assertEqual(b'file1', first_member.header.name)
     self.assertEqual(b' ' * 5000, first_payload)

     second_member = archive.next()
     second_payload = second_member.read()
     self.assertEqual(b'file2', second_member.header.name)
     self.assertEqual(b'xx', second_payload)

     archive.close()
Ejemplo n.º 16
0
 def _get_tar(self):
     """Yield the package's ``data.tar*`` member opened as a ``TarFile``.

     This is a generator: the file returned by ``self._get_mirror_file()`` is opened,
     parsed as an ar archive, and the first member whose name contains ``b"data.tar"``
     is opened with transparent compression detection. Both the ``TarFile`` and the
     underlying file are closed once the generator finishes or is closed.

     Raises ``IndexError`` when the archive has no ``data.tar*`` member.
     """
     with open(self._get_mirror_file(), "rb") as deb_file:
         arpy_archive = arpy.Archive(fileobj=deb_file)
         arpy_archive.read_all_headers()
         data_tar_arpy = [
             v for k, v in arpy_archive.archived_files.items()
             if b"data.tar" in k
         ][0]
         # ArchiveFileData is not enough like a file object for tarfile to use.
         # Monkey-patching a seekable method makes it close enough for TarFile to open.
         data_tar_arpy.seekable = lambda *args: True
         # Hold the TarFile in a context manager so it is closed together with the
         # underlying file rather than leaked.
         with tarfile.open(fileobj=data_tar_arpy, mode="r:*") as tar:
             yield tar
Ejemplo n.º 17
0
 def control_extract(self, input_path, output_path):
     """
     Extracts only the Control file(s) from a DEB.

     The ar archive and the ``control.tar.gz`` tarball are both closed before returning,
     whether or not the extraction succeeded.

     :param input_path: A String of the file path of the DEB to extract.
     :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
     :return: A Boolean on whether the extraction succeeded or failed.
     """
     try:
         root_ar = arpy.Archive(input_path)
         try:
             root_ar.read_all_headers()
             control_bin = root_ar.archived_files[b'control.tar.gz']
             with tarfile.open(fileobj=control_bin) as control_tar:
                 control_tar.extractall(output_path)
             return True
         finally:
             # Release the underlying file handle on success and on failure alike.
             root_ar.close()
     except Exception:
         return False
Ejemplo n.º 18
0
def _get_control_tgz_from_ipk(ipk, ipk_path):
    """
    Return the "control.tar.gz" member of an ipk.

    Reports through ``bb.fatal`` when the ipk has no such member.

    Args:
    * ipk file-like: an open ipk file.
    * ipk_path path-like: the path to the ipk (for error messages).

    """
    # ipk files are "ar" archives
    archive = arpy.Archive(fileobj=ipk)
    archive.read_all_headers()
    member_name = b"control.tar.gz"
    if member_name not in archive.archived_files:
        message = 'Failed  to find "{}" file in app file "{}"'.format(
            member_name, ipk_path)
        bb.fatal(message)
    return archive.archived_files[member_name]
Ejemplo n.º 19
0
 def __init__(self, filename):
     """Load an ar archive and try to build a signature for each of its members.

     ``self.files`` maps every member name to its ``BDSignature``, or to ``None`` when
     building that signature failed; failures are logged with a traceback, not raised.

     :param filename: path of the archive to open.
     """
     # Attempt to open it
     archive = arpy.Archive(filename)
     archive.read_all_headers()
     self.name = os.path.basename(filename)  # TODO: this is gross
     # Log the actual path; the previous message printed the literal word "filename".
     logger.debug("Loaded archive for %s", filename)
     # TODO: Archive some metadata here!
     self.files = {}
     for fname, arfile in archive.archived_files.items():
         # Pre-register the member so a failed signature still shows up as None.
         self.files[fname] = None
         try:
             logger.debug("Processing %s:%s", filename, fname)
             proj = angr.Project(arfile)
             self.files[fname] = BDSignature(proj, fname=fname)
         except Exception:
             logger.exception("Failed to create signature for %s:%s",
                              filename, fname)
Ejemplo n.º 20
0
    def extract(self, input_path, output_path):
        """
        Extracts data from a DEB file.

        The data payload is looked for as ``data.tar.gz``, then ``data.tar.lzma``, then
        ``data.tar.xz``; the first one that unpacks without error wins. If none does, an error is
        printed and extraction still carries on with ``control.tar.gz``. The ar archive and every
        tarball opened here are closed before returning.

        :param input_path: A String of the file path of the DEB to extract.
        :param output_path: A String of the file path to put the extracted DEB. Folder must already exist.
        :return: A Boolean on whether the extraction succeeded or failed.
        """
        # (member name, tarfile mode, whether seekable() has to be patched in), in the order tried.
        data_candidates = (
            (b'data.tar.gz', 'r', False),
            (b'data.tar.lzma', 'r:xz', True),
            (b'data.tar.xz', 'r:xz', True),
        )
        try:
            root_ar = arpy.Archive(input_path)
            try:
                root_ar.read_all_headers()
                for member_name, tar_mode, patch_seekable in data_candidates:
                    try:
                        data_bin = root_ar.archived_files[member_name]
                        if patch_seekable:
                            data_bin.seekable = lambda: True
                        with tarfile.open(fileobj=data_bin,
                                          mode=tar_mode) as data_tar:
                            data_tar.extractall(output_path)
                    except Exception:
                        # Missing member or unreadable payload: try the next format.
                        continue
                    break
                else:
                    # No candidate could be extracted.
                    print(
                        "\033[91m- DEB Extraction Error -\n"
                        "The DEB file inserted for one of your packages is invalid. Please report this as a bug "
                        "and attach the DEB file at \"" + output_path +
                        "\".\033[0m")

                control_bin = root_ar.archived_files[b'control.tar.gz']
                with tarfile.open(fileobj=control_bin) as control_tar:
                    control_tar.extractall(output_path)
                return True
            finally:
                # Release the underlying file handle on success and on failure alike.
                root_ar.close()
        except Exception:
            return False
Ejemplo n.º 21
0
    def _readControl(self):
        """Return the contents of ``./control`` from the package's control tarball.

        The ar archive at ``self.filename`` is searched for ``control.tar.xz`` first and
        ``control.tar.gz`` second. Everything opened here is closed again, including when
        reading fails part-way.

        :return: whatever ``read()`` on the extracted ``./control`` member returns.
        :raises ValueError: if the archive contains neither control tarball.
        """
        ar = arpy.Archive(self.filename)
        try:
            ar.read_all_headers()

            if b'control.tar.xz' in ar.archived_files:
                tar = LZMAFile(filename=ar.archived_files[b'control.tar.xz'])
                # NOTE: this requires https://github.com/viraptor/arpy/pull/5

            elif b'control.tar.gz' in ar.archived_files:
                tar = GzipFile(fileobj=ar.archived_files[b'control.tar.gz'])

            else:
                raise ValueError('Unable to find control file')

            try:
                raw = TarFile(fileobj=tar)
                try:
                    control = raw.extractfile('./control').read()
                finally:
                    raw.close()
            finally:
                tar.close()
        finally:
            ar.close()

        return control
Ejemplo n.º 22
0
def main(argv):
    """Print a short digest computed over the members of the given archives.

    Every member's name and contents are fed, in order, into one SHA-256; the first
    16 bytes of that digest are printed base64-encoded (alternative characters ``@``
    and ``_``) with the ``=`` padding stripped. ``--debug`` raises the root logger to
    DEBUG, which also logs a per-member hash and byte count.

    :param argv: full argument vector; ``argv[0]`` is used as the program name.
    """
    parser = ArgumentParser(prog=basename(argv[0]),
                            description="Hash the contents of archives")
    parser.add_argument("--debug",
                        action="store_true",
                        help="Enable debugging output")
    parser.add_argument("archives", metavar="ARCHIVES", nargs="*")
    options = parser.parse_args(argv[1:])

    root_logger = logging.getLogger()
    logging.basicConfig()
    root_logger.setLevel(logging.DEBUG if options.debug else logging.INFO)

    combined = hashlib.sha256()
    for archive_filename in options.archives:
        with open(archive_filename, "rb") as archive_file:
            archive = arpy.Archive(fileobj=archive_file)
            log.debug("opened archive %r", archive_filename)
            for member in archive:
                combined.update(member.header.name)
                member_hash = hashlib.sha256()
                byte_count = 0
                # Stream the member in 32 KiB chunks until read() returns nothing.
                chunk = member.read(32768)
                while chunk:
                    combined.update(chunk)
                    member_hash.update(chunk)
                    byte_count += len(chunk)
                    chunk = member.read(32768)
                log.debug("hashed %s/%s %r %s bytes", archive_filename,
                          member.header.name.decode("utf-8"),
                          member_hash.hexdigest(), byte_count)

    # 128 bits of entropy is enough for anyone
    truncated = combined.digest()[:16]
    log.debug("digest %r", truncated)
    encoded = b64encode(truncated, b"@_").decode("ascii")
    print(encoded.rstrip("="))
Ejemplo n.º 23
0
 def __init__(self, filename=None, fileobj=None):
     """Read the headers of an ar archive from ``fileobj`` or, when that is falsy, from ``filename``."""
     if fileobj:
         self.file = fileobj
     else:
         # No usable file object was supplied: open the named file in binary mode.
         self.file = open(filename, "rb")
     self.ar = arpy.Archive(fileobj=self.file)
     self.ar.read_all_headers()
Ejemplo n.º 24
0
 def test_bad_table_reference(self):
     """read_all_headers rejects a '/9' entry that follows a zero-size '//' entry."""
     table_entry = b'!<arch>\n//                                               0        `\n'
     referencing_entry = b'/9              1297730011  1000  1000  100644  0         `\n'
     archive = arpy.Archive(fileobj=io.BytesIO(table_entry + referencing_entry))
     with self.assertRaises(arpy.ArchiveFormatError):
         archive.read_all_headers()
Ejemplo n.º 25
0
 def test_bad_table_size(self):
     """read_all_headers rejects a '//' entry declaring 10 bytes with no data after it."""
     raw_archive = b'!<arch>\n//                                              10        `\n'
     archive = arpy.Archive(fileobj=io.BytesIO(raw_archive))
     with self.assertRaises(arpy.ArchiveFormatError):
         archive.read_all_headers()
Ejemplo n.º 26
0
 def test_bad_file_size(self):
     """Reading a member that declares 15 bytes but has only 3 raises ArchiveAccessError."""
     raw_archive = b'!<arch>\nfile1/          1364071329  1000  100   100644  15        `\nabc'
     archive = arpy.Archive(fileobj=io.BytesIO(raw_archive))
     archive.read_all_headers()
     short_member = archive.archived_files[b'file1']
     with self.assertRaises(arpy.ArchiveAccessError):
         short_member.read()
Ejemplo n.º 27
0
 def test_bad_file_header_nums(self):
     """read_all_headers rejects a header whose second field begins with letters."""
     raw_archive = b'!<arch>\nfile1/          aaaa071329  1000  100   100644  15        `\n'
     archive = arpy.Archive(fileobj=io.BytesIO(raw_archive))
     with self.assertRaises(arpy.ArchiveFormatError):
         archive.read_all_headers()
Ejemplo n.º 28
0
 def test_bad_file_header_short(self):
     """read_all_headers rejects a header that is cut off after its third field."""
     raw_archive = b'!<arch>\nfile1/          1364071329  1000'
     archive = arpy.Archive(fileobj=io.BytesIO(raw_archive))
     with self.assertRaises(arpy.ArchiveFormatError):
         archive.read_all_headers()
Ejemplo n.º 29
0
    def setUp(self):
        """Open contents.ar, read its headers, and keep its ``file1`` member as ``self.f1``."""
        fixture = os.path.join(os.path.dirname(__file__), 'contents.ar')
        self.ar = arpy.Archive(fixture)
        self.ar.read_all_headers()

        members = self.ar.archived_files
        self.f1 = members[b'file1']
Ejemplo n.º 30
0
	def test_header_description(self):
		"""The repr of a header read from normal.ar starts with '<ArchiveFileHeader'."""
		fixture = os.path.join(os.path.dirname(__file__), 'normal.ar')
		archive = arpy.Archive(fixture)
		first_header = archive.read_next_header()
		description = repr(first_header)
		self.assertTrue(description.startswith('<ArchiveFileHeader'))
		archive.close()