Example #1
    def test_decompressor_inputbuf_2(self):
        # Test reusing input buffer by appending data at the
        # end right away
        bzd = BZ2Decompressor()
        out = []

        # Create input buffer and empty it
        self.assertEqual(bzd.decompress(self.DATA[:200], max_length=0), b'')
        out.append(bzd.decompress(b''))

        # Fill buffer with new data
        out.append(bzd.decompress(self.DATA[200:280], 2))

        # Append some more data, not enough to require resize
        out.append(bzd.decompress(self.DATA[280:300], 2))

        # Decompress rest of data
        out.append(bzd.decompress(self.DATA[300:]))
        self.assertEqual(b''.join(out), self.TEXT)
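
The max_length argument these tests exercise is also what enables bounded-memory streaming. A minimal sketch of such a loop, assuming plain file paths (the names here are hypothetical):

from bz2 import BZ2Decompressor

def decompress_file(src_path, dst_path, chunk_size=64 * 1024):
    # Feed the decompressor in bounded chunks; read more input only when
    # its internal buffer is exhausted (needs_input is True).
    bzd = BZ2Decompressor()
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        while not bzd.eof:
            data = src.read(chunk_size) if bzd.needs_input else b''
            if bzd.needs_input and not data:
                raise EOFError('stream ended before the end-of-stream marker')
            dst.write(bzd.decompress(data, max_length=chunk_size))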
Example #2
    def __init__(self, fpatch, compression):
        if compression == 'lzma':
            self.decompressor = LZMADecompressor()
        elif compression == 'bz2':
            self.decompressor = BZ2Decompressor()
        elif compression == 'crle':
            self.decompressor = CrleDecompressor(patch_data_length(fpatch))
        elif compression == 'none':
            self.decompressor = NoneDecompressor(patch_data_length(fpatch))
        elif compression == 'heatshrink':
            self.decompressor = HeatshrinkDecompressor(patch_data_length(fpatch))
        elif compression == 'zstd':
            self.decompressor = ZstdDecompressor(patch_data_length(fpatch))
        elif compression == 'lz4':
            self.decompressor = Lz4Decompressor()
        else:
            raise Error(format_bad_compression_string(compression))

        self._fpatch = fpatch
Example #3
    def test_decompressor_inputbuf_1(self):
        # Test reusing input buffer after moving existing
        # contents to beginning
        bzd = BZ2Decompressor()
        out = []

        # Create input buffer and fill it
        self.assertEqual(bzd.decompress(self.DATA[:100], max_length=0), b'')

        # Retrieve some results, freeing capacity at beginning
        # of input buffer
        out.append(bzd.decompress(b'', 2))

        # Add more data that fits into input buffer after
        # moving existing data to beginning
        out.append(bzd.decompress(self.DATA[100:105], 15))

        # Decompress rest of data
        out.append(bzd.decompress(self.DATA[105:]))
        self.assertEqual(b''.join(out), self.TEXT)
Example #4
 def httpFinished(self):
     self.outFile.close()
     if self.httpRequestAborted or self._reply.error():
         self.outFile.remove()
     self._reply.deleteLater()
     del self._reply
     # Download finished: decompress the data file, then start the camera
     self.setText('Decompressing data...')
     try:
         bz = BZ2Decompressor()
         with open('D:/access55/shape_predictor_68_face_landmarks.dat.bz2',
                   'rb') as src:
             data = bz.decompress(src.read())
         with open('D:/access55/shape_predictor_68_face_landmarks.dat',
                   'wb') as dst:
             dst.write(data)
     except Exception as e:
         self.setText('Decompression failed: ' + str(e))
         return
     self.setText('Starting camera...')
     self.startCapture()
Example #5
    def _releases(self) -> Dict[str, Dict[str, Dict[str, Any]]]:
        all_deps = defaultdict(dict)
        for channel in self._channels:
            cache = JSONCache('conda.anaconda.org', 'releases', channel, ttl=config['cache']['ttl'])
            channel_deps = cache.load()
            if channel_deps is not None:
                for dep, releases in channel_deps.items():
                    all_deps[dep].update(releases)
                continue

            channel_deps = defaultdict(dict)
            for url in self._get_urls(channel=channel):
                response = requests.get(url)
                response.raise_for_status()
                content = BZ2Decompressor().decompress(response.content).decode('utf-8')
                base_url = url.rsplit('/', 1)[0]
                for fname, info in json.loads(content)['packages'].items():
                    # release info
                    name = canonicalize_name(info.pop('name'))
                    version = info.pop('version')
                    if version not in channel_deps[name]:
                        channel_deps[name][version] = dict(
                            depends=set(),
                            timestamp=info.get('timestamp', 0) // 1000,
                            files=[],
                        )
                    # file info
                    channel_deps[name][version]['depends'].update(info['depends'])
                    channel_deps[name][version]['files'].append(dict(
                        url=base_url + '/' + fname,
                        sha256=info.get('sha256', None),
                        size=info['size'],
                    ))

            for dep, releases in channel_deps.items():
                for release in releases.values():
                    release['depends'] = list(release['depends'])
                all_deps[dep].update(releases)
            cache.dump(channel_deps)

        return dict(all_deps)
Example #6
    def run(self):
        while True:
            delay = self.config.get("phishtank_update_delay")
            try:
                delay = int(delay)
            except (ValueError, TypeError):
                delay = 0
            if delay < 1:
                self.logger.info("update not set or <1, not running")
                time.sleep(60)
                continue
            if delay < 3600:
                self.logger.info(
                    "specified delay of {} too short, forcing to 1 hour".
                    format(delay))
                delay = 3600

            apikey = self.config.get("phishtank_api_key")
            if not apikey:
                self.logger.info("apikey not configured, not running")
                time.sleep(60)
                continue

            self.logger.info("running phishtank update operation")
            res = requests.get(PhishTank.FILE_URL.format(apikey))
            if res.status_code != requests.codes.ok:
                self.logging.info("error fetching file: {} - {}".format(
                    res.status_code, res.text))
                continue
            # let any error bubble up
            archive = StringIO()
            decompressor = BZ2Decompressor()
            data = decompressor.decompress(res.content)
            archive.write(data.decode("us-ascii"))
            dec = JSONDecoder()
            json_data = dec.decode(archive.getvalue())
            for item in json_data:
                url = item.get("url")
                if url:
                    self.cache.setex(url, delay, "suspicious")
            self.logger.info("phishtank update operation completed")
            time.sleep(delay)
Example #7
def try_decompress_at(input_file: bytes, offset: int) -> bytes:

    decoded = None

    try:

        if input_file[offset:offset + 3] == b'\x1f\x8b\x08':  # GZIP Signature
            decoded = SingleGzipReader(BytesIO(input_file[offset:])).read(
                -1
            )  # Will stop reading after the GZip footer thanks to our modification above.

        elif input_file[offset:offset + 6] == b'\xfd7zXZ\x00' or input_file[
                offset:offset + 3] == b']\x00\x00':  # XZ/LZMA Signature

            try:
                decoded = LZMADecompressor().decompress(
                    input_file[offset:]
                )  # Will discard the extra bytes and put them in an attribute.
            except Exception:
                decoded = LZMADecompressor().decompress(
                    input_file[offset:offset + 5] + b'\xff' * 8 +
                    input_file[offset + 5:])  # pylzma format compatibility

        elif input_file[offset:offset + 3] == b'BZh':  # BZ2 Signature
            decoded = BZ2Decompressor().decompress(
                input_file[offset:]
            )  # Will discard the extra bytes and put them in an attribute.

    except Exception:

        pass

    if decoded and len(decoded) > 0x1000:
        print((
            '[+] Kernel successfully decompressed in-memory (the offsets that '
            + 'follow will be given relative to the decompressed binary)'))

        return decoded
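
A caller would typically scan the image for each magic value and try every hit; a small driver sketch for the helper above (the signature tuple simply mirrors the checks inside try_decompress_at):

def scan_and_try_decompress(input_file: bytes) -> bytes:
    # Try each known signature at every offset where it occurs.
    for signature in (b'\x1f\x8b\x08', b'\xfd7zXZ\x00', b']\x00\x00', b'BZh'):
        offset = input_file.find(signature)
        while offset != -1:
            decoded = try_decompress_at(input_file, offset)
            if decoded:
                return decoded
            offset = input_file.find(signature, offset + 1)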
Example #8
    def get_metadata(self):
        """
        Get the namespace correspondance from the begining of the bz2 file
        + get the number of pages
        """
        # uncompressing data
        with open(self.path_index) as index:
            max_byte = int(index.readline().split(':')[0])
        with open(self.path_data, 'rb') as data:
            byte_file = data.read(565)
            uncompressed_data = BZ2Decompressor().decompress(byte_file).decode()

        # getting namespaces
        st = uncompressed_data+"</mediawiki>"
        m = re.search(r'xmlns=[^ ]+? ', st)
        st = st[:m.start()] + st[m.end():]
        root_header = ET.fromstring(st)
        namespaces_dict = {i.attrib['key']:i.text for i in root_header[0][5]}

        # get size of index:
        with os.popen('cat {} | wc -l'.format(self.path_index)) as cmd:
            total = int(cmd.read())
        return namespaces_dict, total
Example #9
    def get_data(self, url):
        try:
            tmp_handle = tempfile.NamedTemporaryFile()
            response = requests.get(url, stream=True)

            if not response.ok:
                raise Exception('Failed to download from url "{0}".'.format(
                    url))

            # represents if decompression should be applied
            decompress = self.decompress

            if decompress:
                # create sequential decompressor
                decompressor = BZ2Decompressor()

            # iterate over the image data in chunks
            for chunk in response.iter_content(1024 * 1024):
                if not chunk:
                    break
                if decompress:
                    # send data through decompressor if necessary
                    tmp_handle.write(decompressor.decompress(chunk))
                else:
                    # write data directly into tmp file
                    tmp_handle.write(chunk)

            # clean up your open connections
            response.connection.close()

            # reset file descriptor to position 0 before returning it
            tmp_handle.seek(0)

        except Exception as e:
            raise Exception('Failed to get image: "{0}".'.format(str(e)))

        return tmp_handle
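
One check the loop above omits: if the download is truncated, the decompressor never sees the end-of-stream marker and the image is silently incomplete. The decompressor's eof attribute makes that detectable; a hedged fragment that could sit right after the chunk loop:

            # Assumed addition: verify the bzip2 stream actually finished.
            if decompress and not decompressor.eof:
                raise Exception('Download ended before the bzip2 '
                                'end-of-stream marker was reached.')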
Example #10
    def _fill_buffer(self):
        if self._buffer:
            return True

        if self._decompressor.unused_data:
            rawblock = self._decompressor.unused_data
        else:
            rawblock = self._fp.read(_BUFFER_SIZE)

        if not rawblock:
            if self._decompressor.eof:
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False
            else:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        # Continue to next stream.
        if self._decompressor.eof:
            self._decompressor = BZ2Decompressor()

        self._buffer = self._decompressor.decompress(rawblock)
        return True
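
This reset-on-eof pattern is what makes concatenated (multi-stream) .bz2 files readable. The same idea as a self-contained sketch over an in-memory buffer:

from bz2 import BZ2Decompressor

def decompress_multistream(data: bytes) -> bytes:
    # Restart a fresh decompressor at each stream boundary, feeding it
    # the previous decompressor's unused_data.
    out = []
    while data:
        bzd = BZ2Decompressor()
        out.append(bzd.decompress(data))
        if not bzd.eof:
            raise EOFError('truncated bzip2 stream')
        data = bzd.unused_data
    return b''.join(out)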
Example #11
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            rawblock = (self._decompressor.unused_data
                        or self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                try:
                    self._decompressor.decompress(b"")
                except EOFError:
                    # End-of-stream marker and end of file. We're good.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            try:
                self._buffer = self._decompressor.decompress(rawblock)
            except EOFError:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(rawblock)
                except IOError:
                    # Trailing data isn't a valid bzip2 stream. We're done here.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
            self._buffer_offset = 0
        return True
Example #12
 def testEOFError(self):
     bz2d = BZ2Decompressor()
     text = bz2d.decompress(self.DATA)
     self.assertRaises(EOFError, bz2d.decompress, b"anything")
     self.assertRaises(EOFError, bz2d.decompress, b"")
Example #13
 def testPickle(self):
     for proto in range(pickle.HIGHEST_PROTOCOL + 1):
         with self.assertRaises(TypeError):
             pickle.dumps(BZ2Decompressor(), proto)
Example #14
 def test_failure(self):
     bzd = BZ2Decompressor()
     self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
     self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
Example #15
 def testDecompressUnusedData(self):
     bz2d = BZ2Decompressor()
     unused_data = b"this is unused data"
     text = bz2d.decompress(self.DATA + unused_data)
     self.assertEqual(text, self.TEXT)
     self.assertEqual(bz2d.unused_data, unused_data)
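
unused_data is also the simplest way to notice trailing bytes after a single stream; a minimal, runnable illustration:

import bz2

payload = bz2.compress(b'hello') + b'TRAILER'
bzd = bz2.BZ2Decompressor()
assert bzd.decompress(payload) == b'hello'
assert bzd.unused_data == b'TRAILER'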
Example #16
 def test_failure(self):
     bzd = BZ2Decompressor()
     self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
     # Previously, a second call could crash due to internal inconsistency
     self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
Example #17
def apply_patch_bsdiff(ffrom, fpatch, fto):
    """Apply given bsdiff patch `fpatch` to `ffrom` to create
    `fto`. Returns the size of the created to-data.

    All arguments are file-like objects.

    >>> ffrom = open('foo.mem', 'rb')
    >>> fpatch = open('foo-bsdiff.patch', 'rb')
    >>> fto = open('foo.new', 'wb')
    >>> apply_patch_bsdiff(ffrom, fpatch, fto)
    2780

    """

    ctrl_size, diff_size, to_size = read_header_bsdiff(fpatch)

    ctrl_decompressor = BZ2Decompressor()
    diff_decompressor = BZ2Decompressor()
    extra_decompressor = BZ2Decompressor()

    ctrl_decompressor.decompress(fpatch.read(ctrl_size), 0)
    diff_decompressor.decompress(fpatch.read(diff_size), 0)
    extra_decompressor.decompress(fpatch.read(), 0)

    to_pos = 0

    while to_pos < to_size:
        # Control data.
        diff_size = offtin(ctrl_decompressor.decompress(b'', 8))
        extra_size = offtin(ctrl_decompressor.decompress(b'', 8))
        adjustment = offtin(ctrl_decompressor.decompress(b'', 8))

        # Diff data.
        if to_pos + diff_size > to_size:
            raise Error("Patch diff data too long.")

        if diff_size > 0:
            diff_data = diff_decompressor.decompress(b'', diff_size)
            from_data = ffrom.read(diff_size)
            fto.write(bsdiff.add_bytes(diff_data, from_data))
            to_pos += diff_size

        # Extra data.
        if to_pos + extra_size > to_size:
            raise Error("Patch extra data too long.")

        if extra_size > 0:
            extra_data = extra_decompressor.decompress(b'', extra_size)
            fto.write(extra_data)
            to_pos += extra_size

        # Adjustment.
        ffrom.seek(adjustment, os.SEEK_CUR)

    if not ctrl_decompressor.eof:
        raise Error('End of control data not found.')

    if not diff_decompressor.eof:
        raise Error('End of diff data not found.')

    if not extra_decompressor.eof:
        raise Error('End of extra data not found.')

    return to_size
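
offtin above decodes bsdiff's 8-byte sign-and-magnitude integers. In case that helper is unfamiliar, a compatible sketch (this mirrors the reference bsdiff encoding; treat the exact layout as an assumption):

def offtin(buf):
    # bsdiff stores integers little-endian, with the sign carried in the
    # top bit of the last byte (sign-and-magnitude, not two's complement).
    value = int.from_bytes(buf[:8], 'little')
    if value & (1 << 63):
        value = -(value & ~(1 << 63))
    return value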
Example #18
 def test_subsequent_read(self):
     from bz2 import BZ2Decompressor
     bz2d = BZ2Decompressor()
     decompressed_data = bz2d.decompress(self.BUGGY_DATA)
     assert decompressed_data == ''
     raises(IOError, bz2d.decompress, self.BUGGY_DATA)
Example #19
 def __init__(self, stream):
     self.bzip2 = BZ2Decompressor()
Example #20
 def __init__(self, key, bufsize=100 * 1024):
     self.key = key
     self.bufsize = bufsize
     self._decomp = BZ2Decompressor()
     self._lines = []
     self._done = False
Example #21
 def test_buffer(self):
     from bz2 import BZ2Decompressor
     bz2d = BZ2Decompressor()
     decompressed_data = bz2d.decompress(buffer(self.DATA))
     assert decompressed_data == self.TEXT
Example #22
def try_decompress_at(input_file: bytes, offset: int) -> bytes:

    decoded = None
    try:

        if Signature.check(
                input_file, offset, Signature.DTB_Appended_Qualcomm
        ):  # Merely unpack a Qualcomm kernel file containing a magic and DTB offset at the start (so that offsets aren't wrong)

            dtb_offset_le = int.from_bytes(input_file[offset + 16:offset + 20],
                                           'little')
            dtb_offset_be = int.from_bytes(input_file[offset + 16:offset + 20],
                                           'big')

            decoded = input_file[offset + 20:offset + 20 +
                                 min(dtb_offset_le, dtb_offset_be)]

        elif Signature.check(
                input_file, offset, Signature.Android_Bootimg
        ):  # Unpack an uncompressed Android Bootimg file, version 0, 1, 2 or 3

            # See, for reference:
            # - https://github.com/osm0sis/mkbootimg/blob/master/unpackbootimg.c
            # - https://github.com/osm0sis/mkbootimg/blob/master/bootimg.h

            assert len(input_file) > 4096

            header_version_raw = input_file[offset + 10 * 4:offset + 11 * 4]

            endianness = 'little'

            if header_version_raw in (b'\0\0\0\3', b'\3\0\0\0'):
                page_size = 4096

                if header_version_raw == b'\0\0\0\3':
                    endianness = 'big'

            else:
                page_size_raw = input_file[offset + 9 * 4:offset + 10 * 4]

                page_size_le = int.from_bytes(page_size_raw, 'little')
                page_size_be = int.from_bytes(page_size_raw, 'big')

                if page_size_le < page_size_be:
                    page_size = page_size_le
                else:
                    endianness = 'big'
                    page_size = page_size_be

            kernel_size = int.from_bytes(
                input_file[offset + 2 * 4:offset + 3 * 4], endianness)

            assert len(input_file) > kernel_size > 0x1000
            assert len(input_file) > page_size > 0x200

            decoded = input_file[offset + page_size:offset + page_size +
                                 kernel_size]

            # Also try to re-unpack the output image in the case where the nested
            # kernel would start with a "UNCOMPRESSED_IMG" Qualcomm magic, for example

            decoded = try_decompress_at(decoded, 0) or decoded

        elif Signature.check(input_file, offset, Signature.Compressed_GZIP):
            decoded = SingleGzipReader(BytesIO(input_file[offset:])).read(
                -1
            )  # GZIP - Will stop reading after the GZip footer thanks to our modification above.

        elif (Signature.check(input_file, offset, Signature.Compressed_XZ) or
              Signature.check(input_file, offset, Signature.Compressed_LZMA)):
            try:
                decoded = LZMADecompressor().decompress(
                    input_file[offset:]
                )  # LZMA - Will discard the extra bytes and put them in an attribute.

            except Exception:
                decoded = LZMADecompressor().decompress(
                    input_file[offset:offset + 5] + b'\xff' * 8 +
                    input_file[offset + 5:])  # pylzma format compatibility

        elif Signature.check(input_file, offset, Signature.Compressed_BZ2):
            decoded = BZ2Decompressor().decompress(
                input_file[offset:]
            )  # BZ2 - Will discard the extra bytes and put them in an attribute.

        elif Signature.check(input_file, offset,
                             Signature.Compressed_LZ4):  # LZ4 support
            try:
                LZ4Decompressor = importlib.import_module('lz4.frame')

            except ModuleNotFoundError:
                logging.error('ERROR: This kernel requires LZ4 decompression.')
                logging.error('       But "lz4" python package was not found.')
                logging.error(
                    '       Example installation command: "sudo pip3 install lz4"'
                )
                logging.error('')
                return

            context = LZ4Decompressor.create_decompression_context()
            decoded, bytes_read, end_of_frame = LZ4Decompressor.decompress_chunk(
                context, input_file[offset:])

        elif Signature.check(input_file, offset,
                             Signature.Compressed_LZ4_Legacy
                             ):  # LZ4 support (legacy format)

            try:
                from utils.lz4_legacy import decompress_lz4_buffer
            except ImportError:
                try:
                    from vmlinux_to_elf.utils.lz4_legacy import decompress_lz4_buffer
                except ModuleNotFoundError:
                    logging.error(
                        'ERROR: This kernel requires LZ4 decompression.')
                    logging.error(
                        '       But "lz4" python package was not found.')
                    logging.error(
                        '       Example installation command: "sudo pip3 install lz4"'
                    )
                    logging.error('')
                    return

            decoded = decompress_lz4_buffer(BytesIO(input_file[offset:]))

        elif Signature.check(input_file, offset, Signature.Compressed_ZSTD):
            try:
                import zstandard as zstd
            except ModuleNotFoundError:
                logging.error('ERROR: This kernel requires ZSTD decompression.')
                logging.error(
                    '       But "zstandard" python package was not found.')
                logging.error(
                    '       Example installation command: "sudo pip3 install zstandard"'
                )
                logging.error('')
                return
            buf = BytesIO()
            context = zstd.ZstdDecompressor()
            for chunk in context.read_to_iter(BytesIO(input_file[offset:])):
                buf.write(chunk)
            buf.seek(0)
            decoded = buf.read()

        elif Signature.check(input_file, offset, Signature.Compressed_LZO):
            try:
                import lzo
            except ModuleNotFoundError:
                logging.error('ERROR: This kernel requires LZO decompression.')
                logging.error(
                    '       But "python-lzo" python package was not found.')
                logging.error(
                    '       Example installation command: "sudo pip3 install git+https://github.com/clubby789/python-lzo@b4e39df"'
                )
                logging.error('')
                return
            buf = BytesIO(input_file[offset:])
            decoded = lzo.LzoFile(fileobj=buf, mode='rb').read()
    except Exception:
        pass

    if decoded and len(decoded) > 0x1000:
        logging.info((
            '[+] Kernel successfully decompressed in-memory (the offsets that '
            + 'follow will be given relative to the decompressed binary)'))

        return decoded
Example #23
 def testDecompress(self):
     # "Test BZ2Decompressor.decompress()"
     bz2d = BZ2Decompressor()
     self.assertRaises(TypeError, bz2d.decompress)
     text = bz2d.decompress(self.DATA)
     self.assertEqual(text, self.TEXT)
Example #24
 def __init__(self, fileobj):
     self.__fileobj = fileobj
     self.__is_finished = False
     self.__decompressor = BZ2Decompressor()
Example #25
def _get_X_y(dataset, multilabel, replace=False):
    """Load a LIBSVM dataset as sparse X and observation y/Y.
    If X and y already exists as npz and npy, they are not redownloaded unless
    replace=True."""

    # some files are compressed, some are not:
    if NAMES[dataset].endswith('.bz2'):
        stripped_name = NAMES[dataset][:-4]
    else:
        stripped_name = NAMES[dataset]

    ext = '.npz' if multilabel else '.npy'
    y_path = DATA_HOME / f"{stripped_name}_target{ext}"
    X_path = DATA_HOME / f"{stripped_name}_data.npz"
    if replace or not y_path.exists() or not X_path.exists():
        tmp_path = DATA_HOME / stripped_name

        # Download the dataset
        source_path = DATA_HOME / NAMES[dataset]
        if not source_path.parent.exists():
            source_path.parent.mkdir(parents=True)
        download_libsvm(dataset, source_path, replace=replace)

        # decompress file only if it is compressed
        if NAMES[dataset].endswith('.bz2'):
            decompressor = BZ2Decompressor()
            print("Decompressing...")
            with open(tmp_path, "wb") as f, open(source_path, "rb") as g:
                for data in iter(lambda: g.read(100 * 1024), b''):
                    f.write(decompressor.decompress(data))
            source_path.unlink()

        n_features_total = N_FEATURES[dataset]

        print("Loading svmlight file...")
        with open(tmp_path, 'rb') as f:
            X, y = load_svmlight_file(
                f, n_features=n_features_total, multilabel=multilabel)

        tmp_path.unlink()
        X = sparse.csc_matrix(X)
        X.sort_indices()
        sparse.save_npz(X_path, X)

        if multilabel:
            indices = np.array([lab for labels in y for lab in labels])
            indptr = np.cumsum([0] + [len(labels) for labels in y])
            data = np.ones_like(indices)
            Y = sparse.csr_matrix((data, indices, indptr))
            sparse.save_npz(y_path, Y)
            return X, Y

        else:
            np.save(y_path, y)

    else:
        X = sparse.load_npz(X_path)
        if multilabel:
            y = sparse.load_npz(y_path)
        else:
            y = np.load(y_path)

    return X, y
Example #26
    def _decompressor(self):

        return BZ2Decompressor()
Example #27
    def test_creation(self):
        from bz2 import BZ2Decompressor

        raises(TypeError, BZ2Decompressor, "foo")

        BZ2Decompressor()
Example #28
    def test_EOF_error(self):
        from bz2 import BZ2Decompressor

        bz2d = BZ2Decompressor()
        bz2d.decompress(self.DATA)
        raises(EOFError, bz2d.decompress, "foo")
Example #29
 def testEOFError(self):
     # "Calling BZ2Decompressor.decompress() after EOS must raise EOFError"
     bz2d = BZ2Decompressor()
     text = bz2d.decompress(self.DATA)
     self.assertRaises(EOFError, bz2d.decompress, "anything")
     self.assertRaises(EOFError, bz2d.decompress, "")
Example #30
    def test_attribute(self):
        from bz2 import BZ2Decompressor

        bz2d = BZ2Decompressor()
        assert bz2d.unused_data == ""