Beispiel #1
0
def demo_get(service, params, compress=False):
    """Issue a demo GET request and print the request URL and decoded reply.

    The URL comes from ``build_url(service, params, compress)``. The response
    body is parsed into a ``UAResponse`` envelope; when the envelope's ``Err``
    field is 0, its ``Data`` field is parsed into an ``MSG`` which is printed,
    otherwise the envelope itself is printed.

    :param service: forwarded unchanged to ``build_url``.
    :param params: forwarded unchanged to ``build_url``.
    :param bool compress: forwarded to ``build_url``; when true, the response
      body is snappy stream-decompressed before it is parsed.
    :rtype: None
    """
    url = build_url(service, params, compress)

    print("REQ:")
    print(url)

    ua = UAResponse()
    msg = MSG()

    print()

    print("RESP:")

    # Keep the HTTP response inside a ``with`` block so the connection is
    # released even if reading or decompressing the body fails.
    with urlopen(url) as response:
        if compress:
            dst = io.BytesIO()
            snappy.stream_decompress(response, dst)
            data = dst.getvalue()
        else:
            data = response.read()

    ua.ParseFromString(data)
    if ua.Err == 0:
        msg.ParseFromString(ua.Data)

        # TODO: handle msg here
        print(msg)
    else:
        print(ua)
def process_file(inpath, outpath):
    """Stream-decompress the snappy file ``inpath`` into ``outpath``.

    Both files are opened in binary mode; ``outpath`` is opened for writing.
    A debug log line naming both paths is emitted first.

    :param inpath: path of the snappy-compressed input file.
    :param outpath: path the decompressed bytes are written to.
    :returns: ``outpath``, unchanged.
    """
    logging.debug("Parsing %s into %s", inpath, outpath)
    with open(inpath, 'rb') as compressed:
        with open(outpath, 'wb') as plain:
            snappy.stream_decompress(compressed, plain)
    return outpath
def decompress_to_file(blob, dest_file, compression):
    """
    Write the decompressed contents of ``blob`` into ``dest_file``.

    :param IOBase blob: A file-like object holding the compressed data.
    :param IOBase dest_file: A file-like object that receives the
      uncompressed data.
    :param str compression: Name of the algorithm the data was compressed
      with. One of: bzip2, gzip, snappy.
    :raises ValueError: if ``compression`` is none of the names above.
    :rtype: None
    """
    if compression == "snappy":
        # snappy streams straight from blob to dest_file; the module is
        # obtained through the _try_import_snappy() helper.
        _try_import_snappy().stream_decompress(blob, dest_file)
        return

    # The remaining codecs expose a readable wrapper around ``blob`` which is
    # then copied into the destination.
    if compression == "gzip":
        reader = gzip.GzipFile(fileobj=blob, mode="rb")
    elif compression == "bzip2":
        reader = bz2.BZ2File(blob, "rb")
    else:
        raise ValueError("Unknown compression type: %s" % compression)

    with reader:
        shutil.copyfileobj(reader, dest_file)
Beispiel #4
0
def snappy_decompress(data):
    """Return ``data`` decompressed with snappy's streaming decoder.

    :param data: the snappy stream-compressed payload.
    :returns: the decompressed payload, as returned by the output buffer's
      ``getvalue()``.
    :raises RuntimeError: if the ``sz`` module is ``None``.
    """
    if sz is None:
        raise RuntimeError('Snappy decompression is not available')
    ## The streaming API is used on in-memory buffers because sz.decompress
    ## is broken per https://github.com/andrix/python-snappy/issues/28
    sink = StringIO()
    source = StringIO(data)
    sz.stream_decompress(source, sink)
    return sink.getvalue()
def decompress_snappy(path):
    """Stream-decompress the snappy file at ``path``.

    :param str path: path of the snappy-compressed input file.
    :returns: ``path`` with its last seven characters removed (presumably
      the ``.snappy`` suffix -- confirm against callers).
    """
    directory_path = path[:-7]
    # NOTE(review): ``path_to_store`` is not defined inside this function; it
    # is presumably a module-level name. Confirm that, or whether
    # ``directory_path`` was the intended output path.
    #
    # The output is opened in binary mode because the decompressor writes raw
    # bytes. The ``with`` statement closes both files, so no explicit close()
    # calls are needed.
    with open(path, 'rb') as in_file, open(path_to_store, 'wb') as out_file:
        snappy.stream_decompress(in_file, out_file)
    return directory_path
Beispiel #6
0
 def stream_decompress(src_filename, dst_filename):
     """
     Decompress a file.

     :param src_filename:    name of the source file, opened for binary reading
     :param dst_filename:    name of the output file, opened for binary writing
     :return: None
     """
     # NOTE(review): the call below uses the same name as this function; it
     # presumably resolves to an imported stream decompressor that takes file
     # objects -- confirm it does not resolve to this function itself.
     with open(src_filename, "rb") as source:
         with open(dst_filename, "wb") as target:
             stream_decompress(source, target)
Beispiel #7
0
    def unpack(self):
        """Decompress this parser's snappy data into a single output file.

        When the snappy data does not span the whole input file (non-zero
        offset, or a file size different from ``self.unpacked_size``), the
        relevant byte range is first carved into a temporary file, because
        snappy would otherwise try to unpack all concatenated snappy data
        in the file.

        The output file is named after the input file's stem when the input
        has a ``.sz`` suffix, and ``unpacked_from_snappy`` otherwise; it is
        created below ``self.rel_unpack_dir``.

        :returns: a list holding one ``FileResult`` for the unpacked file.
        :raises UnpackParserException: if decompressing, or closing the
          output file after decompressing, fails.
        """
        out_labels = []
        unpacked_files = []

        # Carving is only needed when the data is not the entire file.
        covers_whole_file = (self.offset == 0
                             and self.fileresult.filesize == self.unpacked_size)
        carved_path = None

        try:
            if not covers_whole_file:
                carved_fd, carved_path = tempfile.mkstemp(
                    dir=self.scan_environment.temporarydirectory)
                try:
                    os.sendfile(carved_fd, self.infile.fileno(), self.offset,
                                self.unpacked_size)
                finally:
                    # The carved file is reopened by name below, so the
                    # descriptor returned by mkstemp is no longer needed.
                    os.close(carved_fd)

            # determine the name of the output file
            if self.fileresult.filename.suffix.lower() == '.sz':
                file_path = pathlib.Path(self.fileresult.filename.stem)
            else:
                file_path = pathlib.Path("unpacked_from_snappy")

            outfile_rel = self.rel_unpack_dir / file_path
            outfile_full = self.scan_environment.unpack_path(outfile_rel)
            os.makedirs(outfile_full.parent, exist_ok=True)

            with open(outfile_full, 'wb') as outfile:
                if carved_path is not None:
                    infile = open(carved_path, 'rb')
                else:
                    infile = self.infile.infile
                    infile.seek(0)

                try:
                    snappy.stream_decompress(infile, outfile)
                    # Close inside the try so that a failing flush is also
                    # reported as an UnpackParserException.
                    outfile.close()
                except Exception as e:
                    raise UnpackParserException(e.args) from e
                finally:
                    # Only the carved copy is owned by this method; the
                    # parser's own input file is left open.
                    if carved_path is not None:
                        infile.close()
        finally:
            # Remove the carved copy on success and on every failure path.
            if carved_path is not None:
                os.unlink(carved_path)

        fr = FileResult(self.fileresult, self.rel_unpack_dir / file_path,
                        set(out_labels))
        unpacked_files.append(fr)
        return unpacked_files
Beispiel #8
0
    def _read_file(self, f: "pyarrow.NativeFile", path: str, **reader_args):
        """Read the whole of ``f`` and return its bytes in a one-element list.

        When ``reader_args["compression"]`` is ``"snappy"`` the stream is
        snappy-decompressed first, using the Hadoop snappy variant if
        ``reader_args["filesystem"]`` is a ``HadoopFileSystem``. Any other
        value reads the file as-is with ``f.readall()``.

        :param f: the open file to read from.
        :param path: path of the file; only used in the result when
          ``include_paths`` is set.
        :param reader_args: optional ``include_paths``, ``compression`` and
          ``filesystem`` entries as described above.
        :returns: ``[(path, data)]`` if ``include_paths`` is truthy, else
          ``[data]``.
        """
        from pyarrow.fs import HadoopFileSystem

        include_paths = reader_args.pop("include_paths", False)

        if reader_args.get("compression") != "snappy":
            data = f.readall()
        else:
            import snappy

            filesystem = reader_args.get("filesystem", None)
            rawbytes = BytesIO()

            # Pick the decoder matching the filesystem the file came from.
            if isinstance(filesystem, HadoopFileSystem):
                decompress = snappy.hadoop_snappy.stream_decompress
            else:
                decompress = snappy.stream_decompress
            decompress(src=f, dst=rawbytes)

            data = rawbytes.getvalue()

        return [(path, data)] if include_paths else [data]
Beispiel #9
0
def decompress_snappy(data):
    """Return ``data`` decompressed with snappy's streaming decoder.

    The input bytes are wrapped in an in-memory buffer and decompressed into
    a second buffer, whose contents are returned.
    """
    source = BytesIO(data)
    sink = BytesIO()
    snappy.stream_decompress(source, sink)
    return sink.getvalue()
Beispiel #10
0
def decrypt_and_uncompress(data,
                           gpg_private=None,
                           tmp_dir=None,
                           compression='xz'):
    '''Given a data buffer of bytes, optionally decrypt it with gpg and
    then uncompress it.

    :param data: the (possibly encrypted, possibly compressed) payload.
    :param gpg_private: if not None, a private key that is handed to
      ``gpg --import`` in a throw-away gpg home directory and then used to
      decrypt `data`.
    :param tmp_dir: directory in which the throw-away gpg home directory
      is created; None uses the default temporary directory.
    :param compression: scheme used to uncompress: "xz" (the default),
      "gz", "sz", or ""/None for uncompressed data.  Any other value
      leaves the data untouched.

    :returns: a tuple of (logs, data), where `logs` is an array of
      strings and data is a binary string, or None when there is no data
      at the start or after decryption

    '''
    if not data:
        logger.error('decrypt_and_uncompress starting with empty data')
        return ['no data'], None
    _errors = []
    if gpg_private is not None:
        ## imported here so this block carries its own dependency
        import tempfile

        ### setup gpg for decryption
        ## mkdtemp creates the directory atomically; the former
        ## os.tempnam + os.makedirs pair was race-prone and os.tempnam
        ## does not exist in Python 3
        gpg_dir = tempfile.mkdtemp(prefix='tmp-compress-and-encrypt-',
                                   dir=tmp_dir)
        try:
            gpg_child = subprocess.Popen([
                'gpg', '--no-permission-warning', '--homedir', gpg_dir,
                '--import', gpg_private
            ],
                                         stderr=subprocess.PIPE)
            ## stdout is not piped, so only stderr carries anything
            _, errors = gpg_child.communicate()
            if errors:
                _errors.append('gpg logs to stderr, read carefully:\n\n%s' %
                               errors)

            ## decrypt it, and free memory
            gpg_child = subprocess.Popen(
                ## setup gpg to decrypt with the imported private key,
                ## ascii armoring is off by default, and --output - must
                ## appear before --decrypt -
                [
                    'gpg', '--no-permission-warning', '--homedir', gpg_dir,
                    '--trust-model', 'always', '--output', '-', '--decrypt',
                    '-'
                ],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            ## communicate with child via its stdin
            data, errors = gpg_child.communicate(data)
            if errors:
                _errors.append(errors)

        finally:
            ## remove the gpg_dir
            shutil.rmtree(gpg_dir, ignore_errors=True)

        if not data:
            logger.error('empty data after gpg decrypt')
            _errors.append('gpg -> no data')
            return _errors, None

    if compression == 'xz':
        data = xz_decompress(data)
    elif compression == 'sz':
        ## sz.decompress is broken per https://github.com/andrix/python-snappy/issues/28
        fh = StringIO()
        sz.stream_decompress(StringIO(data), fh)
        data = fh.getvalue()
    elif compression == 'gz':
        ## read() takes an optional byte count, not the payload; read
        ## everything up to the end of the stream
        with gz.GzipFile(fileobj=StringIO(data), mode='r') as gz_fh:
            data = gz_fh.read()
    elif compression == "" or compression is None:
        ## data is not compressed
        pass

    return _errors, data