def demo_get(service, params, compress=False):
    """Fetch a URL built from *service*/*params* and print request and reply.

    The URL comes from ``build_url(service, params, compress)``.  The reply
    body is read as-is, or run through ``snappy.stream_decompress`` when
    *compress* is true, and parsed into a ``UAResponse``.  When its ``Err``
    field is 0 the ``Data`` field is parsed into a ``MSG`` and printed;
    otherwise the ``UAResponse`` itself is printed.

    :param service: forwarded to ``build_url``.
    :param params: forwarded to ``build_url``.
    :param compress: forwarded to ``build_url``; also selects snappy
        stream decompression of the reply body.
    :rtype: None
    """
    url = build_url(service, params, compress)
    print("REQ:")
    print(url)
    ua = UAResponse()
    msg = MSG()
    print()
    print("RESP:")
    response = urlopen(url)
    try:
        if compress:
            dst = io.BytesIO()
            snappy.stream_decompress(response, dst)
            data = dst.getvalue()
        else:
            data = response.read()
    finally:
        # Release the connection even if reading or decompressing fails.
        response.close()
    ua.ParseFromString(data)
    if ua.Err == 0:
        msg.ParseFromString(ua.Data)
        # TODO: handle msg here
        print(msg)
    else:
        print(ua)
def process_file(inpath, outpath):
    """Stream-decompress the snappy file *inpath* into *outpath*.

    Both files are opened in binary mode.  Returns *outpath*.
    """
    logging.debug("Parsing %s into %s", inpath, outpath)
    with open(inpath, 'rb') as compressed:
        with open(outpath, 'wb') as plain:
            snappy.stream_decompress(compressed, plain)
    return outpath
def decompress_to_file(blob, dest_file, compression):
    """
    Write the decompressed contents of blob into dest_file.

    :param IOBase blob: A file-like object holding the compressed data.
    :param IOBase dest_file: A file-like object that receives the
        uncompressed data.
    :param str compression: Name of the compression algorithm; one of
        bzip2, gzip, snappy.
    :raises ValueError: If compression is none of the names above.
    :rtype: None
    """
    # Snappy streams straight from source to destination, so it is handled
    # up front and never reaches the copy step below.
    if compression == "snappy":
        _try_import_snappy().stream_decompress(blob, dest_file)
        return

    if compression == "gzip":
        reader = gzip.GzipFile(fileobj=blob, mode="rb")
    elif compression == "bzip2":
        reader = bz2.BZ2File(blob, "rb")
    else:
        raise ValueError("Unknown compression type: %s" % compression)

    with reader:
        shutil.copyfileobj(reader, dest_file)
def snappy_decompress(data):
    """Return *data* decompressed with the snappy stream decoder.

    Raises RuntimeError when the ``sz`` module object is None.
    """
    if sz is None:
        raise RuntimeError('Snappy decompression is not available')
    ## the stream API is used because sz.decompress is broken per
    ## https://github.com/andrix/python-snappy/issues/28
    sink = StringIO()
    source = StringIO(data)
    sz.stream_decompress(source, sink)
    return sink.getvalue()
def decompress_snappy(path):
    """Stream-decompress the snappy file at *path* next to itself.

    The output path is *path* with its last seven characters removed
    (presumably a ``.snappy`` suffix -- confirm against callers).

    :param path: path of the snappy-compressed input file.
    :return: the path the decompressed data was written to.
    """
    directory_path = path[:-7]
    # Write to the path that is returned, in binary mode, because the
    # decompressor emits bytes.  The ``with`` block closes both files.
    with open(path, 'rb') as in_file, open(directory_path, 'wb') as out_file:
        snappy.stream_decompress(in_file, out_file)
    return directory_path
def stream_decompress(src_filename, dst_filename):
    """
    Decompress a snappy stream file.

    :param src_filename: name of the compressed source file
    :param dst_filename: name of the file that receives the decompressed data
    :return: None
    """
    # This wrapper has the same name as the library function, so a bare
    # ``stream_decompress(...)`` call here can resolve to the wrapper itself
    # rather than to snappy.  Calling through the module is unambiguous.
    import snappy

    with open(src_filename, "rb") as rf, open(dst_filename, "wb") as wf:
        snappy.stream_decompress(rf, wf)
def unpack(self):
    """Decompress the snappy data of this parser into the unpack directory.

    When the snappy data does not span the whole input file it is first
    carved into a temporary file, which is removed again afterwards.

    :return: a list holding one ``FileResult`` for the decompressed file.
    :raises UnpackParserException: if decompression fails.
    """
    out_labels = []
    unpacked_files = []

    # check if the file starts at offset 0. If not, carve the
    # file first, as snappy tries to be smart and unpack
    # all concatenated snappy data in a file.
    havetmpfile = False
    if not (self.offset == 0 and self.fileresult.filesize == self.unpacked_size):
        temporary_file = tempfile.mkstemp(
            dir=self.scan_environment.temporarydirectory)
        havetmpfile = True
        os.sendfile(temporary_file[0], self.infile.fileno(),
                    self.offset, self.unpacked_size)
        os.fdopen(temporary_file[0]).close()

    # determine the name of the output file
    if self.fileresult.filename.suffix.lower() == '.sz':
        file_path = pathlib.Path(self.fileresult.filename.stem)
    else:
        file_path = pathlib.Path("unpacked_from_snappy")

    outfile_rel = self.rel_unpack_dir / file_path
    outfile_full = self.scan_environment.unpack_path(outfile_rel)
    os.makedirs(outfile_full.parent, exist_ok=True)
    outfile = open(outfile_full, 'wb')

    if havetmpfile:
        infile = open(temporary_file[1], 'rb')
    else:
        infile = self.infile.infile
        infile.seek(0)

    try:
        snappy.stream_decompress(infile, outfile)
        outfile.close()
    except Exception as e:
        outfile.close()
        raise UnpackParserException(e.args)
    finally:
        # The carved copy belongs to this method: close its handle and
        # remove it on success as well as on failure.  The shared input
        # file (no temporary file) is left open for the caller.
        if havetmpfile:
            infile.close()
            os.unlink(temporary_file[1])

    fr = FileResult(self.fileresult, self.rel_unpack_dir / file_path,
                    set(out_labels))
    unpacked_files.append(fr)
    return unpacked_files
def _read_file(self, f: "pyarrow.NativeFile", path: str, **reader_args):
    """Read the whole of *f* as bytes, snappy-decompressing when requested.

    Reader options consulted: ``include_paths`` (default False),
    ``compression`` and ``filesystem``.  With ``compression == "snappy"``
    the stream is decompressed, using the ``hadoop_snappy`` variant when
    ``filesystem`` is a ``HadoopFileSystem``; otherwise ``f.readall()`` is
    used.

    Returns a one-element list: ``(path, data)`` when ``include_paths`` is
    truthy, else just ``data``.
    """
    from pyarrow.fs import HadoopFileSystem

    include_paths = reader_args.pop("include_paths", False)

    if reader_args.get("compression") != "snappy":
        data = f.readall()
    else:
        import snappy

        filesystem = reader_args.get("filesystem", None)
        rawbytes = BytesIO()
        decompress = (
            snappy.hadoop_snappy.stream_decompress
            if isinstance(filesystem, HadoopFileSystem)
            else snappy.stream_decompress
        )
        decompress(src=f, dst=rawbytes)
        data = rawbytes.getvalue()

    return [(path, data)] if include_paths else [data]
def decompress_snappy(data):
    """Return the snappy-stream-decompressed form of the bytes in *data*."""
    source = BytesIO(data)
    sink = BytesIO()
    snappy.stream_decompress(source, sink)
    return sink.getvalue()
def decrypt_and_uncompress(data, gpg_private=None, tmp_dir=None, compression='xz'):
    '''Optionally decrypt `data` with gpg, then uncompress it.

    If `gpg_private` is provided, it is handed to `gpg --import` in a
    throwaway gpg home directory created under `tmp_dir`, and `data`
    is decrypted with it.  The result is uncompressed using the
    `compression` scheme, which defaults to "xz" and can also be
    "gz", "sz", or "" / None for data that is not compressed.  Any
    other value leaves the data as it is.

    :param data: buffer of bytes to decrypt and/or uncompress
    :param gpg_private: argument for `gpg --import`, or None to skip
      decryption
    :param tmp_dir: directory in which the temporary gpg home is
      created; None uses the default temporary directory
    :param compression: one of "xz", "gz", "sz", "" or None

    :returns: a tuple of (logs, data), where `logs` is an array of
      strings and data is a binary string; data is None when the
      input, or the gpg output, is empty
    '''
    if not data:
        logger.error('decrypt_and_uncompress starting with empty data')
        return ['no data'], None
    _errors = []
    if gpg_private is not None:
        ## imported here so the block does not depend on a file-level import
        import tempfile

        ### setup gpg for decryption
        ## mkdtemp creates the directory atomically and privately;
        ## os.tempnam only returned a name, which is race-prone, and
        ## it no longer exists in Python 3
        gpg_dir = tempfile.mkdtemp(prefix='tmp-compress-and-encrypt-',
                                   dir=tmp_dir)
        try:
            gpg_child = subprocess.Popen(
                ['gpg', '--no-permission-warning', '--homedir', gpg_dir,
                 '--import', gpg_private],
                stderr=subprocess.PIPE)
            s_out, errors = gpg_child.communicate()
            if errors:
                _errors.append('gpg logs to stderr, read carefully:\n\n%s' % errors)

            ## decrypt it, and free memory
            gpg_child = subprocess.Popen(
                ## setup gpg to decrypt with the imported private key;
                ## ascii armoring is off by default, and --output -
                ## must appear before --decrypt -
                ['gpg', '--no-permission-warning', '--homedir', gpg_dir,
                 '--trust-model', 'always', '--output', '-', '--decrypt', '-'],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            ## communicate with child via its stdin
            data, errors = gpg_child.communicate(data)
            if errors:
                _errors.append(errors)
        finally:
            ## remove the gpg_dir
            shutil.rmtree(gpg_dir, ignore_errors=True)

        if not data:
            logger.error('empty data after gpg decrypt')
            _errors.append('gpg -> no data')
            return _errors, None

    if compression == 'xz':
        data = xz_decompress(data)
    elif compression == 'sz':
        ## sz.decompress is broken per https://github.com/andrix/python-snappy/issues/28
        fh = StringIO()
        sz.stream_decompress(StringIO(data), fh)
        data = fh.getvalue()
    elif compression == 'gz':
        fh = StringIO(data)
        gz_fh = gz.GzipFile(fileobj=fh, mode='r')
        ## read() takes a byte count, not the payload; no argument
        ## reads the whole stream
        data = gz_fh.read()
    elif compression == "" or compression is None:
        ## data is not compressed
        pass
    return _errors, data