Code example #1
File: utils.py Project: miku/fuzzycat
from zstandard import ZstdDecompressor


def zstdlines(filename, encoding="utf-8", bufsize=65536):
    """
    Generator over lines from a zstd compressed file.

    >>> for line in zstdlines("file.zst"):
    ...     print(line)

    """
    with open(filename, "rb") as f:
        decomp = ZstdDecompressor()
        with decomp.stream_reader(f) as reader:
            prev_line = ""
            while True:
                chunk = reader.read(bufsize)
                if not chunk:
                    break
                while True:
                    # We start with bytes but want unicode, which might not
                    # align; so we jitter around the end to complete the
                    # codepoint.
                    try:
                        string_data = chunk.decode(encoding)
                    except UnicodeDecodeError:
                        chunk = chunk + reader.read(1)
                    else:
                        break
                lines = string_data.split("\n")
                for i, line in enumerate(lines[:-1]):
                    if i == 0:
                        line = prev_line + line
                    yield line
                prev_line = lines[-1]
            if prev_line:
                # Flush a final line that lacked a trailing newline.
                yield prev_line
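
A quick way to exercise the generator above; a minimal sketch that assumes only the zstandard package and the zstdlines() definition (the file name is illustrative):

from zstandard import ZstdCompressor

# Write a small zstd-compressed file, then iterate over its lines.
with open("file.zst", "wb") as f:
    f.write(ZstdCompressor().compress("alpha\nbeta\ngamma\n".encode("utf-8")))

for line in zstdlines("file.zst"):
    print(line)  # alpha, beta, gamma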
Code example #2
File: main.py Project: shiodat/PenguinJudge
def run(judge_class: Callable[[], JudgeDriver],
        task: JudgeTask) -> JudgeStatus:
    LOGGER.info('judge start (contest_id: {}, problem_id: {}, '
                'submission_id: {}, user_id: {})'.format(
                    task.contest_id, task.problem_id, task.id, task.user_id))
    zctx = ZstdDecompressor()
    try:
        task.code = zctx.decompress(task.code)
        for test in task.tests:
            test.input = zctx.decompress(test.input)
            test.output = zctx.decompress(test.output)
    except Exception:
        LOGGER.warning('decompress failed', exc_info=True)
        with transaction() as s:
            return _update_submission_status(s, task,
                                             JudgeStatus.InternalError)
    with judge_class() as judge:
        ret = _prepare(judge, task)
        if ret:
            return ret
        if task.compile_image_name:
            ret = _compile(judge, task)
            if ret:
                return ret
        ret = _tests(judge, task)
    LOGGER.info('judge finished (submission_id={}): {}'.format(task.id, ret))
    return ret
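
The decompress() calls above use zstandard's one-shot buffer API; a minimal round-trip sketch of that scheme, assuming only the zstandard package:

from zstandard import ZstdCompressor, ZstdDecompressor

# compress() writes the content size into the frame header, so the
# one-shot decompress() can size its output buffer up front.
blob = ZstdCompressor().compress(b"print('hello')")
assert ZstdDecompressor().decompress(blob) == b"print('hello')"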
Code example #3
    def __enter__(self):
        codec = detect_compression(self.file)

        # (1) Open the input file

        # We keep a reference (`f`) to the original file,
        # in order to be able to close it on exit.
        # Calling `close()` on `ZstdDecompressor` does not
        # close the underlying resource.
        self.f = self.file.open("rb")

        # UTF-8 decoded, decompressed, file
        self.fb = self.f

        # (2) Setup the decompressor, if needed
        if codec == CompressionFormat.Zstandard:
            # `dictionary` is a Path to the zstd dictionary file, defined
            # elsewhere in the project.
            dict_data = ZstdCompressionDict(dictionary.read_bytes())
            ctx = ZstdDecompressor(dict_data=dict_data)
            self.fb = ctx.stream_reader(self.fb, read_across_frames=True)

        # (3) Decode the file
        self.fb = TextIOWrapper(self.fb, "utf-8")

        # (4) Deserialize the records
        stream = map(json_tryloads, self.fb)

        # (5) Apply the filters
        stream = filter(
            lambda record: all(fn.keep(record) for fn in self.filters), stream)

        # (6) Apply the transformers
        for fn in self.transformers:
            stream = map(fn, stream)

        return stream
Code example #4
def test_log(tmpfile):
    records = [{
        "msm_id": 1234,
        "prb_id": 5678
    }, {
        "msm_id": 9876,
        "prb_id": 5432
    }]

    with AtlasRecordsWriter(tmpfile, compression=True, log=True) as w:
        log_file = w.log_file
        for record in records:
            w.write(record)

    # TODO: Methods to simplify log reading?
    # Zstandard decompression context
    dict_data = ZstdCompressionDict(dictionary.read_bytes())
    ctx = ZstdDecompressor(dict_data=dict_data)

    f = tmpfile.open("rb")
    log_f = log_file.open("rb")

    log = LogEntry.iter_unpack(log_f.read())
    for i, (size, msm_id, prb_id) in enumerate(log):
        rec = json.loads(ctx.decompress(f.read(size)).decode("utf-8"))
        assert rec == records[i]
        assert msm_id == records[i]["msm_id"]
        assert prb_id == records[i]["prb_id"]

    f.close()
    log_f.close()
Code example #5
File: compression.py Project: Ludaxord/HelloChat
 def __decompress_zst_file(self, file, with_extension):
     with open(file, 'rb') as compressed:
         decomp = ZstdDecompressor()
         filename = os.path.splitext(file)[0]
         file_name = f"{self.destination_folder}/{file.name}{with_extension}"
         with open(file_name, 'wb') as destination:
             decomp.copy_stream(compressed, destination)
     print_green(f"unpacked zst file completed to {file_name}")
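
copy_stream() pumps an entire stream through the decompressor; a self-contained round-trip sketch using in-memory buffers (assumes only the zstandard package):

import io
from zstandard import ZstdCompressor, ZstdDecompressor

src, dst = io.BytesIO(), io.BytesIO()
ZstdCompressor().copy_stream(io.BytesIO(b"payload"), src)  # compress into src
src.seek(0)
ZstdDecompressor().copy_stream(src, dst)                   # decompress into dst
assert dst.getvalue() == b"payload"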
Code example #6
def __decompressNcz(nspf, f):
    ncaHeaderSize = 0x4000
    blockID = 0
    nspf.seek(0)
    header = nspf.read(ncaHeaderSize)
    start = f.tell()

    magic = nspf.read(8)
    if not magic == b'NCZSECTN':
        raise ValueError("No NCZSECTN found! Is this really a .ncz file?")
    sectionCount = readInt64(nspf)
    sections = [Section(nspf) for _ in range(sectionCount)]
    nca_size = ncaHeaderSize
    for i in range(sectionCount):
        nca_size += sections[i].size
    pos = nspf.tell()
    blockMagic = nspf.read(8)
    nspf.seek(pos)
    useBlockCompression = blockMagic == b'NCZBLOCK'
    blockSize = -1
    if useBlockCompression:
        BlockHeader = Block(nspf)
        blockDecompressorReader = BlockDecompressorReader.BlockDecompressorReader(
            nspf, BlockHeader)
    pos = nspf.tell()
    if not useBlockCompression:
        decompressor = ZstdDecompressor().stream_reader(nspf)
    hash = sha256()
    f.write(header)
    hash.update(header)

    for s in sections:
        i = s.offset
        crypto = AESCTR(s.cryptoKey, s.cryptoCounter)
        end = s.offset + s.size
        while i < end:
            crypto.seek(i)
            chunkSz = 0x10000 if end - i > 0x10000 else end - i
            if useBlockCompression:
                inputChunk = blockDecompressorReader.read(chunkSz)
            else:
                inputChunk = decompressor.read(chunkSz)
            if not len(inputChunk):
                break
            if not useBlockCompression:
                decompressor.flush()
            if s.cryptoType in (3, 4):
                inputChunk = crypto.encrypt(inputChunk)
            f.write(inputChunk)
            hash.update(inputChunk)
            i += len(inputChunk)

    hexHash = hash.hexdigest()
    end = f.tell()
    written = (end - start)
    return (written, hexHash)
Code example #7
 def decompress(self, fobj: IO[bytes]) -> IO[bytes]:
     decompressor = ZstdDecompressor()
     outfobj = NamedTemporaryFile(delete=False)
     try:
         decompressor.copy_stream(fobj, outfobj)
         outfobj.seek(0)
         yield outfobj
     finally:
         outfobj.close()
         remove(outfobj.name)
Code example #8
 def open_read(self, path: str) -> IO[bytes]:
     decompressor = ZstdDecompressor()
     outfobj = NamedTemporaryFile(delete=False)
     try:
         with open(path, 'rb') as infobj:
             decompressor.copy_stream(infobj, outfobj)
         outfobj.seek(0)
         yield outfobj
     finally:
         outfobj.close()
         remove(outfobj.name)
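
Both generators above yield exactly once and clean up in a finally block, a shape that only works as a context manager; a standalone sketch of the presumed contextlib.contextmanager wrapping (the decorator is not shown in the snippets):

from contextlib import contextmanager
from os import remove
from tempfile import NamedTemporaryFile
from zstandard import ZstdDecompressor

@contextmanager
def open_read(path):
    # Decompress into a temporary file, hand it to the caller,
    # then delete it once the with-block exits.
    outfobj = NamedTemporaryFile(delete=False)
    try:
        with open(path, 'rb') as infobj:
            ZstdDecompressor().copy_stream(infobj, outfobj)
        outfobj.seek(0)
        yield outfobj
    finally:
        outfobj.close()
        remove(outfobj.name)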
Code example #9
def _get_test_data(contest_id: str, problem_id: str, test_id: str,
                   is_input: bool) -> Response:
    zctx = ZstdDecompressor()
    from io import BytesIO
    with transaction() as s:
        _ = _validate_token(s, admin_required=True)
        tc = s.query(TestCase).filter(TestCase.contest_id == contest_id,
                                      TestCase.problem_id == problem_id,
                                      TestCase.id == test_id).first()
        if not tc:
            abort(404)
        f = BytesIO(zctx.decompress(tc.input if is_input else tc.output))
        return send_file(f,
                         as_attachment=True,
                         attachment_filename='{}.{}'.format(
                             test_id, 'in' if is_input else 'out'))
Code example #10
class TinyIndexBase:
    def __init__(self, item_type: type, num_pages: int, page_size: int):
        self.item_type = item_type
        self.num_pages = num_pages
        self.page_size = page_size
        self.decompressor = ZstdDecompressor()
        self.mmap = None

    def retrieve(self, key: str):
        index = self._get_key_page_index(key)
        page = self.get_page(index)
        if page is None:
            return []
        print("REtrieve", self.index_path, page)
        return self.convert_items(page)

    def _get_key_page_index(self, key):
        key_hash = mmh3.hash(key, signed=False)
        return key_hash % self.num_pages

    def get_page(self, i):
        """
        Get the page at index i, decompress and deserialise it using JSON
        """
        page_data = self.mmap[i * self.page_size:(i + 1) * self.page_size]
        try:
            decompressed_data = self.decompressor.decompress(page_data)
        except ZstdError:
            return None
        return json.loads(decompressed_data.decode('utf8'))

    def convert_items(self, items):
        converted = [self.item_type(*item) for item in items]
        # print("Converted", items, converted)
        return converted
Code example #11
 def from_file(cls, file):
     data = []
     # TODO: Unified "file loader"
     # (detect_codec is used in many places)
     codec = detect_compression(file)
     with open(file, "rb") as f:
         if codec == CompressionFormat.Zstandard:
             ctx = ZstdDecompressor()
             f = ctx.stream_reader(f)
         f = TextIOWrapper(f, "utf-8")
         for line in f:
             if line.startswith(";"):
                 continue
             prefix, origins = line.split("\t")
             origins = [int(x) for x in origins.split(",")]
             data.append((prefix, origins))
     return cls(data)
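
For reference, the loop above parses one record per line: a prefix, a tab, then comma-separated origin ASNs, with ';' lines skipped as comments. A tiny matching input (documentation-reserved prefixes and ASNs):

sample = (
    "; comments start with a semicolon\n"
    "192.0.2.0/24\t64496,64497\n"
    "198.51.100.0/24\t64500\n"
)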
Code example #12
 def __init__(self, item_type: type, index_path: str, num_pages: int,
              page_size: int):
     super().__init__(item_type, num_pages, page_size)
     self.index_path = index_path
     self.compressor = ZstdCompressor()
     self.decompressor = ZstdDecompressor()
     self.index_file = None
     self.mmap = None
Code example #13
File: util.py Project: 9001/softchat
def zopen(fn, mode="r", *args, **kwargs):
    import codecs

    objs = None
    if fn.endswith(".gz"):
        import gzip

        objs = [gzip.open(fn, "rb")]

    elif fn.endswith(".bz2"):
        import bz2

        objs = [bz2.open(fn, "rb")]

    elif fn.endswith(".xz"):
        import lzma

        objs = [lzma.open(fn, "rb")]

    elif fn.endswith(".zst"):
        from zstandard import ZstdDecompressor

        try:
            # documentation says KiB but it seems to be bytes
            ctx = ZstdDecompressor(max_window_size=1024 * 1024 * 1024 * 2)
        except Exception:
            # fallback in case that changes
            ctx = ZstdDecompressor(max_window_size=1024 * 1024 * 2)

        f1 = open(fn, "rb", 512 * 1024)
        f2 = ctx.stream_reader(f1)
        objs = [f2, f1]

    else:
        objs = [open(fn, "rb", 512 * 1024)]

    if "b" not in mode:
        enc = kwargs.get("encoding", "utf-8")
        # yield io.TextIOWrapper(io.BufferedReader(f2))
        yield codecs.getreader(enc)(objs[0])
    else:
        yield objs[0]

    for obj in objs:
        obj.close()
Code example #14
def open_compressed(path):
    if is_gzip(path):
        return gzip.open(path)

    if is_zstd(path):
        from zstandard import ZstdDecompressor
        return ZstdDecompressor().stream_reader(open(path, 'rb'))

    return open(path, 'rb')
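
Whichever branch is taken, open_compressed() returns a binary stream that supports the context-manager protocol; a usage sketch (is_gzip and is_zstd are the project's own format probes, assumed to be defined elsewhere):

with open_compressed("dump.zst") as fh:
    head = fh.read(4096)  # first 4 KiB of decompressed data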
Code example #15
class ZstdJsonSerializer(Serializer):
    def __init__(self):
        self.compressor = ZstdCompressor()
        self.decompressor = ZstdDecompressor()

    def serialize(self, item) -> bytes:
        return self.compressor.compress(json.dumps(item).encode('utf8'))

    def deserialize(self, serialized_item: bytes):
        return json.loads(
            self.decompressor.decompress(serialized_item).decode('utf8'))
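
A round-trip sketch for the serializer above, assuming only the class as defined:

serializer = ZstdJsonSerializer()
blob = serializer.serialize({"query": "zstd", "hits": 3})
assert serializer.deserialize(blob) == {"query": "zstd", "hits": 3}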
Code example #16
 def __decompressBlock(self, blockID):
     if (blockID >= len(self.CompressedBlockOffsetList)):
         raise EOFError(
             "BlockID exceeds the amounts of compressed blocks in that file!"
         )
     self.nspf.seek(self.CompressedBlockOffsetList[blockID])
     if self.CompressedBlockSizeList[blockID] < self.BlockSize:
         return ZstdDecompressor().decompress(self.nspf.read(
             self.BlockSize))
     else:
         return self.nspf.read(self.BlockSize)
Code example #17
def get_submission(contest_id: str, submission_id: str) -> Response:
    params, _ = _validate_request()
    zctx = ZstdDecompressor()
    with transaction() as s:
        u = _validate_token(s)
        contest = s.query(Contest).filter(Contest.id == contest_id).first()
        if not (contest and contest.is_accessible(u)):
            abort(404)
        tmp = s.query(Submission,
                      User.name).filter(Submission.contest_id == contest_id,
                                        Submission.id == submission_id,
                                        Submission.user_id == User.id).first()
        if not tmp:
            abort(404)
        submission, user_name = tmp
        if not submission.is_accessible(contest, u):
            abort(404)
        ret = submission.to_dict()
        ret['user_name'] = user_name
        ret['tests'] = []
        for t_raw in s.query(JudgeResult).filter(
                JudgeResult.submission_id == submission_id).order_by(
                    JudgeResult.status, JudgeResult.test_id):
            t = t_raw.to_dict()

            # Remove unnecessary fields
            t.pop('contest_id')
            t.pop('problem_id')
            t.pop('submission_id')
            t['id'] = t['test_id']
            t.pop('test_id')
            if not (contest.is_finished() or (u and u['admin'])):
                # During the contest, execution time and memory usage
                # are not returned to non-admin users.
                # (NULL values are never set by to_dict, so pass None
                # as the default argument to pop.)
                t.pop('time', None)
                t.pop('memory', None)
            ret['tests'].append(t)

    ret['code'] = zctx.decompress(ret['code']).decode('utf-8')
    return jsonify(ret)
Code example #18
File: _read_rel.py Project: yeahrmek/pandas-plink
def _read_rel_zs_rows(filepath, chunk_size=8 * 1000 * 1000):
    from zstandard import ZstdDecompressor

    with open(filepath, "rb") as fh:
        ctx = ZstdDecompressor()
        with ctx.stream_reader(fh) as reader:
            over = False
            chunks = []
            rows = []
            while not over:
                have_row = False
                while not have_row:
                    chunk = reader.read(chunk_size)
                    if not chunk:
                        over = True
                        break
                    if b"\n" in chunk:
                        have_row = True
                    chunks.append(chunk)
                (new_rows, semi_row) = _consume_rows(chunks)
                rows += new_rows
                chunks = [semi_row]
    return rows
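
_consume_rows is project-internal and not shown; a plausible sketch of its contract, stated as an assumption: join the buffered chunks, split off the complete newline-terminated rows, and return the incomplete tail for the next iteration.

def _consume_rows(chunks):  # hypothetical reimplementation, for illustration only
    data = b"".join(chunks)
    *complete, tail = data.split(b"\n")
    return [row.decode() for row in complete], tail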
Code example #19
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    elif compressor_id == ZstdContext.compressor_id:
        # ZstdDecompressor is not thread safe.
        # TODO: Use a pool?
        return ZstdDecompressor().decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id, ))
Code example #20
 def __decompressBlock(self, blockID):
     if self.CurrentBlockId == blockID:
         return self.CurrentBlock
     decompressedBlockSize = self.BlockSize
     if blockID >= len(self.CompressedBlockOffsetList) - 1:
         if blockID >= len(self.CompressedBlockOffsetList):
             raise EOFError(
                 "BlockID exceeds the amounts of compressed blocks in that file!"
             )
          decompressedBlockSize = self.BlockHeader.decompressedSize % self.BlockSize
     self.nspf.seek(self.CompressedBlockOffsetList[blockID])
     if self.CompressedBlockSizeList[blockID] < decompressedBlockSize:
         self.CurrentBlock = ZstdDecompressor().decompress(
             self.nspf.read(decompressedBlockSize))
     else:
         self.CurrentBlock = self.nspf.read(decompressedBlockSize)
     self.CurrentBlockId = blockID
     return self.CurrentBlock
Code example #21
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        # NOTE: bytes(memoryview) returns the memoryview repr
        # in Python 2.7. The right thing to do in 2.7 is call
        # memoryview.tobytes(), but we currently only use
        # memoryview in Python 3.x.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    elif compressor_id == ZstdContext.compressor_id:
        # ZstdDecompressor is not thread safe.
        # TODO: Use a pool?
        return ZstdDecompressor().decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id, ))
Code example #22
    def __init__(
        self,
        path: Path,
        *,
        encoding: str,
        warn_uncompressed: bool = True,
        progress_bar: bool = False,
        progress_bar_desc: Optional[str] = None,
    ):
        self.path = path

        self._fp = path.open("rb")
        self._fin: BinaryIO
        if path.suffix == ".gz":
            self._fin = cast(BinaryIO, GzipFile(fileobj=self._fp))
        elif path.suffix == ".bz2":
            self._fin = cast(BinaryIO, BZ2File(self._fp))
        elif path.suffix == ".xz":
            self._fin = cast(BinaryIO, LZMAFile(self._fp))
        elif path.suffix == ".zst":
            self._fin = cast(BinaryIO,
                             ZstdDecompressor().stream_reader(self._fp))
        else:
            if warn_uncompressed:  # pragma: no cover
                _LOGGER.warning(
                    "Could not detect compression type of file '{}' from its "
                    "extension, treating as uncompressed file.",
                    path,
                )
            self._fin = self._fp

        self._progress_bar: Optional[tqdm[None]] = None
        if progress_bar:
            self._progress_bar = tqdm(
                desc=progress_bar_desc or self.path.name,
                total=self.size(),
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                dynamic_ncols=True,
            )

        super().__init__(self._fin, encoding=encoding)
Code example #23
File: NszDecompressor.py Project: SIN-NIN/nsz
def __decompressNcz(nspf, f, statusReportInfo, pleaseNoPrint):
    ncaHeaderSize = 0x4000
    blockID = 0
    nspf.seek(0)
    header = nspf.read(ncaHeaderSize)
    if f != None:
        start = f.tell()

    magic = nspf.read(8)
    if not magic == b'NCZSECTN':
        raise ValueError("No NCZSECTN found! Is this really a .ncz file?")
    sectionCount = nspf.readInt64()
    sections = [Header.Section(nspf) for _ in range(sectionCount)]
    nca_size = ncaHeaderSize
    for i in range(sectionCount):
        nca_size += sections[i].size
    pos = nspf.tell()
    blockMagic = nspf.read(8)
    nspf.seek(pos)
    useBlockCompression = blockMagic == b'NCZBLOCK'
    blockSize = -1
    if useBlockCompression:
        BlockHeader = Header.Block(nspf)
        blockDecompressorReader = BlockDecompressorReader.BlockDecompressorReader(
            nspf, BlockHeader)
    pos = nspf.tell()
    if not useBlockCompression:
        decompressor = ZstdDecompressor().stream_reader(nspf)
    hash = sha256()

    if statusReportInfo == None:
        BAR_FMT = u'{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} {unit} [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'
        bar = enlighten.Counter(total=nca_size // 1048576,
                                desc='Decompress',
                                unit="MiB",
                                color='red',
                                bar_format=BAR_FMT)
    decompressedBytes = len(header)
    if f != None:
        f.write(header)
    if statusReportInfo != None:
        statusReport, id = statusReportInfo
        statusReport[id] = [len(header), 0, nca_size]
    else:
        bar.count = decompressedBytes // 1048576
        bar.refresh()
    hash.update(header)

    for s in sections:
        i = s.offset
        crypto = aes128.AESCTR(s.cryptoKey, s.cryptoCounter)
        end = s.offset + s.size
        while i < end:
            crypto.seek(i)
            chunkSz = 0x10000 if end - i > 0x10000 else end - i
            if useBlockCompression:
                inputChunk = blockDecompressorReader.read(chunkSz)
            else:
                inputChunk = decompressor.read(chunkSz)
            if not len(inputChunk):
                break
            if not useBlockCompression:
                decompressor.flush()
            if s.cryptoType in (3, 4):
                inputChunk = crypto.encrypt(inputChunk)
            if f != None:
                f.write(inputChunk)
            hash.update(inputChunk)
            lenInputChunk = len(inputChunk)
            i += lenInputChunk
            decompressedBytes += lenInputChunk
            if statusReportInfo != None:
                statusReport[id] = [
                    statusReport[id][0] + chunkSz, statusReport[id][1],
                    nca_size
                ]
            else:
                bar.count = decompressedBytes // 1048576
                bar.refresh()

    if statusReportInfo == None:
        bar.close()
    hexHash = hash.hexdigest()
    if f != None:
        end = f.tell()
        written = (end - start)
        return (written, hexHash)
    return (0, hexHash)
Code example #24
 def __init__(self):
     self.compressor = ZstdCompressor()
     self.decompressor = ZstdDecompressor()
Code example #25
File: NszDecompressor.py Project: silva100/nsz
def __decompressNcz(nspf, f, statusReportInfo, pleaseNoPrint):
    UNCOMPRESSABLE_HEADER_SIZE = 0x4000
    blockID = 0
    nspf.seek(0)
    header = nspf.read(UNCOMPRESSABLE_HEADER_SIZE)
    if f != None:
        start = f.tell()

    magic = nspf.read(8)
    if not magic == b'NCZSECTN':
        raise ValueError("No NCZSECTN found! Is this really a .ncz file?")
    sectionCount = nspf.readInt64()
    sections = [Header.Section(nspf) for _ in range(sectionCount)]
    if sections[0].offset - UNCOMPRESSABLE_HEADER_SIZE > 0:
        fakeSection = Header.FakeSection(
            UNCOMPRESSABLE_HEADER_SIZE,
            sections[0].offset - UNCOMPRESSABLE_HEADER_SIZE)
        sections.insert(0, fakeSection)
    nca_size = UNCOMPRESSABLE_HEADER_SIZE
    for i in range(sectionCount):
        nca_size += sections[i].size
    pos = nspf.tell()
    blockMagic = nspf.read(8)
    nspf.seek(pos)
    useBlockCompression = blockMagic == b'NCZBLOCK'
    blockSize = -1
    if useBlockCompression:
        BlockHeader = Header.Block(nspf)
        blockDecompressorReader = BlockDecompressorReader.BlockDecompressorReader(
            nspf, BlockHeader)
    pos = nspf.tell()
    if not useBlockCompression:
        decompressor = ZstdDecompressor().stream_reader(nspf)
    hash = sha256()

    if statusReportInfo == None:
        BAR_FMT = u'{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} {unit} [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'
        bar = enlighten.Counter(total=nca_size // 1048576,
                                desc='Decompress',
                                unit="MiB",
                                color='red',
                                bar_format=BAR_FMT)
    decompressedBytes = len(header)
    if f != None:
        f.write(header)
    if statusReportInfo != None:
        statusReport, id = statusReportInfo
        statusReport[id] = [len(header), 0, nca_size]
    else:
        bar.count = decompressedBytes // 1048576
        bar.refresh()
    hash.update(header)

    firstSection = True
    for s in sections:
        i = s.offset
        useCrypto = s.cryptoType in (3, 4)
        if useCrypto:
            crypto = aes128.AESCTR(s.cryptoKey, s.cryptoCounter)
        end = s.offset + s.size
        if firstSection:
            firstSection = False
            uncompressedSize = UNCOMPRESSABLE_HEADER_SIZE - sections[0].offset
            if uncompressedSize > 0:
                i += uncompressedSize
        while i < end:
            if useCrypto:
                crypto.seek(i)
            chunkSz = 0x10000 if end - i > 0x10000 else end - i
            if useBlockCompression:
                inputChunk = blockDecompressorReader.read(chunkSz)
            else:
                inputChunk = decompressor.read(chunkSz)
                decompressor.flush()
            if not len(inputChunk):
                break
            if useCrypto:
                inputChunk = crypto.encrypt(inputChunk)
            if f != None:
                f.write(inputChunk)
            hash.update(inputChunk)
            lenInputChunk = len(inputChunk)
            i += lenInputChunk
            decompressedBytes += lenInputChunk
            if statusReportInfo != None:
                statusReport[id] = [
                    statusReport[id][0] + chunkSz, statusReport[id][1],
                    nca_size
                ]
            else:
                bar.count = decompressedBytes // 1048576
                bar.refresh()

    if statusReportInfo == None:
        bar.close()
        # Line break after closing the progress bar is required to prevent
        # the next output from being on the same line as the progress bar.
        print()
    hexHash = hash.hexdigest()
    if f != None:
        end = f.tell()
        written = (end - start)
        return (written, hexHash)
    return (0, hexHash)
Code example #26
 def __init__(self, item_type: type, num_pages: int, page_size: int):
     self.item_type = item_type
     self.num_pages = num_pages
     self.page_size = page_size
     self.decompressor = ZstdDecompressor()
     self.mmap = None
Code example #27
File: utils.py Project: xtivat0r/TinGen
def read_index(index_path: Path, rsa_priv_key_path: Path = None) -> dict:
    if index_path is None or not index_path.is_file():
        raise RuntimeError(
            f"Unable to read non-existant index file \"{index_path}\"")

    encryption_flag = None
    compression_flag = None
    session_key = None
    data_size = None
    to_read_buffer = None

    with open(index_path, "rb") as index_stream:
        # read() returns bytes, so compare against a bytes literal.
        magic = index_stream.read(7)

        if magic != b"TINFOIL":
            raise RuntimeError(
                "Invalid tinfoil index magic.\n\nExpected Magic = " +
                f"\"TINFOIL\"\nMagic in index file = \"{magic!r}\"")

        flags = index_stream.read(1)[0]
        encryption_flag = flags & 0xF0

        key_available = rsa_priv_key_path is not None and \
            rsa_priv_key_path.is_file()

        if encryption_flag == EncryptionFlag.ENCRYPT and not key_available:
            raise RuntimeError(
                "Unable to decrypt encrypted index without private key.")

        compression_flag = flags & 0x0F

        if compression_flag not in CompressionFlag:
            raise RuntimeError(
                "Unimplemented compression method encountered while reading " +
                "index header.")

        session_key = index_stream.read(0x100)
        data_size = int.from_bytes(index_stream.read(8), byteorder="little")
        to_read_buffer = index_stream.read()

    if encryption_flag == EncryptionFlag.ENCRYPT:
        rsa_priv_key = import_rsa_key(open(rsa_priv_key_path).read())
        pkcs1_oaep_ctx = new_pkcs1_oaep_ctx(rsa_priv_key,
                                            hashAlgo=SHA256,
                                            label=b"")
        aes_key = pkcs1_oaep_ctx.decrypt(session_key)
        aes_ctx = new_aes_ctx(aes_key, MODE_ECB)
        to_read_buffer = aes_ctx.decrypt(to_read_buffer)

    if compression_flag == CompressionFlag.ZSTD_COMPRESSION:
        to_read_buffer = ZstdDecompressor().decompress(
            to_read_buffer[:data_size])

    elif compression_flag == CompressionFlag.ZLIB_COMPRESSION:
        to_read_buffer = zlib_decompress(to_read_buffer[:data_size])

    elif compression_flag == CompressionFlag.NO_COMPRESSION:
        to_read_buffer = to_read_buffer[:data_size]

    try:
        return json_deserialize(to_read_buffer)

    except JSONDecodeError:
        raise RuntimeError("Unable to deserialize index data.")
Code example #28
 blockMagic = f.read(8)
 f.seek(pos)
 useBlockCompression = blockMagic == b'NCZBLOCK'
 if useBlockCompression:
     BlockHeader = Block(f)
     if BlockHeader.blockSizeExponent < 14 or BlockHeader.blockSizeExponent > 32:
         raise ValueError(
             "Corrupted NCZBLOCK header: Block size must be between 14 and 32"
         )
     blockSize = 2**BlockHeader.blockSizeExponent
 pos = f.tell()
 with open(argv[2], 'wb+') as o:
     o.write(header)
     decompressedBytes = 0
     blockID = 0
     dctx = ZstdDecompressor()
     if not useBlockCompression:
         decompressor = dctx.stream_reader(f)
     while True:
         if useBlockCompression:
             if BlockHeader.compressedBlockSizeList[blockID] < blockSize:
                 decompressor = dctx.stream_reader(f)
                 inputChunk = decompressor.read(blockSize)
                 decompressedBytes += len(inputChunk)
                 o.write(inputChunk)
                 decompressor.flush()
                 o.flush()
                 print(
                     'Block',
                     str(blockID + 1) + '/' +
                     str(BlockHeader.numberOfBlocks))
Code example #29
File: NszDecompressor.py Project: yukun451/nut
def __decompressNcz(nspf, f, statusReportInfo):
    UNCOMPRESSABLE_HEADER_SIZE = 0x4000
    blockID = 0
    nspf.seek(0)
    header = nspf.read(UNCOMPRESSABLE_HEADER_SIZE)
    if f is not None:
        start = f.tell()

    magic = nspf.read(8)
    if not magic == b'NCZSECTN':
        raise ValueError("No NCZSECTN found! Is this really a .ncz file?")
    sectionCount = nspf.readInt64()
    sections = [Section(nspf) for _ in range(sectionCount)]
    if sections[0].offset - UNCOMPRESSABLE_HEADER_SIZE > 0:
        fakeSection = FakeSection(
            UNCOMPRESSABLE_HEADER_SIZE,
            sections[0].offset - UNCOMPRESSABLE_HEADER_SIZE)
        sections.insert(0, fakeSection)
    nca_size = UNCOMPRESSABLE_HEADER_SIZE
    for i in range(sectionCount):
        nca_size += sections[i].size

    decompressor = ZstdDecompressor().stream_reader(nspf)
    hash = sha256()

    bar = Status.create(nspf.size, desc=os.path.basename(nspf._path), unit='B')

    # if statusReportInfo == None:
    #	BAR_FMT = u'{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} {unit} [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'
    #	bar = enlighten.Counter(total=nca_size//1048576, desc='Decompress', unit="MiB", color='red', bar_format=BAR_FMT)
    decompressedBytes = len(header)
    if f is not None:
        f.write(header)
        bar.add(len(header))

    hash.update(header)

    firstSection = True
    for s in sections:
        i = s.offset
        useCrypto = s.cryptoType in (3, 4)
        if useCrypto:
            crypto = aes128.AESCTR(s.cryptoKey, s.cryptoCounter)
        end = s.offset + s.size
        if firstSection:
            firstSection = False
            uncompressedSize = UNCOMPRESSABLE_HEADER_SIZE - sections[0].offset
            if uncompressedSize > 0:
                i += uncompressedSize
        while i < end:
            if useCrypto:
                crypto.seek(i)
            chunkSz = 0x10000 if end - i > 0x10000 else end - i

            inputChunk = decompressor.read(chunkSz)
            decompressor.flush()

            if not len(inputChunk):
                break
            if useCrypto:
                inputChunk = crypto.encrypt(inputChunk)
            if f is not None:
                f.write(inputChunk)
            hash.update(inputChunk)
            lenInputChunk = len(inputChunk)
            i += lenInputChunk
            decompressedBytes += lenInputChunk
            bar.add(lenInputChunk)  # count each chunk toward progress exactly once

    bar.close()
    print()

    hexHash = hash.hexdigest()
    if f is not None:
        end = f.tell()
        written = (end - start)
        return (written, hexHash)
    return (0, hexHash)
Code example #30
        else:
            if not sent:
                continue
            if para_id == '0':
                continue

            out += ' '.join(tokenizer.tokenize(sent)) + '\n'
            sent = ''

    if sent:
        out += ' '.join(tokenizer.tokenize(sent)) + '\n'

    return out


dctx = ZstdDecompressor()

for group in tqdm(os.listdir(source_dir), ncols=80, position=0):
    group_dir = os.path.join(source_dir, group)
    group_outs = []

    for filename in tqdm(os.listdir(group_dir), ncols=80, desc=group, position=1):
        filepath = os.path.join(group_dir, filename)

        with open(filepath, 'rb') as f:
            with dctx.stream_reader(f) as r:
                text_stream = io.TextIOWrapper(r, encoding='utf-8')
                out = convert_file(text_stream)
        group_outs.append(out)

    out = '\n'.join(group_outs)