Example #1
 def test(self):
     if False:
         # to help with debugging:
         # print the first 16 sync points - some _may_ be in the audio data
         bfr = CornuCopyBuffer.from_filename(TESTFILE)
         count = 16
         while not bfr.at_eof() and count > 0:
             bs = b''.join(MP3AudioFrame.scan_for_sync(bfr))
             X("AUDIO at %d after %d bytes", bfr.offset, len(bs))
             bfr.take(1)
             count -= 1
     S = os.stat(TESTFILE)
     mp3_size = S.st_size
     bfr = CornuCopyBuffer.from_filename(TESTFILE)
     for offset, frame, post_offset in MP3Frame.scan_with_offsets(bfr):
         frame_size = post_offset - offset
         frame_bs = bytes(frame)
         ##frame2 = MP3Frame.from_bytes(frame_bs)
         ##self.assertIs(type(frame), type(frame2))
         # There used to be a round trip size check, but we repair
         # some input data and write it out correctly, so the size can
        # change. Example: a UCS-2 text field missing its BOM.
     self.assertEqual(
         bfr.offset, mp3_size,
         "file size = %d, buffer offset = %d" % (mp3_size, bfr.offset))
     self.assertTrue(bfr.at_eof())
     bfr.close()
Example #2
 def __iter__(self):
     _, payload = self.S.do(ArchiveListRequest(self.archive_name))
     bfr = CornuCopyBuffer([payload])
     while not bfr.at_eof():
         when = BSString.parse_value(bfr)
         when = float(when)
         E = BSString.parse_value(bfr)
         E = parse(E)
         if not isinstance(E, _Dirent):
             raise ValueError("not a _Dirent: %r" % (E, ))
         yield when, E
Example #3
    def pushto_queue(self, Q, offset=0, runstate=None, progress=None):
        ''' Push the `Block`s from this `DataFile` to the Queue `Q`.

        Note that if the target store is a DataDirStore
        it is faster and simpler to move/copy the `.vtd` file
        into its `data` subdirectory directly.
        Of course, that may introduce redundant block copies.

        Parameters:
        * `Q`: queue on which to put the `Block`s
        * `offset`: starting offset, default `0`
        * `runstate`: optional `RunState` used to cancel the operation
        * `progress`: optional `Progress` to which to report the data copied
        '''
        if progress:
            progress.total += len(self) - offset
        with open(self.pathname, 'rb') as f:
            f.seek(offset)
            bfr = CornuCopyBuffer(datafrom(f, offset), offset=offset)
            for DR in DataRecord.parse_buffer(bfr):
                if runstate and runstate.cancelled:
                    return False
                data = DR.data
                Q.put(Block(data=data))
                if progress:
                    progress += len(data)
        return True
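A minimal usage sketch for the queue-push pattern above; `datafile` is a
hypothetical already-open `DataFile`, and a plain `queue.Queue` stands in for
whatever queue the consuming Store drains:

    from queue import Queue

    Q = Queue()
    # hypothetical datafile: a DataFile instance opened elsewhere
    if datafile.pushto_queue(Q, offset=0):
        # drain the queued Blocks
        while not Q.empty():
            block = Q.get()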
Example #4
  def upload_bytes(
      self,
      bs,
      *,
      bucket_name: str,
      path: str,
      file_info=None,
      content_type=None,
      upload_progress=None,
  ):
    ''' Upload bytes from `bs` to `path` within `bucket_name`.

        The default implementation calls `self.upload_buffer()`.

        Parameters:
        * `bs`: the source `bytes`-like object
        * `bucket_name`: the bucket name
        * `path`: the subpath within the bucket
        * `file_info`: an optional mapping of extra information about the file
        * `content_type`: an optional MIME content type value
        * `upload_progress`: an optional `cs.progress.Progress` instance
          to which to report upload data
    '''
    return self.upload_buffer(
        CornuCopyBuffer([bs]),
        bucket_name=bucket_name,
        path=path,
        file_info=file_info,
        content_type=content_type,
        upload_progress=upload_progress
    )
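A hedged usage sketch for `upload_bytes()`; `store` is a hypothetical instance
of a concrete subclass providing `upload_buffer()`, and the bucket and path are
made up for illustration:

    result = store.upload_bytes(
        b'hello, cloud',
        bucket_name='example-bucket',
        path='demo/hello.txt',
        content_type='text/plain',
    )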
Example #5
 def test_shuffled_randomblocks(self):
     ''' Save RUN_SIZE random blocks, close, retrieve in random order.
     '''
     # save random blocks to a file
     blocks = {}
     with open(self.pathname, 'wb') as f:
         for n in range(RUN_SIZE):
             with self.subTest(put_block_n=n):
                 data = make_randblock(rand0(MAX_BLOCK_SIZE + 1))
                 dr = DataRecord(data)
                 offset = f.tell()
                 blocks[offset] = data
                 f.write(bytes(dr))
     # shuffle the block offsets
     offsets = list(blocks.keys())
     random.shuffle(offsets)
     # retrieve the blocks in random order, check for correct content
     with open(self.pathname, 'rb') as f:
         for n, offset in enumerate(offsets):
             with self.subTest(shuffled_offsets_n=n, offset=offset):
                 f.seek(offset)
                 bfr = CornuCopyBuffer.from_file(f)
                 dr = DataRecord.parse(bfr)
                 data = dr.data
                 self.assertEqual(data, blocks[offset])
Example #6
  def upload_file(
      self,
      f,
      *,
      bucket_name: str,
      path: str,
      file_info=None,
      content_type=None,
      upload_progress=None,
  ):
    ''' Upload the data from the file `f` to `path` within `bucket_name`.
        Return a `dict` containing the upload result.

        The default implementation calls `self.upload_buffer()`.

        Parameters:
        * `f`: the file
        * `bucket_name`: the bucket name
        * `path`: the subpath within the bucket
        * `file_info`: an optional mapping of extra information about the file
        * `content_type`: an optional MIME content type value
        * `upload_progress`: an optional `cs.progress.Progress` instance
          to which to report upload data
    '''
    return self.upload_buffer(
        CornuCopyBuffer.from_file(f),
        bucket_name=bucket_name,
        path=path,
        file_info=file_info,
        content_type=content_type,
        upload_progress=upload_progress,
    )
Example #7
 def hashcodes(self,
               start_hashcode=None,
               after: bool = False,
               length: Optional[int] = None):
     hashclass = self.hashclass
     if length is not None and length < 1:
         raise ValueError("length should be None or >=1, got: %r" %
                          (length, ))
     if after and start_hashcode is None:
         raise ValueError("after=%s but start_hashcode=%s" %
                          (after, start_hashcode))
     flags, payload = self.do(
         HashCodesRequest(start_hashcode=start_hashcode,
                          hashclass=hashclass,
                          after=after,
                          length=length))
     if flags:
         raise StoreError("unexpected flags: 0x%02x" % (flags, ))
     bfr = CornuCopyBuffer([payload])
     hashary = list(HashCodeField.scan_values(bfr))
     # verify hashcode types
     mismatches = set(
         type(hashcode).__name__ for hashcode in hashary
         if not isinstance(hashcode, hashclass))
     if mismatches:
         raise StoreError(
             "expected hashcodes of type %s, got %d mismatches of type %s"
             % (hashclass.__name__, len(mismatches), sorted(mismatches)))
     return hashary
Example #8
    def download_buffer(
            self,
            *,
            bucket_name: str,
            path: str,
            download_progress=None,  # pylint: disable=unused-argument
    ) -> (CornuCopyBuffer, dict):
        ''' Download from `path` within `bucket_name`,
        returning `(buffer,file_info)`
        being a `CornuCopyBuffer` presenting the data bytes
        and the file info uploaded with the file.

        Parameters:
        * `bucket_name`: the bucket name
        * `path`: the subpath within the bucket
        * `download_progress`: an optional `cs.progress.Progress` instance
          to which to report download data
        '''
        filename = os.sep + joinpath(bucket_name, path)
        with Pfx("open(%r)", filename):
            with open(filename, 'rb') as f:
                bfr = CornuCopyBuffer.from_fd(f.fileno(),
                                              progress=download_progress)
        with FSTags() as fstags:
            file_info = fstags[filename].as_dict()
        return bfr, file_info
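A sketch of consuming the `(buffer, file_info)` result above; `area` is a
hypothetical instance providing `download_buffer()`. Because a
`CornuCopyBuffer` iterates over `bytes` chunks, joining it yields the whole
download:

    bfr, file_info = area.download_buffer(
        bucket_name='example-bucket', path='demo/hello.txt'
    )
    data = b''.join(bfr)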
Example #9
 def from_pathname(cls, pathname, readsize=None, **kw):
   ''' Compute hashcode from the contents of the file `pathname`.
   '''
   if readsize is None:
     readsize = DEFAULT_READSIZE
   return cls.from_buffer(
       CornuCopyBuffer.from_filename(pathname, readsize=readsize, **kw)
   )
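The chunk iteration underlying `from_buffer` can be sketched directly: a
`CornuCopyBuffer` iterates over `bytes` chunks, so a digest can be fed
incrementally. A minimal standalone sketch using only the stdlib and the
buffer class:

    from hashlib import sha256

    from cs.buffer import CornuCopyBuffer

    def sha256_of_file(pathname):
        ''' Hash a file's content chunk by chunk via a CornuCopyBuffer.
        '''
        h = sha256()
        for chunk in CornuCopyBuffer.from_filename(pathname):
            h.update(chunk)
        return h.digest()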
Example #10
    def prev_dirent(self):
        ''' Return the previous Dirent.

        If it is not `None` and differs from `self`, it is included
        in the encoding or transcription.

        TODO: parse out multiple blockrefs.
        '''
        prev_blockref = self._prev_dirent_blockref
        if prev_blockref is None:
            return None
        bfr = CornuCopyBuffer(prev_blockref)
        E = _Dirent.from_buffer(bfr)
        if not bfr.at_eof():
            warning(
                "prev_dirent: _prev_dirent_blockref=%s:"
                " unparsed bytes after dirent at offset %d", prev_blockref,
                bfr.offset)
        return E
Example #11
 def parse(cls, bfr):
     self = cls()
     # pylint: disable=attribute-defined-outside-init
     self.tag_id = bfr.take(4)
     with Pfx(self.tag_id):
         size = UInt32BE.parse_value(bfr)
         self.flags = UInt16BE.parse_value(bfr)
         if size < 1:
             warning("size < 1")
             data_bs = b''
         else:
             data_bs = bfr.take(size)
         data_type = self.tag_id_class(self.tag_id)
         if data_type is None:
             self.dataframe_body = data_bs
         else:
             databfr = CornuCopyBuffer([data_bs])
             self.dataframe_body = data_type.parse(databfr)
             if not databfr.at_eof():
                 warning("unparsed data: %r" % (databfr.take(...), ))
     return self
Example #12
 def _test_chunks(data_spec):
     ''' Return an iterable of chunks from a data spec (filename or list-of-bytes).
     '''
     # obtain the test data
     if data_spec is None:
         chunks = None
     elif isinstance(data_spec, str):
         chunks = CornuCopyBuffer.from_filename(data_spec)
     elif isinstance(data_spec, (list, tuple)):
         chunks = data_spec
     else:
         raise RuntimeError("unexpected data_spec of type %s" %
                            (type(data_spec), ))
     return chunks
Example #13
    def upload_file(
        self,
        f,
        *,
        bucket_name: str,
        path: str,
        file_info=None,
        content_type=None,
        upload_progress=None,
    ):
        ''' Upload the data from the file `f` to `path` within `bucket_name`.
        Return a `dict` containing the B2 `FileVersion` attribute values.

        Note that the b2api expects to be able to seek when given a file,
        so this tries to `mmap.mmap` the file and use the bytes upload
        interface, falling back to copying to a scratch file.

        Parameters:
        * `f`: the file, preferably seekable
        * `bucket_name`: the bucket name
        * `path`: the subpath within the bucket
        * `file_info`: an optional mapping of extra information about the file
        * `content_type`: an optional MIME content type value
        * `upload_progress`: an optional `cs.progress.Progress` instance
          to which to report upload data
        '''
        try:
            fd = f.fileno()
            mm = mmap(fd, 0, prot=PROT_READ)
        except (AttributeError, OSError) as e:  # no .fileno, not mmapable
            warning("f=%s: %s", f, e)
            # upload via a scratch file
            bfr = f if isinstance(
                f, CornuCopyBuffer) else CornuCopyBuffer.from_file(f)
            return self.upload_buffer(
                bfr,
                bucket_name=bucket_name,
                path=path,
                file_info=file_info,
                content_type=content_type,
                upload_progress=upload_progress,
            )
        else:
            file_version = self._b2_upload_bytes(
                mm,
                bucket_name=bucket_name,
                path=path,
                upload_progress=upload_progress,
            )
            return file_version.as_dict()
Example #14
def xattrs_from_bytes(bs, offset=0):
    ''' Decode an XAttrs from some bytes, return the xattrs dictionary.
    '''
    bfr = CornuCopyBuffer.from_bytes(bs)
    if offset > 0:
        bfr.skip(offset)
    xattrs = {}
    while not bfr.at_eof():
        name = BSString.parse_value(bfr)
        data = BSData.parse_value(bfr)
        if name in xattrs:
            warning("repeated name, ignored: %r", name)
        else:
            xattrs[name] = data
    return xattrs
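A sketch of the matching encoder. It hand-rolls the length prefix on the
assumption (not confirmed by the code above) that `BSString`/`BSData` use the
usual `cs.binary` scheme: a `BSUInt` length in big-endian 7-bit groups with
the high bit set on all but the final byte, followed by the payload, with
`BSString` being the UTF-8 form:

    def bs_uint(n):
        # assumed BSUInt wire form: big-endian 7-bit groups, 0x80 continuation
        bs = bytearray([n & 0x7f])
        n >>= 7
        while n:
            bs.insert(0, 0x80 | (n & 0x7f))
            n >>= 7
        return bytes(bs)

    def xattrs_to_bytes(xattrs):
        ''' Encode a name->data mapping in the form parsed above.
        '''
        chunks = []
        for name, data in xattrs.items():
            name_bs = name.encode('utf-8')
            chunks.append(bs_uint(len(name_bs)) + name_bs)  # BSString
            chunks.append(bs_uint(len(data)) + data)  # BSData
        return b''.join(chunks)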
Example #15
 def parse(cls, bfr):
     self = cls()
     # pylint: disable=attribute-defined-outside-init
     self.tag_id = bfr.take(3)
     with Pfx(self.tag_id):
         sz0, sz1, sz2 = bfr.take(3)
         size = sz0 << 16 | sz1 << 8 | sz2
         if size < 1:
             warning("size < 1")
             data_bs = b''
         else:
             data_bs = bfr.take(size)
         if not data_bs or data_bs[0] == 0:
             # forbidden empty data or data zeroed out
             data_type = None
         else:
             data_type = self.tag_id_class(self.tag_id)
         if data_type is None:
             self.value = data_bs
         else:
             databfr = CornuCopyBuffer([data_bs])
             self.value = data_type.parse(databfr)
             if not databfr.at_eof():
                 warning("unparsed data: %r" % (databfr.take(...), ))
     return self
Example #16
 def last(self):
     ''' The last Archive entry `(when,E)` or `(None,None)`.
     '''
     with Pfx("%s.last", self):
         try:
             flags, payload = self.S.do(
                 ArchiveLastRequest(self.archive_name))
         except StoreError as e:
             warning("%s, returning (None, None)", e)
             return ArchiveEntry(None, None)
         found = flags & 0x01
         if not found:
             return ArchiveEntry(None, None)
         bfr = CornuCopyBuffer.from_bytes(payload)
         entry = ArchiveEntry.from_buffer(bfr)
         return entry
Example #17
     def run_parser():
         ''' Thread body to run the supplied scanner against the input data.
         '''
         bfr = CornuCopyBuffer(chunk_iter)
         # pylint: disable=broad-except
         try:
             for offset in scanner(bfr):
                 # the scanner should yield only offsets, not chunks and offsets
                 if not isinstance(offset, int):
                     warning("discarding non-int from scanner %s: %s",
                             scanner, offset)
                 else:
                     parseQ.put(offset)
         except Exception as e:
             exception("exception from scanner %s: %s", scanner, e)
         # Consume the remainder of chunk_iter; the tee() will copy it to parseQ.
         for _ in chunk_iter:
             pass
         # end of offsets and chunks
         parseQ.close()
Example #18
 def parse(cls, bfr):
     ''' Parse an ID3v2 frame from the buffer.
     '''
     self = cls()
     # pylint: disable=attribute-defined-outside-init
     if bfr.peek(3, short_ok=True) != b'ID3':
         raise ValueError("expected b'ID3'")
     bfr.take(3)
     # the 2.0 part of ID3.2.0
     self.v1, self.v2 = bfr.take(2)
     self.flags = bfr.byte0()
     size = ID3V2Size.parse_value(bfr)
     data_bs = bfr.take(size)
     data_bfr = CornuCopyBuffer([data_bs])
     dataframe_class = {
         2: ID3V22TagDataFrame,
         3: ID3V23TagDataFrame
     }[self.v1]
     self.tag_frames = list(dataframe_class.scan(data_bfr))
     return self
Example #19
 def parse(cls, bfr):
     ''' Parse a packet from a buffer.
     '''
     raw_payload = BSData.parse_value(bfr)
     payload_bfr = CornuCopyBuffer([raw_payload])
     self = cls()
     # pylint: disable=attribute-defined-outside-init
     self.tag = BSUInt.parse_value(payload_bfr)
     flags = BSUInt.parse_value(payload_bfr)
     has_channel = (flags & 0x01) != 0
     self.is_request = (flags & 0x02) != 0
     flags >>= 2
     self.flags = flags
     if has_channel:
         self.channel = BSUInt.parse_value(payload_bfr)
     else:
         self.channel = 0
     if self.is_request:
         self.rq_type = BSUInt.parse_value(payload_bfr)
     self.payload = b''.join(payload_bfr)
     return self
Example #20
def selftest():
    ''' Run some self tests.
    '''
    # pylint: disable=import-outside-toplevel
    from cs.buffer import CornuCopyBuffer
    for n in (0, 1, 2, 3, 16, 17, 127, 128, 129, 32767, 32768, 32769, 65535,
              65536, 65537):
        bs = transcribe_length_encoded_value(n)
        bfr = CornuCopyBuffer.from_bytes(bs)
        n2 = get_length_encoded_value(bfr)
        assert n == n2, "n:%s != n2:%s" % (n, n2)
        assert bfr.offset == len(
            bs), "bfr.offset:%s != len(bs):%s" % (bfr.offset, len(bs))
        assert bfr.at_eof(), "bfr not at EOF"
        ds, offset = DataSize.from_bytes(bs)
        assert ds.value == n
        assert offset == len(bs)
        bs2 = bytes(ds)
        assert bs == bs2
        ds2 = DataSize(n)
        bs3 = bytes(ds2)
        assert bs == bs3
Example #21
 def cmd_tags(argv):
     ''' Usage: {cmd} mp3filenames...
       Print the tags from the named files.
     '''
     xit = 0
     first_print = True
     for mp3path in argv:
         with Pfx(mp3path):
             try:
                 bfr = CornuCopyBuffer.from_filename(mp3path)
             except Exception as e:  # pylint: disable=broad-except
                 error(e)
                 xit = 1
                 continue
             tags = tags_of(bfr)
             if not first_print:
                 print()
             first_print = False
             print(mp3path)
             for tag in tags:
                 print(' ', tag)
     return xit
Example #22
 def test01scanners(self):
     ''' Test some domain specific data parsers.
     '''
     for parser in PARSERS:
         with self.subTest(parser.__name__):
             f = None
             testfilename = scanner_testfile(parser)
             if testfilename is None:
                 input_chunks = self.random_data
             else:
                 self.assertIsNotNone(testfilename)
                 f = open(testfilename, 'rb')
                 input_chunks = read_from(f)
             last_offset = 0
             for offset in parser(CornuCopyBuffer(input_chunks)):
             self.assertTrue(
                 last_offset <= offset,
                 "offset %d < last_offset %d" % (offset, last_offset))
                 last_offset = offset
             if f is not None:
                 f.close()
                 f = None
Example #23
 def bufferfrom(self):
   ''' Return a CornuCopyBuffer presenting data from the file.
   '''
   return CornuCopyBuffer(self.datafrom())
Example #24
    def parse_value(bfr):
        ''' Decode a Block reference from a buffer.

        Format is a `BSData` holding this encoded data:

            BS(flags)
              0x01 indirect blockref
              0x02 typed: type follows, otherwise BT_HASHCODE
              0x04 type flags: per type flags follow type
            BS(span)
            [BS(type)]
            [BS(type_flags)]
            union {
              type BT_HASHCODE: hash
              type BT_RLE: octet-value (repeat span times to get data)
              type BT_LITERAL: raw-data (span bytes)
              type BT_SUBBLOCK: suboffset, super block
            }

        Even though this is all decodable without the leading length,
        we use one so that future encodings do not prevent parsing
        of any following data.
        '''
        raw_encoding = BSData.parse_value(bfr)
        blockref_bfr = CornuCopyBuffer.from_bytes(raw_encoding)
        flags = BSUInt.parse_value(blockref_bfr)
        is_indirect = bool(flags & F_BLOCK_INDIRECT)
        is_typed = bool(flags & F_BLOCK_TYPED)
        has_type_flags = bool(flags & F_BLOCK_TYPE_FLAGS)
        unknown_flags = flags & ~(F_BLOCK_INDIRECT | F_BLOCK_TYPED
                                  | F_BLOCK_TYPE_FLAGS)
        if unknown_flags:
            raise ValueError(
                "unexpected flags value (0x%02x) with unsupported flags=0x%02x"
                % (flags, unknown_flags))
        span = BSUInt.parse_value(blockref_bfr)
        if is_indirect:
            # With indirect blocks, the span is of the implied data, not
            # the referenced block's data. Therefore we build the referenced
            # block with a span of None and store the span in the indirect
            # block.
            ispan = span
            span = None
        # block type, default BT_HASHCODE
        if is_typed:
            block_type = BlockType(BSUInt.parse_value(blockref_bfr))
        else:
            block_type = BlockType.BT_HASHCODE
        if has_type_flags:
            type_flags = BSUInt.parse_value(blockref_bfr)
            if type_flags:
                warning("nonzero type_flags: 0x%02x", type_flags)
        else:
            type_flags = 0x00
        # instantiate type specific block ref
        if block_type == BlockType.BT_HASHCODE:
            hashcode = HashCode.from_buffer(blockref_bfr)
            B = HashCodeBlock(hashcode=hashcode, span=span)
        elif block_type == BlockType.BT_RLE:
            octet = blockref_bfr.take(1)
            B = RLEBlock(span, octet)
        elif block_type == BlockType.BT_LITERAL:
            data = blockref_bfr.take(span)
            B = LiteralBlock(data)
        elif block_type == BlockType.BT_SUBBLOCK:
            suboffset = BSUInt.parse_value(blockref_bfr)
            superB = BlockRecord.parse_value(blockref_bfr)
            # wrap inner Block in subspan
            B = SubBlock(superB, suboffset, span)
        else:
            raise ValueError("unsupported Block type 0x%02x" % (block_type, ))
        if is_indirect:
            B = IndirectBlock(B, span=ispan)
        if not blockref_bfr.at_eof():
            warning("unparsed data (%d bytes) follow Block %s",
                    len(raw_encoding) - blockref_bfr.offset, B)
        assert isinstance(B, _Block)
        return B
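Given the layout above, decoding a single serialised blockref from raw bytes
is a one-buffer affair; `blockref_bs` below is hypothetical encoded data:

    bfr = CornuCopyBuffer.from_bytes(blockref_bs)
    B = BlockRecord.parse_value(bfr)
    assert bfr.at_eof(), "trailing bytes after the blockref"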
Example #25
 def bufferfrom(self, offset=0, **kw):
     ''' Return a CornuCopyBuffer presenting data from the Block.
     '''
     return CornuCopyBuffer(self.datafrom(start=offset, **kw),
                            offset=offset)
Example #26
 def __init__(self):
     self.Q = IterableQueue(1024)
     self.bfr = CornuCopyBuffer(self.Q)
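A sketch of how this queue-fed buffer is used: a producer (typically another
thread) puts `bytes` chunks onto `self.Q` and closes it, while a consumer
reads from `self.bfr`. This relies on the `IterableQueue` behaviour shown
elsewhere in these examples (`put`, `close`, iteration); `conn` is a
hypothetical instance of the class above:

    from threading import Thread

    def producer(Q):
        # feed chunks, then mark end of stream
        Q.put(b'hello, ')
        Q.put(b'world')
        Q.close()

    Thread(target=producer, args=(conn.Q,)).start()
    data = b''.join(conn.bfr)  # -> b'hello, world'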
Example #27
 def scanfrom(filepath, offset=0):
   ''' Scan the specified `filepath` from `offset`, yielding `DataRecord`s.
   '''
   bfr = CornuCopyBuffer.from_filename(filepath, offset=offset)
   yield from DataRecord.scan_with_offsets(bfr)
Example #28
 def file_fromchunks(self, name, chunks):
   ''' Create a new file named `name` from the data in `chunks`.
   '''
   return self.file_frombuffer(name, CornuCopyBuffer(chunks))
Example #29
    def __init__(self,
                 recv,
                 send,
                 request_handler=None,
                 name=None,
                 packet_grace=None,
                 tick=None):
        ''' Initialise the PacketConnection.

        Parameters:
        * `recv`: inbound binary stream.
          If this is an `int` it is taken to be an OS file descriptor,
          otherwise it should be a `cs.buffer.CornuCopyBuffer`
          or a file like object with a `read1` or `read` method.
        * `send`: outbound binary stream.
          If this is an `int` it is taken to be an OS file descriptor,
          otherwise it should be a file like object with `.write(bytes)`
          and `.flush()` methods.
          For a file descriptor sending is done via an os.dup() of
          the supplied descriptor, so the caller remains responsible
          for closing the original descriptor.
        * `packet_grace`:
          default pause in the packet sending worker
          to allow another packet to be queued
          before flushing the output stream.
          Default: `DEFAULT_PACKET_GRACE` seconds.
          A value of `0` will flush immediately if the queue is empty.
        * `request_handler`: an optional callable accepting
          (`rq_type`, `flags`, `payload`).
          The request_handler may return one of 5 values on success:
          * `None`: response will be 0 flags and an empty payload.
          * `int`: flags only. Response will be the flags and an empty payload.
          * `bytes`: payload only. Response will be 0 flags and the payload.
          * `str`: payload only. Response will be 0 flags and the str
                  encoded as bytes using UTF-8.
          * `(int, bytes)`: Specify flags and payload for response.
          An unsuccessful request should raise an exception, which
          will cause a failure response packet.
        * `tick`: optional tick parameter, default `None`.
          If `None`, do nothing.
          If a Boolean, call `tick_fd_2` if true, otherwise do nothing.
          Otherwise `tick` should be a callable accepting a byteslike value.
        '''
        if name is None:
            name = str(seq())
        self.name = name
        if isinstance(recv, int):
            self._recv = CornuCopyBuffer.from_fd(recv)
        elif isinstance(recv, CornuCopyBuffer):
            self._recv = recv
        else:
            self._recv = CornuCopyBuffer.from_file(recv)
        if isinstance(send, int):
            self._send = os.fdopen(os.dup(send), 'wb')
        else:
            self._send = send
        if packet_grace is None:
            packet_grace = DEFAULT_PACKET_GRACE
        if tick is None:
            tick = lambda bs: None
        elif isinstance(tick, bool):
            if tick:
                tick = tick_fd_2
            else:
                tick = lambda bs: None
        self.packet_grace = packet_grace
        self.request_handler = request_handler
        self.tick = tick
        # tags of requests in play against the local system
        self._channel_request_tags = {0: set()}
        self.notify_recv_eof = set()
        self.notify_send_eof = set()
        # LateFunctions for the requests we are performing for the remote system
        self._running = set()
        # requests we have outstanding against the remote system
        self._pending = {0: {}}
        # sequence of tag numbers
        # TODO: later, reuse old tags to prevent monotonic growth of tag field
        self._tag_seq = Seq(1)
        # work queue for local requests
        self._later = Later(4, name="%s:Later" % (self, ))
        self._later.open()
        # dispatch queue of Packets to send
        self._sendQ = IterableQueue(16)
        self._lock = Lock()
        self.closed = False
        # debugging: check for reuse of (channel,tag) etc
        self.__sent = set()
        self.__send_queued = set()
        # dispatch Thread to process received packets
        self._recv_thread = bg_thread(self._receive_loop,
                                      name="%s[_receive_loop]" % (self.name, ))
        # dispatch Thread to send data
        # primary purpose is to bundle output by deferring flushes
        self._send_thread = bg_thread(self._send_loop,
                                      name="%s[_send]" % (self.name, ))
Example #30
def blocked_chunks_of2(
    chunks,
    *,
    scanner=None,
    min_block=None,
    max_block=None,
):
    ''' Generator which connects to a scanner of a chunk stream in
      order to emit low level edge aligned data chunks.

      Parameters:
      * `chunks`: a source iterable of data chunks, handed to `scanner`
      * `scanner`: optional callable accepting a `CornuCopyBuffer` and
        returning an iterable of `int`s, such as a generator. `scanner`
        may be `None`, in which case only the rolling hash is used
        to locate boundaries.
      * `min_block`: the smallest amount of data that will be used
        to create a Block, default from `MIN_BLOCKSIZE` (`{MIN_BLOCKSIZE}`)
      * `max_block`: the largest amount of data that will be used to
        create a Block, default from `MAX_BLOCKSIZE` (`{MAX_BLOCKSIZE}`)

      The scanner yields `int`s which are considered desirable
      block boundaries.
    '''
    if min_block is None:
        min_block = MIN_BLOCKSIZE
    elif min_block < 8:
        raise ValueError("rejecting min_block < 8: %s" % (min_block, ))
    if max_block is None:
        max_block = MAX_BLOCKSIZE
    elif max_block >= 1024 * 1024:
        raise ValueError("rejecting max_block >= 1024*1024: %s" %
                         (max_block, ))
    if min_block >= max_block:
        raise ValueError("rejecting min_block:%d >= max_block:%d" %
                         (min_block, max_block))
    # source data for aligned chunk construction
    dataQ = IterableQueue()
    # queue of offsets from the parser
    offsetQ = IterableQueue()
    # copy chunks to the parser and also to the post-parser chunk assembler
    tee_chunks = tee(chunks, dataQ)
    parse_bfr = CornuCopyBuffer(tee_chunks)

    runstate = defaults.runstate

    def run_parser(runstate, bfr, min_block, max_block, offsetQ):
        ''' Thread body to scan `chunks` for offsets.
        The chunks are copied to `dataQ`, and their boundary offsets
        are put onto `offsetQ`.

        If there is a scanner we scan the input data with it first.
        When it terminates (including from some exception), we scan
        the remaining chunks with `scan()`.

        The main function consumes `dataQ` and `offsetQ`, using the
        chunks and offsets to assemble aligned chunks of data.
        '''
        try:
            offset = 0
            if scanner:
                # Consume the chunks and offsets via a queue.
                # The scanner puts offsets onto the queue.
                # When the scanner fetches from the chunks, those chunks are copied to the queue.
                # Accordingly, chunks _should_ arrive before offsets within them.
                # pylint: disable=broad-except
                try:
                    for offset in scanner(bfr):
                        if runstate.cancelled:
                            break
                        # the scanner should yield only offsets, not chunks and offsets
                        if not isinstance(offset, int):
                            warning("discarding non-int from scanner %s: %s",
                                    scanner, offset)
                        else:
                            offsetQ.put(offset)
                except Exception as e:
                    warning("exception from scanner %s: %s", scanner, e)
            # Consume the remainder of tee_chunks; the tee() will copy it to dataQ.
            # This is important to ensure that no chunk is missed.
            # We run these chunks through scan() to find offsets.
            cso = bfr.offset  # offset after all the chunks so far
            assert offset <= cso
            sofar = cso - offset
            if sofar >= max_block:
                offsetQ.put(cso)
                sofar = 0
            for offset in scan(bfr,
                               sofar=sofar,
                               min_block=min_block,
                               max_block=max_block):
                if runstate.cancelled:
                    break
                offsetQ.put(cso + offset)
        finally:
            # end of offsets and chunks
            offsetQ.close()
            dataQ.close()

    # dispatch the parser
    bg_thread(run_parser,
              args=(runstate, parse_bfr, min_block, max_block, offsetQ),
              daemon=True)

    # data source for assembling aligned chunks
    data_bfr = CornuCopyBuffer(dataQ)
    sofar = 0
    offset = None
    for offset in offsetQ:
        assert offset >= sofar
        block_size = offset - sofar
        assert block_size >= 0, ("block_size:%d <= 0 -- sofar=%d, offset=%d" %
                                 (block_size, sofar, offset))
        if block_size < min_block:
            # skip over small edges
            assert scanner is not None, (
                "scanner=None but still got an overly near offset"
                " (sofar=%d, offset=%d => block_size=%d < min_block:%d)" %
                (sofar, offset, block_size, min_block))
            continue
        subchunks = data_bfr.takev(block_size)
        assert sum(map(len, subchunks)) == block_size
        if block_size > max_block:
            # break up overly long blocks without a parser
            assert scanner is not None, (
                "scanner=None but still got an overly distant offset"
                " (sofar=%d, offset=%d => block_size=%d > max_block:%d)" %
                (sofar, offset, block_size, max_block))
            yield from blocked_chunks_of2(subchunks,
                                          min_block=min_block,
                                          max_block=max_block)
        else:
            yield b''.join(subchunks)
        sofar += block_size
    bs = b''.join(data_bfr)
    if bs:
        assert len(bs) <= max_block
        yield bs
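A usage sketch: feed file chunks through the blocker with no scanner, so only
the rolling hash chooses the boundaries. `file_chunks` and `process` are
illustrative helpers, not part of the source above:

    def file_chunks(pathname, readsize=1024 * 1024):
        # a simple chunk source for the blocker
        with open(pathname, 'rb') as f:
            while True:
                bs = f.read(readsize)
                if not bs:
                    break
                yield bs

    for block_bs in blocked_chunks_of2(file_chunks('example.dat')):
        # each yielded bytes object is an edge-aligned block,
        # roughly bounded by min_block and max_block
        process(block_bs)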