def shutdown(self):
  ''' Shut down the Later instance:
      - close the request queue
      - close the TimerQueue if any
      - close the worker thread pool
      - dispatch a Thread to wait for completion and fire the
        _finished Event
  '''
  if self._timerQ:
    self._timerQ.close()
    self._timerQ.join()
  # queue actions to detect activity completion
  bg_thread(self._finished.set)
def _refresh_sqltags_data(api, sqltags, max_age=None):
  ''' Refresh the queue and recordings if any unexpired records are stale
      or if all records are expired.
  '''
  recordings = set(sqltags.recordings())
  need_refresh = (
      # any current recordings whose state is stale
      any(
          not recording.is_expired() and recording.is_stale(max_age=max_age)
          for recording in recordings
      )
      # no recording is current
      or all(recording.is_expired() for recording in recordings)
  )
  if need_refresh:
    print("refresh queue and recordings...")
    Ts = [bg_thread(api.queue), bg_thread(api.recordings)]
    for T in Ts:
      T.join()
def pushto(self, dstS, *, capacity=64, progress=None):
  ''' Allocate a Queue for Blocks to push from this Store to another Store `dstS`.
      Return `(Q,T)` where `Q` is the new Queue and `T` is the Thread
      processing the Queue.

      Parameters:
      * `dstS`: the secondary Store to receive Blocks.
      * `capacity`: the Queue capacity, arbitrary default `64`.
      * `progress`: an optional `Progress` counting submitted and
        completed data bytes.

      Once called, the caller can then `.put` Blocks onto the Queue.
      When finished, call `Q.close()` to indicate end of Blocks and
      `T.join()` to wait for the processing completion.
  '''
  sem = Semaphore(capacity)
  ##sem = Semaphore(1)
  name = "%s.pushto(%s)" % (self.name, dstS.name)
  with Pfx(name):
    Q = IterableQueue(capacity=capacity, name=name)
    srcS = self
    srcS.open()
    dstS.open()
    T = bg_thread(
        lambda: (
            self.push_blocks(name, Q, srcS, dstS, sem, progress),
            srcS.close(),
            dstS.close(),
        )
    )
    return Q, T
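# A minimal usage sketch (not from the original source), assuming `srcS` and
# `dstS` are already initialised and open-able Store instances and `blocks`
# is an iterable of Blocks to copy; it follows the protocol described in the
# pushto() docstring above.
def copy_blocks(srcS, dstS, blocks):
  ''' Push every Block from `blocks` to `dstS` via `srcS.pushto()`. '''
  Q, T = srcS.pushto(dstS, capacity=64)
  for block in blocks:
    Q.put(block)
  Q.close()  # indicate end of Blocks
  T.join()  # wait for the push worker Thread to complete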
def bg(self):
  ''' Submit a function to complete the `Task` in a separate `Thread`,
      returning the `Thread`.

      This dispatches a `Thread` to run `self.call()`
      and as such the `Task` must be in "pending" state,
      and transitions to "running".
  '''
  return bg_thread(self.call, name=self.name)
def bg(self, func, *a, **kw):
  ''' Submit a function to compute the result in a separate `Thread`,
      returning the `Thread`.

      This dispatches a `Thread` to run `self.call(func,*a,**kw)`
      and as such the `Result` must be in "pending" state,
      and transitions to "running".
  '''
  return bg_thread(self.call, name=self.name, args=[func] + list(a), kwargs=kw)
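# A hypothetical usage sketch (the `slow_fetch` name is invented): run a
# function in a background Thread via Result.bg() and wait for the Thread.
# Presumably self.call() records the function's return value or exception
# on the Result, per the "pending" -> "running" transition described above.
def run_in_background(result, slow_fetch, url):
  ''' Dispatch `slow_fetch(url)` via `result.bg()`; return the Result after completion. '''
  T = result.bg(slow_fetch, url)  # the Result goes "pending" -> "running"
  # ... other work may happen here while slow_fetch(url) runs ...
  T.join()  # wait for the background computation to finish
  return result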
def keys(self):
  seen = set()
  Q = IterableQueue()

  def keys_from(S):
    # copy the keys of Store S to Q, then a None sentinel to mark completion
    for h in S.keys():
      Q.put(h)
    Q.put(None)

  # dispatch a Thread per read Store to feed Q
  busy = 0
  for S in self.read:
    bg_thread(partial(keys_from, S))
    busy += 1
  # merge the keys, deduplicating; close Q once every Store has finished
  for h in Q:
    if h is None:
      busy -= 1
      if not busy:
        Q.close()
    elif h not in seen:
      yield h
      seen.add(h)
def __init__(self, name, pipeline, subpipeline, outQ, **kw):
  super().__init__(name, pipeline, None, outQ, **kw)
  self.subpipeline = subpipeline
  outQ.open()

  def copy_out(sub_outQ, outQ):
    for item in sub_outQ:
      outQ.put(item)
    outQ.close()

  self.copier = bg_thread(
      copy_out,
      name="%s.copy_out" % (self,),
      args=(subpipeline.outQ, outQ)
  )
def startup(self):
  ''' Connect to the server and log in. '''
  self._sock = self.conn_spec.connect()
  self.recvf = self._sock.makefile('r', encoding='iso8859-1')
  self.sendf = self._sock.makefile('w', encoding='ascii')
  self.client_begin()
  self.client_auth(self.conn_spec.user, self.conn_spec.password)
  self._result_queue = IterableQueue()
  self._client_worker = bg_thread(
      self._client_response_worker, args=(self._result_queue,)
  )
  return self
def cmd_refresh(self, argv):
  ''' Usage: {cmd} [queue] [recordings]
        Update the db state from the PlayOn service.
  '''
  api = self.options.api
  if not argv:
    argv = ['queue', 'recordings']
  xit = 0
  Ts = []
  for state in argv:
    with Pfx(state):
      if state == 'queue':
        print("refresh queue...")
        Ts.append(bg_thread(api.queue))
      elif state == 'recordings':
        print("refresh recordings...")
        Ts.append(bg_thread(api.recordings))
      else:
        warning("unsupported update target")
        xit = 1
  print("wait for API...")
  for T in Ts:
    T.join()
  return xit
def blocked_chunks_of2(
    chunks,
    *,
    scanner=None,
    min_block=None,
    max_block=None,
):
  ''' Generator which connects to a scanner of a chunk stream in
      order to emit low level edge aligned data chunks.

      Parameters:
      * `chunks`: a source iterable of data chunks, handed to `scanner`
      * `scanner`: optional callable accepting a `CornuCopyBuffer` and
        returning an iterable of `int`s, such as a generator. `scanner`
        may be `None`, in which case only the rolling hash is used
        to locate boundaries.
      * `min_block`: the smallest amount of data that will be used
        to create a Block, default from `MIN_BLOCKSIZE` (`{MIN_BLOCKSIZE}`)
      * `max_block`: the largest amount of data that will be used to
        create a Block, default from `MAX_BLOCKSIZE` (`{MAX_BLOCKSIZE}`)

      The iterable returned from `scanner(chunks)` yields `int`s which
      are considered desirable block boundaries.
  '''
  if min_block is None:
    min_block = MIN_BLOCKSIZE
  elif min_block < 8:
    raise ValueError("rejecting min_block < 8: %s" % (min_block,))
  if max_block is None:
    max_block = MAX_BLOCKSIZE
  elif max_block >= 1024 * 1024:
    raise ValueError("rejecting max_block >= 1024*1024: %s" % (max_block,))
  if min_block >= max_block:
    raise ValueError(
        "rejecting min_block:%d >= max_block:%d" % (min_block, max_block)
    )
  # source data for aligned chunk construction
  dataQ = IterableQueue()
  # queue of offsets from the parser
  offsetQ = IterableQueue()
  # copy chunks to the parser and also to the post-parser chunk assembler
  tee_chunks = tee(chunks, dataQ)
  parse_bfr = CornuCopyBuffer(tee_chunks)
  runstate = defaults.runstate

  def run_parser(runstate, bfr, min_block, max_block, offsetQ):
    ''' Thread body to scan `chunks` for offsets.
        The chunks are copied to `dataQ` by the `tee()`; the boundary
        offsets are put onto `offsetQ`.

        If there is a scanner we scan the input data with it first.
        When it terminates (including from some exception), we scan
        the remaining chunks with `scan()`.

        The main function consumes `dataQ` and `offsetQ`, using their
        chunks and offsets to assemble aligned chunks of data.
    '''
    try:
      offset = 0
      if scanner:
        # Consume the chunks and offsets via a queue.
        # The scanner puts offsets onto the queue.
        # When the scanner fetches from the chunks, those chunks are copied to the queue.
        # Accordingly, chunks _should_ arrive before offsets within them.
        # pylint: disable=broad-except
        try:
          for offset in scanner(bfr):
            if runstate.cancelled:
              break
            # the scanner should yield only offsets, not chunks and offsets
            if not isinstance(offset, int):
              warning(
                  "discarding non-int from scanner %s: %s", scanner, offset
              )
            else:
              offsetQ.put(offset)
        except Exception as e:
          warning("exception from scanner %s: %s", scanner, e)
      # Consume the remainder of the input; the tee() will copy it to dataQ.
      # This is important to ensure that no chunk is missed.
      # We run these blocks through scan() to find offsets.
      cso = bfr.offset  # offset after all the chunks so far
      assert offset <= cso
      sofar = cso - offset
      if sofar >= max_block:
        offsetQ.put(cso)
        sofar = 0
      for offset in scan(bfr, sofar=sofar, min_block=min_block,
                         max_block=max_block):
        if runstate.cancelled:
          break
        offsetQ.put(cso + offset)
    finally:
      # end of offsets and chunks
      offsetQ.close()
      dataQ.close()

  # dispatch the parser
  bg_thread(
      run_parser,
      args=(runstate, parse_bfr, min_block, max_block, offsetQ),
      daemon=True
  )
  # data source for assembling aligned chunks
  data_bfr = CornuCopyBuffer(dataQ)
  sofar = 0
  offset = None
  for offset in offsetQ:
    assert offset >= sofar
    block_size = offset - sofar
    assert block_size >= 0, (
        "block_size:%d <= 0 -- sofar=%d, offset=%d" %
        (block_size, sofar, offset)
    )
    if block_size < min_block:
      # skip over small edges
      assert scanner is not None, (
          "scanner=None but still got an overly near offset"
          " (sofar=%d, offset=%d => block_size=%d < min_block:%d)" %
          (sofar, offset, block_size, min_block)
      )
      continue
    subchunks = data_bfr.takev(block_size)
    assert sum(map(len, subchunks)) == block_size
    if block_size > max_block:
      # break up overly long blocks without a parser
      assert scanner is not None, (
          "scanner=None but still got an overly distant offset"
          " (sofar=%d, offset=%d => block_size=%d > max_block:%d)" %
          (sofar, offset, block_size, max_block)
      )
      yield from blocked_chunks_of2(
          subchunks, min_block=min_block, max_block=max_block
      )
    else:
      yield b''.join(subchunks)
    sofar += block_size
  bs = b''.join(data_bfr)
  if bs:
    assert len(bs) <= max_block
    yield bs
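# A small usage sketch (not from the original source): cut a file into
# edge-aligned chunks using only the rolling hash (scanner=None) and print
# each chunk's size. It assumes blocked_chunks_of2() as defined above and
# that whatever defaults/runstate context the module requires is already
# established by the caller.
def report_chunk_sizes(path):
  ''' Read `path` in 1 MiB pieces and print the size of each aligned chunk. '''

  def file_chunks():
    with open(path, 'rb') as f:
      while True:
        data = f.read(1024 * 1024)
        if not data:
          break
        yield data

  for chunk in blocked_chunks_of2(file_chunks()):
    print(len(chunk))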
def blocked_chunks_of(
    chunks,
    *,
    scanner=None,
    min_block=None,
    max_block=None,
    histogram=None,
):
  ''' Generator which connects to a scanner of a chunk stream in
      order to emit low level edge aligned data chunks.

      *OBSOLETE*: we now use the simpler and faster `blocked_chunks_of2`.

      Parameters:
      * `chunks`: a source iterable of data chunks, handed to `scanner`
      * `scanner`: optional callable accepting a `CornuCopyBuffer` and
        returning an iterable of `int`s, such as a generator. `scanner`
        may be `None`, in which case only the rolling hash is used
        to locate boundaries.
      * `min_block`: the smallest amount of data that will be used
        to create a Block, default from `MIN_BLOCKSIZE` (`{MIN_BLOCKSIZE}`)
      * `max_block`: the largest amount of data that will be used to
        create a Block, default from `MAX_BLOCKSIZE` (`{MAX_BLOCKSIZE}`)
      * `histogram`: if not `None`, a `defaultdict(int)` to collate counts.
        Integer indices count block sizes and string indices are used
        for `'bytes_total'` and `'bytes_hash_scanned'`.

      The iterable returned from `scanner(chunks)` yields `int`s which
      are considered desirable block boundaries.
  '''
  # pylint: disable=too-many-nested-blocks,too-many-statements
  # pylint: disable=too-many-branches,too-many-locals
  with Pfx("blocked_chunks_of"):
    if min_block is None:
      min_block = MIN_BLOCKSIZE
    elif min_block < 8:
      raise ValueError("rejecting min_block < 8: %s" % (min_block,))
    if max_block is None:
      max_block = MAX_BLOCKSIZE
    elif max_block >= 1024 * 1024:
      raise ValueError("rejecting max_block >= 1024*1024: %s" % (max_block,))
    if min_block >= max_block:
      raise ValueError(
          "rejecting min_block:%d >= max_block:%d" % (min_block, max_block)
      )
    # obtain iterator of chunks; this avoids accidentally reusing the chunks
    # if for example chunks is a sequence
    chunk_iter = iter(chunks)
    # Set up parseQ, an iterable yielding a mix of source data and
    # offsets representing desirable block boundaries.
    # If there is no scanner, this is just chunk_iter.
    # If there is a scanner we dispatch the scanner in a separate
    # Thread and feed it a tee() of chunk_iter, which copies chunks
    # to the parseQ when chunks are obtained by the scanner. The
    # Thread runs the scanner and copies its output offsets to the
    # parseQ.
    # The tee() arranges that chunks arrive before any offsets within them.
    if scanner is None:
      # No scanner, consume the chunks directly.
      parseQ = chunk_iter
    else:
      # Consume the chunks and offsets via a queue.
      # The scanner puts offsets onto the queue.
      # When the scanner fetches from the chunks, those chunks are copied to the queue.
      # When the scanner terminates, any remaining chunks are also copied to the queue.
      parseQ = IterableQueue()
      chunk_iter = tee(chunk_iter, parseQ)

      def run_parser():
        ''' Thread body to run the supplied scanner against the input data.
        '''
        bfr = CornuCopyBuffer(chunk_iter)
        # pylint: disable=broad-except
        try:
          for offset in scanner(bfr):
            # the scanner should yield only offsets, not chunks and offsets
            if not isinstance(offset, int):
              warning(
                  "discarding non-int from scanner %s: %s", scanner, offset
              )
            else:
              parseQ.put(offset)
        except Exception as e:
          exception("exception from scanner %s: %s", scanner, e)
        # Consume the remainder of chunk_iter; the tee() will copy it to parseQ.
        for _ in chunk_iter:
          pass
        # end of offsets and chunks
        parseQ.close()

      bg_thread(run_parser)
    # inbound chunks and offsets
    in_offsets = []  # heap of unprocessed edge offsets
    # prime `available_chunk` with the first data chunk, ready for get_next_chunk
    try:
      available_chunk = next(parseQ)
    except StopIteration:
      # no data! just return
      return

    def get_next_chunk():
      ''' Fetch and return the next data chunk from the `parseQ`.
          Return None at end of input.
          Also gather all the following offsets from the queue before return.
          Because this inherently means collecting the chunk beyond
          these offsets, we keep that in `available_chunk` for the
          next call.

          Sets parseQ to None if the end of the iterable is reached.
      '''
      nonlocal parseQ, in_offsets, hash_value, available_chunk
      if parseQ is None:
        assert available_chunk is None
        return None
      next_chunk = available_chunk
      available_chunk = None
      assert not isinstance(next_chunk, int)
      # scan the new chunk and load potential edges into the offset heap
      hash_value, chunk_scan_offsets = scanbuf(hash_value, next_chunk)
      for cso in chunk_scan_offsets:
        heappush(in_offsets, offset + cso)
      # gather items from the parseQ until the following chunk
      # or end of input
      while True:
        try:
          item = next(parseQ)
        except StopIteration:
          parseQ = None
          break
        else:
          if isinstance(item, int):
            heappush(in_offsets, item)
          else:
            available_chunk = item
            break
      return next_chunk

    last_offset = None
    first_possible_point = None
    max_possible_point = None

    def recompute_offsets():
      ''' Recompute relevant offsets from the block parameters.
          The first_possible_point is last_offset+min_block,
          the earliest point at which we will accept a block boundary.
          The max_possible_point is last_offset+max_block,
          the latest point at which we will accept a block boundary;
          we will choose this if no next_offset or hash offset is
          found earlier.
      '''
      nonlocal last_offset, first_possible_point, max_possible_point
      first_possible_point = last_offset + min_block
      max_possible_point = last_offset + max_block

    # prepare initial state
    last_offset = 0  # latest released boundary
    recompute_offsets()  # compute first_possible_point and max_possible_point
    hash_value = 0
    offset = 0
    chunk0 = None
    offset0 = None
    # unblocked outbound data
    pending = _PendingBuffer(max_block)
    # Read data chunks and locate desired boundaries.
    while True:
      chunk = get_next_chunk()
      if chunk is None:
        break
      # verify current chunk start offset against end of previous chunk
      assert chunk0 is None or offset == offset0 + len(chunk0), \
          "offset0=%s, len(chunk0)=%d: sum(%d) != current offset %d" \
          % (offset0, len(chunk0), offset0 + len(chunk0), offset)
      chunk0 = chunk
      offset0 = offset
      chunk = memoryview(chunk)
      chunk_end_offset = offset + len(chunk)
      # process current chunk
      advance_by = 0  # how much data to add to the pending buffer
      release = False  # whether we hit a boundary ==> flush the buffer
      while chunk:
        if advance_by > 0:
          # advance through this chunk
          # buffer the advance
          # release ==> flush the buffer and update last_offset
          assert advance_by is not None
          assert advance_by >= 0
          assert advance_by <= len(chunk)
          # save the advance bytes and yield any overflow
          for out_chunk in pending.append(chunk[:advance_by]):
            yield out_chunk
            if histogram is not None:
              out_chunk_size = len(out_chunk)
              histogram['bytes_total'] += out_chunk_size
              histogram[out_chunk_size] += 1
              histogram['buffer_overflow_chunks'] += 1
          offset += advance_by
          chunk = chunk[advance_by:]
          if last_offset != pending.offset:
            # if the flush discarded a full buffer we need to adjust our boundaries
            last_offset = pending.offset
            recompute_offsets()
          if release:
            release = False  # becomes true if we should flush after taking data
            # yield the current pending data
            for out_chunk in pending.flush():
              yield out_chunk
              if histogram is not None:
                out_chunk_size = len(out_chunk)
                histogram['bytes_total'] += out_chunk_size
                histogram[out_chunk_size] += 1
            last_offset = pending.offset
            recompute_offsets()
          if not chunk:
            # consumed the end of the chunk, need a new one
            break
        advance_by = None
        # fetch the next available edge, None if nothing available or suitable
        while True:
          if in_offsets:
            next_offset = heappop(in_offsets)
            if next_offset > offset and next_offset >= first_possible_point:
              break
          else:
            next_offset = None
            break
        if next_offset is None or next_offset > chunk_end_offset:
          # no suitable edge: consume the chunk and advance
          take_to = chunk_end_offset
        else:
          # edge before end of chunk: use it
          take_to = next_offset
          release = True
        advance_by = take_to - offset
        assert advance_by > 0
    # yield any left over data
    for out_chunk in pending.flush():
      yield out_chunk
      if histogram is not None:
        out_chunk_size = len(out_chunk)
        histogram['bytes_total'] += out_chunk_size
        histogram[out_chunk_size] += 1
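# A brief sketch of the `histogram` parameter documented above (the function
# itself is marked obsolete): pass a defaultdict(int) and inspect the collated
# counters afterwards. The `blocking_stats` name is invented for illustration.
from collections import defaultdict

def blocking_stats(chunks, scanner=None):
  ''' Run `blocked_chunks_of` over `chunks` and return its histogram. '''
  histogram = defaultdict(int)
  for _ in blocked_chunks_of(chunks, scanner=scanner, histogram=histogram):
    pass
  # histogram['bytes_total'] counts the emitted bytes; integer keys count
  # how many blocks of each size were produced.
  return histogram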
def startup_shutdown(self):
  ''' Start up and shut down the `FilesDir`: take locks, start worker threads etc.
  '''
  self.initdir()
  self._rfds = {}
  self._unindexed = {}
  self._filemap = SqliteFilemap(self, self.statefilepath)
  hashname = self.hashname
  self.index = self.indexclass(
      self.pathto(self.INDEX_FILENAME_BASE_FORMAT.format(hashname=hashname))
  )
  self.index.open()
  self.runstate.start()
  # cache of open DataFiles
  self._cache = LRU_Cache(
      maxsize=4, on_remove=lambda k, datafile: datafile.close()
  )
  # Set up data queue.
  # The .add() method adds the data to self._unindexed, puts the
  # data onto the data queue, and returns.
  # The data queue worker saves the data to backing files and
  # updates the indices.
  self._data_progress = Progress(
      name=str(self) + " data queue ",
      total=0,
      units_scale=BINARY_BYTES_SCALE,
  )
  if defaults.show_progress:
    proxy_cmgr = upd_state.upd.insert(1)
  else:
    proxy_cmgr = nullcontext()
  with proxy_cmgr as data_proxy:
    self._data_proxy = data_proxy
    self._dataQ = IterableQueue(65536)
    self._data_Thread = bg_thread(
        self._data_queue,
        name="%s._data_queue" % (self,),
    )
    self._monitor_Thread = bg_thread(
        self._monitor_datafiles,
        name="%s-datafile-monitor" % (self,),
    )
    yield
    self.runstate.cancel()
    self.flush()
    # shut down the monitor Thread
    mon_thread = self._monitor_Thread
    if mon_thread is not None:
      mon_thread.join()
      self._monitor_Thread = None
    # drain the data queue
    self._dataQ.close()
    self._data_Thread.join()
    self._dataQ = None
    self._data_Thread = None
    # update state to substrate
    self._cache = None
    self._filemap.close()
    self._filemap = None
    self.index.close()
    # close the read file descriptors
    for rfd in self._rfds.values():
      with Pfx("os.close(rfd:%d)", rfd):
        os.close(rfd)
    del self._rfds
    self.runstate.stop()
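# A generic sketch of the data-queue pattern used above, with hypothetical
# names (`handle_item`, `start_worker`): callers put work items onto an
# IterableQueue which a background Thread drains; closing the queue and
# joining the Thread performs the shutdown. It assumes the `IterableQueue`
# and `bg_thread` helpers seen elsewhere in this listing.
def start_worker(handle_item, capacity=65536):
  ''' Dispatch a worker Thread consuming an IterableQueue.
      Return `(Q, T)`: put items onto `Q`; call `Q.close()` and `T.join()`
      to shut the worker down.
  '''
  Q = IterableQueue(capacity)

  def worker():
    # runs until Q is closed and drained
    for item in Q:
      handle_item(item)

  T = bg_thread(worker, name="worker")
  return Q, T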
def __init__(self, block, mapsize=None, blockmapdir=None, runstate=None):
  ''' Initialise the `BlockMap`, dispatch the index generator.

      Parameters:
      * `block`: the source `Block`
      * `mapsize`: the size of each index map, default `OFFSET_SCALE`
      * `blockmapdir`: the pathname for persistent storage of `BlockMaps`
  '''
  super().__init__(runstate=runstate)
  if mapsize is None:
    mapsize = OFFSET_SCALE
  elif mapsize <= 0 or mapsize > OFFSET_SCALE:
    raise ValueError(
        "mapsize(%d) out of range, must be >0 and <=%d" %
        (mapsize, OFFSET_SCALE)
    )
  # DEBUGGING
  if blockmapdir is None:
    blockmapdir = defaults.S.blockmapdir
  if not isinstance(block, IndirectBlock):
    raise TypeError(
        "block needs to be an IndirectBlock, got a %s instead" %
        (type(block),)
    )
  hashcode = block.superblock.hashcode
  hashclass = type(hashcode)
  self.hashclass = hashclass
  self.mapsize = mapsize
  if blockmapdir is None:
    self.mappath = mappath = None
  else:
    self.mappath = mappath = joinpath(
        blockmapdir, "mapsize:%d" % (mapsize,), hashcode.filename
    )
    if not isdir(mappath):
      with Pfx("makedirs(%r)", mappath):
        os.makedirs(mappath)
  self.block = block
  self.S = defaults.S
  nsubmaps = len(block) // mapsize + 1
  submaps = [None] * nsubmaps
  self.maps = submaps
  mapped_to = 0
  self.rec_size = OFF_STRUCT.size + len(hashcode)
  self._loaded = False
  # preattach any existing blockmap files
  if mappath is not None:
    for submap_index in range(nsubmaps):
      submappath = joinpath(mappath, '%d.blockmap' % (submap_index,))
      if not pathexists(submappath):
        break
      # existing map, attach and install, advance and restart loop
      X("Blockmap.__init__: preattach existing map %r", submappath)
      submaps[submap_index] = MappedFD(submappath, hashclass)
      mapped_to += mapsize
  self.mapped_to = mapped_to
  if mapped_to < len(block):
    self.runstate.start()
    self._worker = bg_thread(
        self._load_maps,
        args=(defaults.S,),
        daemon=True,
        name="%s._load_maps" % (self,)
    )
  else:
    self._worker = None
def __init__(
    self, recv, send, request_handler=None, name=None, packet_grace=None,
    tick=None
):
  ''' Initialise the PacketConnection.

      Parameters:
      * `recv`: inbound binary stream.
        If this is an `int` it is taken to be an OS file descriptor,
        otherwise it should be a `cs.buffer.CornuCopyBuffer`
        or a file like object with a `read1` or `read` method.
      * `send`: outbound binary stream.
        If this is an `int` it is taken to be an OS file descriptor,
        otherwise it should be a file like object with `.write(bytes)`
        and `.flush()` methods.
        For a file descriptor sending is done via an os.dup() of the
        supplied descriptor, so the caller remains responsible for
        closing the original descriptor.
      * `packet_grace`: default pause in the packet sending worker
        to allow another packet to be queued before flushing the
        output stream.
        Default: `DEFAULT_PACKET_GRACE`s.
        A value of `0` will flush immediately if the queue is empty.
      * `request_handler`: an optional callable accepting
        (`rq_type`, `flags`, `payload`).
        The request_handler may return one of 5 values on success:
        * `None`: response will be 0 flags and an empty payload.
        * `int`: flags only. Response will be the flags and an empty payload.
        * `bytes`: payload only. Response will be 0 flags and the payload.
        * `str`: payload only. Response will be 0 flags and the str
          encoded as bytes using UTF-8.
        * `(int, bytes)`: specify flags and payload for response.
        An unsuccessful request should raise an exception, which will
        cause a failure response packet.
      * `tick`: optional tick parameter, default `None`.
        If `None`, do nothing.
        If a Boolean, call `tick_fd_2` if true, otherwise do nothing.
        Otherwise `tick` should be a callable accepting a byteslike value.
  '''
  if name is None:
    name = str(seq())
  self.name = name
  if isinstance(recv, int):
    self._recv = CornuCopyBuffer.from_fd(recv)
  elif isinstance(recv, CornuCopyBuffer):
    self._recv = recv
  else:
    self._recv = CornuCopyBuffer.from_file(recv)
  if isinstance(send, int):
    self._send = os.fdopen(os.dup(send), 'wb')
  else:
    self._send = send
  if packet_grace is None:
    packet_grace = DEFAULT_PACKET_GRACE
  if tick is None:
    tick = lambda bs: None
  elif isinstance(tick, bool):
    if tick:
      tick = tick_fd_2
    else:
      tick = lambda bs: None
  self.packet_grace = packet_grace
  self.request_handler = request_handler
  self.tick = tick
  # tags of requests in play against the local system
  self._channel_request_tags = {0: set()}
  self.notify_recv_eof = set()
  self.notify_send_eof = set()
  # LateFunctions for the requests we are performing for the remote system
  self._running = set()
  # requests we have outstanding against the remote system
  self._pending = {0: {}}
  # sequence of tag numbers
  # TODO: later, reuse old tags to prevent monotonic growth of tag field
  self._tag_seq = Seq(1)
  # work queue for local requests
  self._later = Later(4, name="%s:Later" % (self,))
  self._later.open()
  # dispatch queue of Packets to send
  self._sendQ = IterableQueue(16)
  self._lock = Lock()
  self.closed = False
  # debugging: check for reuse of (channel,tag) etc
  self.__sent = set()
  self.__send_queued = set()
  # dispatch Thread to process received packets
  self._recv_thread = bg_thread(
      self._receive_loop, name="%s[_receive_loop]" % (self.name,)
  )
  # dispatch Thread to send data
  # primary purpose is to bundle output by deferring flushes
  self._send_thread = bg_thread(
      self._send_loop, name="%s[_send]" % (self.name,)
  )
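# A hypothetical request handler sketch grounded in the docstring above: the
# handler receives (rq_type, flags, payload) and may return None, an int of
# flags, bytes, a str, or a (flags, payload) tuple; raising an exception
# produces a failure response packet. The rq_type values here are invented.
def demo_request_handler(rq_type, flags, payload):
  ''' Handle a request, returning one of the documented response forms. '''
  if rq_type == 0:
    # ping: empty success response (0 flags, empty payload)
    return None
  if rq_type == 1:
    # echo: return the payload unchanged with 0 flags
    return payload
  if rq_type == 2:
    # text response: a str is encoded as UTF-8 by the connection
    return "OK"
  # unknown request type: raising causes a failure response packet
  raise ValueError("unsupported request type %r" % (rq_type,))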