def run(self):
    """Drain the transfer broker for ``self.tag`` into the partition store.

    Batches are fetched from the broker until ``broker.is_closable()`` is
    true. Every key/value pair decoded from a batch's ``data`` is written
    through a single write batch opened on the partition adapter, and the
    number of pairs per batch is reported to the tag's
    ``_BatchStreamStatus``.

    Raises:
        TimeoutError: when the wait accumulated over consecutive empty
            polls exceeds ``EGGROLL_CORE_FIFOBROKER_ITER_TIMEOUT_SEC``.
        Exception: any other failure is logged together with the last
            header seen and then re-raised.
    """
    # batch stream must be executed serially, and reinit.
    # TODO:0: remove lock to bss
    rs_header = None
    # tag includes partition info in tag generation
    with PutBatchTask._partition_lock[self.tag]:
        L.trace(f"do_store start for tag={self.tag}, partition_id={self.partition._id}")
        bss = _BatchStreamStatus.get_or_create(self.tag)
        try:
            broker = TransferService.get_or_create_broker(self.tag, write_signals=1)
            waited = 0
            timeout_sec = int(CoreConfKeys.EGGROLL_CORE_FIFOBROKER_ITER_TIMEOUT_SEC.get())
            poll_interval = 0.1
            with create_adapter(self.partition) as db, db.new_batch() as wb:
                while not broker.is_closable():
                    try:
                        batch = broker.get(block=True, timeout=poll_interval)
                    except queue.Empty:
                        # Nothing arrived within one poll: keep waiting
                        # until the accumulated wait passes the limit.
                        waited += poll_interval
                        if waited <= timeout_sec:
                            continue
                        raise TimeoutError(f'timeout in PutBatchTask.run. tag={self.tag}, iter_timeout={timeout_sec}, iter_wait={waited}')
                    except BrokerClosed:
                        # Loop back so the is_closable() check decides
                        # whether to stop.
                        continue

                    # A batch arrived, so the idle timer starts over.
                    waited = 0
                    rs_header = ErRollSiteHeader.from_proto_string(batch.header.ext)
                    batch_pairs = 0
                    if batch.data:
                        reader = PairBinReader(pair_buffer=ArrayByteBuffer(batch.data), data=batch.data)
                        for key_bytes, value_bytes in reader.read_all():
                            wb.put(key_bytes, value_bytes)
                            batch_pairs += 1
                    bss.count_batch(rs_header, batch_pairs)
                    # TODO:0
                    bss._data_type = rs_header._data_type
                    if rs_header._stage == FINISH_STATUS:
                        # starting from 0
                        bss.set_done(rs_header)
            # NOTE(review): placed after the adapter/write-batch context
            # exits -- confirm against the original layout.
            bss.check_finish()
            # TransferService.remove_broker(tag) will be called in get_status phrase finished or exception got
        except Exception as e:
            rs_key = rs_header.get_rs_key() if rs_header is not None else None
            L.exception(f'_run_put_batch error, tag={self.tag}, '
                        f'rs_key={rs_key}, rs_header={rs_header}')
            raise e
        finally:
            TransferService.remove_broker(self.tag)
def bin_batch_to_pair(input_iter):
    """Lazily decode binary batches into ``(key_bytes, value_bytes)`` pairs.

    Args:
        input_iter: iterable of batches; each batch must support ``len()``
            and be accepted by ``ArrayByteBuffer``.

    Yields:
        Every pair produced by ``PairBinReader.read_all()`` for each batch,
        in order.

    An ``IndexError`` raised while decoding a batch is logged and the rest
    of that batch is skipped; iteration then moves on to the next batch.
    """
    L.debug("bin_batch_to_pair start")
    emitted = 0
    for raw_batch in input_iter:
        L.debug(f"bin_batch_to_pair batch start size:{len(raw_batch)}")
        try:
            pair_reader = PairBinReader(pair_buffer=ArrayByteBuffer(raw_batch))
            for key_bytes, value_bytes in pair_reader.read_all():
                yield key_bytes, value_bytes
                # Counted only once the consumer asks for the next pair.
                emitted += 1
        except IndexError as err:
            L.exception(f"error bin bath format:{err}")
        L.debug(f"bin_batch_to_pair batch end count:{emitted}")
    L.debug(f"bin_batch_to_pair total_written count:{emitted}")
def bin_batch_to_pair(input_iter):
    """Lazily decode binary batches into ``(key_bytes, value_bytes)`` pairs.

    Args:
        input_iter: iterable of batches; each batch must support ``len()``
            and be accepted by both ``ArrayByteBuffer`` and the ``data``
            argument of ``PairBinReader``.

    Yields:
        Every pair produced by ``PairBinReader.read_all()`` for each batch,
        in order.

    An ``IndexError`` raised while decoding a batch is logged and the rest
    of that batch is skipped; iteration then moves on to the next batch.
    """
    L.trace("bin_batch_to_pair start")
    emitted = 0
    for raw_batch in input_iter:
        L.trace(f"bin_batch_to_pair: cur batch size={len(raw_batch)}")
        try:
            pair_reader = PairBinReader(pair_buffer=ArrayByteBuffer(raw_batch), data=raw_batch)
            for key_bytes, value_bytes in pair_reader.read_all():
                yield key_bytes, value_bytes
                # Counted only once the consumer asks for the next pair.
                emitted += 1
        except IndexError as err:
            L.exception(f"error bin bath format: {err}")
        L.trace(f"bin_batch_to_pair batch ends. total write count={emitted}")
    L.trace(f"bin_batch_to_pair total_written count={emitted}")
def test_pair_bin(self):
    """Write ten small pairs through a 32-byte buffer and print the results.

    When a write raises ``IndexError``, the bytes written so far are
    printed, the buffer offset is reset to 0 and the failed pair is
    rewritten with a fresh writer. Finally, everything readable from
    offset 0 is printed.
    """
    backing = bytearray(32)
    buf = ArrayByteBuffer(backing)
    writer = PairBinWriter(buf)
    for number in range(10):
        try:
            writer.write(str(number).encode(), str(number).encode())
        except IndexError:
            # Dump what has been written, then start over at offset 0.
            written = buf.read_bytes(buf.get_offset(), 0)
            print(written)
            buf.set_offset(0)
            writer = PairBinWriter(buf)
            writer.write(str(number).encode(), str(number).encode())

    buf.set_offset(0)
    reader = PairBinReader(buf)
    print("last")
    remaining = list(reader.read_all())
    print(remaining)
class FileIterator(PairIterator):
    """Pair iterator that reads pairs from a file object via ``PairBinReader``."""

    def __init__(self, file):
        """Rewind ``file`` to offset 0 and attach a pair reader to it.

        Args:
            file: object providing ``seek`` that ``FileByteBuffer`` accepts.
        """
        file.seek(0)
        file_buffer = FileByteBuffer(file)
        self.reader = PairBinReader(file_buffer)

    def __iter__(self):
        """Return the result of ``self.reader.read_all()``."""
        pairs = self.reader.read_all()
        return pairs

    def close(self):
        """Do nothing; the file passed to ``__init__`` is not closed here."""
class MmapIterator(PairIterator):
    """Pair iterator that reads pairs from a memory-mapped file."""

    def __init__(self, file):
        """Map ``file`` in full and attach a pair reader to the mapping.

        Args:
            file: object providing ``fileno()``; length 0 is passed to
                ``mmap.mmap`` so the whole file is mapped.
        """
        mapping = mmap.mmap(file.fileno(), 0)
        mapping.seek(0)
        self.mm = mapping
        self.reader = PairBinReader(ArrayByteBuffer(mapping))

    def __iter__(self):
        """Return the result of ``self.reader.read_all()``."""
        pairs = self.reader.read_all()
        return pairs

    def close(self):
        """Do nothing.

        NOTE(review): the mmap created in ``__init__`` is not closed here --
        confirm whether releasing it is the caller's job or a missing step.
        """
def test_pair_bin_no_abb(self):
    """Write ten small pairs using the ``data=`` form of the reader/writer.

    When a write raises ``IndexError``, the writer offset is reset to 0,
    the failed pair is rewritten with a fresh writer, and the bytes up to
    the writer's offset are printed. Finally, everything readable from the
    backing array is printed.
    """
    backing = bytearray(32)
    buf = ArrayByteBuffer(backing)
    writer = PairBinWriter(pair_buffer=buf, data=backing)
    for number in range(10):
        try:
            writer.write(str(number).encode(), str(number).encode())
        except IndexError:
            writer.set_offset(0)
            writer = PairBinWriter(pair_buffer=buf, data=backing)
            writer.write(str(number).encode(), str(number).encode())
            # NOTE(review): kept inside the recovery branch, mirroring
            # test_pair_bin -- confirm against the original layout.
            pbr = PairBinReader(pair_buffer=buf, data=writer.get_data())
            written = pbr.read_bytes(writer.get_offset(), 0)
            print(written)

    writer.set_offset(0)
    reader = PairBinReader(pair_buffer=buf, data=backing)
    print("last")
    remaining = list(reader.read_all())
    print(remaining)
def __init__(self, file):
    """Map ``file`` in full and attach a pair reader to the mapping.

    Args:
        file: object providing ``fileno()``; length 0 is passed to
            ``mmap.mmap`` so the whole file is mapped.
    """
    mapping = mmap.mmap(file.fileno(), 0)
    mapping.seek(0)
    self.mm = mapping
    self.reader = PairBinReader(ArrayByteBuffer(mapping))
def __init__(self, file):
    """Rewind ``file`` to offset 0 and attach a pair reader to it.

    Args:
        file: object providing ``seek`` that ``FileByteBuffer`` accepts.
    """
    file.seek(0)
    file_buffer = FileByteBuffer(file)
    self.reader = PairBinReader(file_buffer)