def __init__(self, adapter: RollSiteAdapter, options: dict = None):
    """Build a write batch bound to the adapter's roll site header and proxy endpoint.

    Sets up the gRPC stub, the send buffer (size resolved from options, then
    static conf, then the key's default), and the src/dst proxy topics.
    """
    if options is None:
        options = {}
    self.adapter = adapter
    self.roll_site_header: ErRollSiteHeader = adapter.roll_site_header
    self.namespace = adapter.namespace
    self.name = create_store_name(self.roll_site_header)
    self.tagged_key = ''
    self.obj_type = adapter.obj_type
    self.proxy_endpoint = adapter.proxy_endpoint

    grpc_channel = self.grpc_channel_factory.create_channel(self.proxy_endpoint)
    self.stub = proxy_pb2_grpc.DataTransferServiceStub(grpc_channel)

    # send-buffer size: explicit options win, then static conf, then default
    sendbuf_conf = RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE
    static_er_conf = get_static_er_conf()
    conf_fallback = static_er_conf.get(sendbuf_conf.key, sendbuf_conf.default_value)
    self.__bin_packet_len = int(options.get(sendbuf_conf.key, conf_fallback))

    self.total_written = 0
    self.ba = bytearray(self.__bin_packet_len)
    self.buffer = ArrayByteBuffer(self.ba)
    self.writer = PairBinWriter(pair_buffer=self.buffer)
    self.push_batch_cnt = 0
    self.push_pair_cnt = 0

    self.topic_src = proxy_pb2.Topic(name=self.name,
                                     partyId=self.roll_site_header._src_party_id,
                                     role=self.roll_site_header._src_role,
                                     callback=None)
    self.topic_dst = proxy_pb2.Topic(name=self.name,
                                     partyId=self.roll_site_header._dst_party_id,
                                     role=self.roll_site_header._dst_role,
                                     callback=None)
def test_byte_buffer(self):
    """Round-trip an int32 and a raw byte string through ArrayByteBuffer."""
    backing = bytearray(1024)
    byte_buf = ArrayByteBuffer(backing)
    byte_buf.write_int32(12)
    byte_buf.write_bytes(b"34")
    # rewind and verify both values read back intact
    byte_buf.set_offset(0)
    assert byte_buf.read_int32() == 12
    assert byte_buf.read_bytes(2) == b"34"
def put(self, k, v):
    """Append one key/value pair, pushing and re-allocating the buffer on overflow."""
    if self.obj_type == 'object':
        # single-object transfers remember the (deserialized) key as the tag
        L.debug(f"set tagged_key: {k}")
        self.tagged_key = _serdes.deserialize(k)
    try:
        self.writer.write(k, v)
    except IndexError as e:
        # buffer full: ship what we have, then grow the buffer and retry once
        filled = bytes(self.ba[0:self.buffer.get_offset()])
        self.push(filled)
        # TODO:0: replace 1024 with constant
        grown_len = max(self.__bin_packet_len, len(k) + len(v) + 1024)
        self.ba = bytearray(grown_len)
        self.buffer = ArrayByteBuffer(self.ba)
        self.writer = PairBinWriter(pair_buffer=self.buffer)
        self.writer.write(k, v)
    except Exception as e:
        L.error(f"Unexpected error: {sys.exc_info()[0]}")
        raise e
def commit(bs=sendbuf_size): nonlocal ba nonlocal buffer nonlocal writer bin_batch = None if ba: bin_batch = bytes(ba[0:buffer.get_offset()]) ba = bytearray(bs) buffer = ArrayByteBuffer(ba) writer = PairBinWriter(pair_buffer=buffer) return bin_batch
def commit(bs=sendbuf_size): L.debug(f'generate_bin_batch commit: {done_cnt}') nonlocal ba nonlocal buffer nonlocal writer bin_batch = None if ba: bin_batch = bytes(ba[0:buffer.get_offset()]) ba = bytearray(bs) buffer = ArrayByteBuffer(ba) writer = PairBinWriter(pair_buffer=buffer) return bin_batch
def run(self):
    """Drain binary pair batches for this tag from the transfer broker into storage.

    Serialized per tag via a partition lock; pairs from each batch are written
    through a storage write-batch, per-batch counts are reported to the shared
    _BatchStreamStatus, and the broker is removed on the way out.
    """
    # batch stream must be executed serially, and reinit.
    # TODO:0: remove lock to bss
    rs_header = None
    with PutBatchTask._partition_lock[self.tag]:  # tag includes partition info in tag generation
        L.trace(f"do_store start for tag={self.tag}, partition_id={self.partition._id}")
        bss = _BatchStreamStatus.get_or_create(self.tag)
        try:
            broker = TransferService.get_or_create_broker(self.tag, write_signals=1)
            iter_wait = 0
            iter_timeout = int(CoreConfKeys.EGGROLL_CORE_FIFOBROKER_ITER_TIMEOUT_SEC.get())
            batch = None
            batch_get_interval = 0.1
            with create_adapter(self.partition) as db, db.new_batch() as wb:
                # for batch in broker:
                while not broker.is_closable():
                    try:
                        batch = broker.get(block=True, timeout=batch_get_interval)
                    except queue.Empty as e:
                        # no batch yet: accumulate wait time and time out if it exceeds the limit
                        iter_wait += batch_get_interval
                        if iter_wait > iter_timeout:
                            raise TimeoutError(f'timeout in PutBatchTask.run. tag={self.tag}, iter_timeout={iter_timeout}, iter_wait={iter_wait}')
                        else:
                            continue
                    except BrokerClosed as e:
                        # loop condition (is_closable) will terminate the loop
                        continue
                    iter_wait = 0
                    rs_header = ErRollSiteHeader.from_proto_string(batch.header.ext)
                    batch_pairs = 0
                    if batch.data:
                        # unpack the binary batch into individual k/v pairs
                        bin_data = ArrayByteBuffer(batch.data)
                        reader = PairBinReader(pair_buffer=bin_data, data=batch.data)
                        for k_bytes, v_bytes in reader.read_all():
                            wb.put(k_bytes, v_bytes)
                            batch_pairs += 1
                    bss.count_batch(rs_header, batch_pairs)
                    # TODO:0
                    bss._data_type = rs_header._data_type
                    if rs_header._stage == FINISH_STATUS:
                        bss.set_done(rs_header)  # starting from 0
            # NOTE(review): placed after the storage `with` block per reconstructed
            # layout — confirm check_finish is meant to run once after the loop
            bss.check_finish()
            # TransferService.remove_broker(tag) will be called in get_status phrase finished or exception got
        except Exception as e:
            L.exception(f'_run_put_batch error, tag={self.tag}, '
                        f'rs_key={rs_header.get_rs_key() if rs_header is not None else None}, rs_header={rs_header}')
            raise e
        finally:
            TransferService.remove_broker(self.tag)
def test_pair_bin(self):
    """Write pairs into a tiny buffer until it overflows, reset, and read back."""
    backing = bytearray(32)
    pair_buf = ArrayByteBuffer(backing)
    pair_writer = PairBinWriter(pair_buf)
    for idx in range(10):
        try:
            pair_writer.write(str(idx).encode(), str(idx).encode())
        except IndexError as e:
            # overflow: dump what was packed, rewind, and retry the same pair
            print(pair_buf.read_bytes(pair_buf.get_offset(), 0))
            pair_buf.set_offset(0)
            pair_writer = PairBinWriter(pair_buf)
            pair_writer.write(str(idx).encode(), str(idx).encode())
    pair_buf.set_offset(0)
    pair_reader = PairBinReader(pair_buf)
    print("last")
    print(list(pair_reader.read_all()))
def bin_batch_to_pair(input_iter):
    """Yield (key_bytes, value_bytes) pairs unpacked from an iterable of binary batches.

    Malformed batches (IndexError while reading) are logged and skipped rather
    than aborting the whole stream.
    """
    L.debug(f"bin_batch_to_pair start")
    total_written = 0
    for batch in input_iter:
        L.debug(f"bin_batch_to_pair batch start size:{len(batch)}")
        try:
            bin_data = ArrayByteBuffer(batch)
            reader = PairBinReader(pair_buffer=bin_data)
            for k_bytes, v_bytes in reader.read_all():
                yield k_bytes, v_bytes
                total_written += 1
        except IndexError as e:
            # fixed typo in log message: "bath" -> "batch"
            L.exception(f"error bin batch format:{e}")
        # note: count is cumulative across all batches so far
        L.debug(f"bin_batch_to_pair batch end count:{total_written}")
    L.debug(f"bin_batch_to_pair total_written count:{total_written}")
def bin_batch_to_pair(input_iter):
    """Yield (key_bytes, value_bytes) pairs unpacked from an iterable of binary batches.

    Malformed batches (IndexError while reading) are logged and skipped rather
    than aborting the whole stream.
    """
    L.trace(f"bin_batch_to_pair start")
    write_count = 0
    for batch in input_iter:
        L.trace(f"bin_batch_to_pair: cur batch size={len(batch)}")
        try:
            bin_data = ArrayByteBuffer(batch)
            reader = PairBinReader(pair_buffer=bin_data, data=batch)
            for k_bytes, v_bytes in reader.read_all():
                yield k_bytes, v_bytes
                write_count += 1
        except IndexError as e:
            # fixed typo in log message: "bath" -> "batch"
            L.exception(f"error bin batch format: {e}")
        # note: count is cumulative across all batches so far
        L.trace(f"bin_batch_to_pair batch ends. total write count={write_count}")
    L.trace(f"bin_batch_to_pair total_written count={write_count}")
def test_pair_bin_no_abb(self):
    """Overflow a small writer created with an explicit data array, then read back."""
    backing = bytearray(32)
    pair_buf = ArrayByteBuffer(backing)
    pair_writer = PairBinWriter(pair_buffer=pair_buf, data=backing)
    for idx in range(10):
        try:
            pair_writer.write(str(idx).encode(), str(idx).encode())
        except IndexError as e:
            # overflow: rewind, rebuild the writer, retry, then dump the packed bytes
            pair_writer.set_offset(0)
            pair_writer = PairBinWriter(pair_buffer=pair_buf, data=backing)
            pair_writer.write(str(idx).encode(), str(idx).encode())
            pbr = PairBinReader(pair_buffer=pair_buf, data=pair_writer.get_data())
            print(pbr.read_bytes(pair_writer.get_offset(), 0))
    pair_writer.set_offset(0)
    pair_reader = PairBinReader(pair_buffer=pair_buf, data=backing)
    print("last")
    print(list(pair_reader.read_all()))
class RollSiteWriteBatch(PairWriteBatch):
    """Write batch that streams key/value pairs to a remote party via the roll site proxy.

    Pairs are packed into a fixed-size binary buffer (PairBinWriter); when the
    buffer overflows, the packed bytes are pushed as one gRPC stream message.
    close() pushes the final partial batch and then sends a "markEnd" unary
    call so the receiver knows the stream is complete.
    """

    # shared class-level factory for gRPC channels to proxy endpoints
    grpc_channel_factory = GrpcChannelFactory()

    # TODO:0: check if secure channel needed
    def __init__(self, adapter: RollSiteAdapter, options: dict = None):
        """Bind to the adapter's roll site header, proxy endpoint, and send buffer config."""
        if options is None:
            options = {}
        self.adapter = adapter
        self.roll_site_header: ErRollSiteHeader = adapter.roll_site_header
        self.namespace = adapter.namespace
        self.name = create_store_name(self.roll_site_header)
        self.tagged_key = ''
        self.obj_type = adapter.obj_type

        self.proxy_endpoint = adapter.proxy_endpoint
        channel = self.grpc_channel_factory.create_channel(self.proxy_endpoint)
        self.stub = proxy_pb2_grpc.DataTransferServiceStub(channel)

        static_er_conf = get_static_er_conf()
        # buffer size resolution: options -> static conf -> key default
        self.__bin_packet_len = int(
            options.get(
                RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                static_er_conf.get(
                    RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                    RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.
                    default_value)))
        self.total_written = 0
        self.ba = bytearray(self.__bin_packet_len)
        self.buffer = ArrayByteBuffer(self.ba)
        self.writer = PairBinWriter(pair_buffer=self.buffer)

        self.push_cnt = 0

        self.topic_src = proxy_pb2.Topic(
            name=self.name,
            partyId=self.roll_site_header._src_party_id,
            role=self.roll_site_header._src_role,
            callback=None)
        self.topic_dst = proxy_pb2.Topic(
            name=self.name,
            partyId=self.roll_site_header._dst_party_id,
            role=self.roll_site_header._dst_role,
            callback=None)

    def __repr__(self):
        return f'<ErRollSiteWriteBatch(' \
               f'adapter={self.adapter}, ' \
               f'roll_site_header={self.roll_site_header}' \
               f'namespace={self.namespace}, ' \
               f'name={self.name}, ' \
               f'obj_type={self.obj_type}, ' \
               f'proxy_endpoint={self.proxy_endpoint}) ' \
               f'at {hex(id(self))}>'

    def generate_message(self, obj, metadata):
        """Yield a single Packet wrapping obj; gRPC push() expects a request iterator."""
        data = proxy_pb2.Data(value=obj)
        metadata.seq += 1
        packet = proxy_pb2.Packet(header=metadata, body=data)
        yield packet

    # TODO:0: configurable
    def push(self, obj):
        """Push one packed binary batch to the proxy, retrying up to max_retry_cnt times."""
        L.debug(
            f'pushing for task: {self.name}, partition id: {self.adapter.partition_id}, push cnt: {self.get_push_count()}'
        )
        task_info = proxy_pb2.Task(
            taskId=self.name,
            model=proxy_pb2.Model(name=self.adapter.roll_site_header_string,
                                  dataKey=self.namespace))
        command_test = proxy_pb2.Command()
        # TODO: conf test as config and use it
        # NOTE(review): conf_test is constructed but never attached to the
        # metadata/request below — confirm whether it should be included
        conf_test = proxy_pb2.Conf(overallTimeout=200000,
                                   completionWaitTimeout=200000,
                                   packetIntervalTimeout=200000,
                                   maxRetries=10)

        metadata = proxy_pb2.Metadata(task=task_info,
                                      src=self.topic_src,
                                      dst=self.topic_dst,
                                      command=command_test,
                                      seq=0,
                                      ack=0)

        max_retry_cnt = 100
        exception = None
        for i in range(1, max_retry_cnt + 1):
            try:
                self.stub.push(self.generate_message(obj, metadata))
                exception = None
                self.increase_push_count()
                break
            except Exception as e:
                exception = e
                L.info(
                    f'caught exception in pushing {self.name}, partition_id: {self.adapter.partition_id}: {e}. retrying. current retry count: {i}, max_retry_cnt: {max_retry_cnt}'
                )
                # linear backoff capped at 30 seconds
                time.sleep(min(0.1 * i, 30))
        if exception:
            raise GrpcCallError("error in push", self.proxy_endpoint, exception)

    def write(self):
        """Push whatever has been packed so far and reset the buffer offset."""
        bin_data = bytes(self.ba[0:self.buffer.get_offset()])
        self.push(bin_data)
        # NOTE(review): self.writer still wraps the previous buffer object —
        # confirm callers rebuild the writer (as put() does) before reuse
        self.buffer = ArrayByteBuffer(self.ba)

    def send_end(self):
        """Send the unary "markEnd" status call telling the receiver the stream is done."""
        L.info(f"send_end tagged_key:{self.tagged_key}")
        task_info = proxy_pb2.Task(
            taskId=self.name,
            model=proxy_pb2.Model(name=self.adapter.roll_site_header_string,
                                  dataKey=self.namespace))
        command_test = proxy_pb2.Command(name="set_status")
        # NOTE(review): conf_test is constructed but never attached — confirm intent
        conf_test = proxy_pb2.Conf(overallTimeout=20000,
                                   completionWaitTimeout=20000,
                                   packetIntervalTimeout=20000,
                                   maxRetries=10)

        metadata = proxy_pb2.Metadata(task=task_info,
                                      src=self.topic_src,
                                      dst=self.topic_dst,
                                      command=command_test,
                                      operator="markEnd",
                                      seq=self.get_push_count(),
                                      ack=0)

        packet = proxy_pb2.Packet(header=metadata)
        try:
            # TODO:0: retry and sleep for all grpc call in RollSite
            self.stub.unaryCall(packet)
        except Exception as e:
            raise GrpcCallError('send_end', self.proxy_endpoint, e)

    def close(self):
        """Flush the final (possibly partial) batch, then signal end of stream."""
        bin_batch = bytes(self.ba[0:self.buffer.get_offset()])
        self.push(bin_batch)
        self.send_end()
        L.info(f'closing RollSiteWriteBatch for name: {self.name}, '
               f'total push count: {self.push_cnt}')

    def put(self, k, v):
        """Append one key/value pair, pushing and re-allocating the buffer on overflow."""
        if self.obj_type == 'object':
            # single-object transfers remember the (deserialized) key as the tag
            L.debug(f"set tagged_key: {k}")
            self.tagged_key = _serdes.deserialize(k)
        try:
            self.writer.write(k, v)
        except IndexError as e:
            # buffer full: ship what we have, then grow the buffer and retry once
            bin_batch = bytes(self.ba[0:self.buffer.get_offset()])
            self.push(bin_batch)
            # TODO:0: replace 1024 with constant
            self.ba = bytearray(
                max(self.__bin_packet_len, len(k) + len(v) + 1024))
            self.buffer = ArrayByteBuffer(self.ba)
            self.writer = PairBinWriter(pair_buffer=self.buffer)
            self.writer.write(k, v)
        except Exception as e:
            L.error(f"Unexpected error: {sys.exc_info()[0]}")
            raise e

    def increase_push_count(self):
        # simple counter used as the sequence number for markEnd
        self.push_cnt += 1

    def get_push_count(self):
        return self.push_cnt
def write(self):
    """Push whatever has been packed so far and reset the buffer."""
    pending = bytes(self.ba[0:self.buffer.get_offset()])
    self.push(pending)
    # NOTE(review): self.writer still wraps the previous buffer object —
    # confirm callers rebuild the writer before reusing this batch
    self.buffer = ArrayByteBuffer(self.ba)
def __init__(self, file):
    """Memory-map the whole file and wrap it in a pair reader starting at offset 0."""
    mapped = mmap.mmap(file.fileno(), 0)
    mapped.seek(0)
    self.mm = mapped
    self.reader = PairBinReader(ArrayByteBuffer(self.mm))