def put_all(self, items, output=None, options: dict = None):
    if options is None:
        options = {}
    include_key = options.get("include_key", True)
    job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

    # TODO:1: consider multiprocessing scenario. parallel size should be
    # sent to egg_pair to set write signal count
    def send_command():
        job = ErJob(id=job_id,
                    name=RollPair.PUT_ALL,
                    inputs=[self.__store],
                    outputs=[self.__store],
                    functors=[])
        result = self.__command_client.simple_sync_send(
            input=job,
            output_type=ErJob,
            endpoint=self.ctx.get_roll()._command_endpoint,
            command_uri=CommandURI(
                f'{RollPair.ROLL_PAIR_URI_PREFIX}/{RollPair.RUN_JOB}'),
            serdes_type=SerdesTypes.PROTOBUF)
        return result

    # Dispatch the job on a background thread so the local scatter below
    # can overlap with the server-side run.
    th = Thread(target=send_command, name=f'roll_pair-send_command-{job_id}')
    th.start()

    populated_store = self.ctx.populate_processor(self.__store)
    shuffler = TransferPair(job_id)
    broker = FifoBroker()
    bb = BatchBroker(broker)
    scatter_future = shuffler.scatter(broker, self.partitioner, populated_store)

    key_serdes = self.key_serdes
    value_serdes = self.value_serdes
    try:
        if include_key:
            for k, v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
        else:
            # No keys supplied: assign sequential integer keys.
            k = 0
            for v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
                k += 1
    finally:
        bb.signal_write_finish()

    scatter_results = scatter_future.result()
    L.debug(f"scatter_results: {scatter_results}")
    th.join()
    return RollPair(populated_store, self.ctx)
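For orientation, here is a minimal, stdlib-only sketch of the producer/consumer handoff that put_all builds on: the caller fills a FIFO broker while background machinery drains it, and an explicit finish signal unblocks the consumer. FifoBroker, BatchBroker, and signal_write_finish() are EggRoll's own classes; the queue-plus-sentinel below only mirrors their assumed semantics.

import queue
import threading

_FINISHED = object()  # stands in for BatchBroker.signal_write_finish()

def drain(q: queue.Queue, sink: list):
    # Consume until the write-finish sentinel arrives.
    while True:
        item = q.get()
        if item is _FINISHED:
            break
        sink.append(item)

q = queue.Queue()
received = []
worker = threading.Thread(target=drain, args=(q, received))
worker.start()
try:
    for kv in [(b"k1", b"v1"), (b"k2", b"v2")]:
        q.put(kv)
finally:
    q.put(_FINISHED)  # analogous to bb.signal_write_finish() above
worker.join()
assert received == [(b"k1", b"v1"), (b"k2", b"v2")]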
def put_all(self, items, output=None, options: dict = None):
    if options is None:
        options = {}
    include_key = options.get("include_key", True)
    job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

    # TODO:1: consider multiprocessing scenario. parallel size should be
    # sent to egg_pair to set write signal count
    def send_command():
        job = ErJob(id=job_id,
                    name=RollPair.PUT_ALL,
                    inputs=[self.__store],
                    outputs=[self.__store],
                    functors=[])
        task_results = self._run_job(job)
        return self.__get_output_from_result(task_results)

    th = Thread(target=send_command, name=f'roll_pair-send_command-{job_id}')
    th.start()

    populated_store = self.ctx.populate_processor(self.__store)
    shuffler = TransferPair(job_id)
    fifo_broker = FifoBroker()
    bb = BatchBroker(fifo_broker)
    scatter_future = shuffler.scatter(fifo_broker, self.partitioner, populated_store)

    key_serdes = self.key_serdes
    value_serdes = self.value_serdes
    try:
        if include_key:
            for k, v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
        else:
            k = 0
            for v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
                k += 1
    finally:
        bb.signal_write_finish()

    scatter_results = scatter_future.result()
    th.join()
    return RollPair(populated_store, self.ctx)
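A hedged caller-side sketch for context; ctx is assumed to be a RollPairContext and load() an assumed entry point, but the options dict mirrors the include_key handling visible above.

# Hypothetical usage (ctx and load() are assumptions):
rp = ctx.load(namespace="demo_ns", name="demo_table")
# Default include_key=True: items are (key, value) pairs.
rp.put_all([("k1", "v1"), ("k2", "v2")])
# include_key=False: values only; put_all assigns sequential integer keys.
rp.put_all(["v1", "v2", "v3"], options={"include_key": False})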
def _run_unary(self, func, task, shuffle=False):
    key_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
    value_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
    with create_adapter(task._inputs[0]) as input_db:
        L.debug(f"create_store_adapter: {task._inputs[0]}")
        with input_db.iteritems() as rb:
            L.debug(f"create_store_adapter_iter: {task._inputs[0]}")
            from eggroll.roll_pair.transfer_pair import TransferPair, BatchBroker
            if shuffle:
                total_partitions = \
                    task._inputs[0]._store_locator._total_partitions
                output_store = task._job._outputs[0]
                shuffle_broker = FifoBroker()
                write_bb = BatchBroker(shuffle_broker)
                try:
                    shuffler = TransferPair(transfer_id=task._job._id)
                    # Receive side: collect shuffled batches into the output
                    # partition. Send side: scatter this partition's output
                    # by key hash.
                    store_future = shuffler.store_broker(
                        task._outputs[0], True, total_partitions)
                    scatter_future = shuffler.scatter(
                        shuffle_broker,
                        partitioner(hash_func=hash_code,
                                    total_partitions=total_partitions),
                        output_store)
                    func(rb, key_serdes, value_serdes, write_bb)
                finally:
                    write_bb.signal_write_finish()
                scatter_results = scatter_future.result()
                store_result = store_future.result()
                L.debug(f"scatter_result: {scatter_results}")
                L.debug(f"gather_result: {store_result}")
            else:
                # TODO: modification may be needed when store options finished
                with create_adapter(task._outputs[0],
                                    options=task._job._options) as db, \
                        db.new_batch() as wb:
                    func(rb, key_serdes, value_serdes, wb)
    L.debug(f"close_store_adapter: {task._inputs[0]}")
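The scatter path above hinges on partitioner(hash_func=hash_code, total_partitions=...). A self-contained sketch of the assumed semantics, with hash_code modeled on a Java-style rolling hash over bytes (the real EggRoll helpers may differ):

def hash_code(key: bytes) -> int:
    # 31-based rolling hash kept in 32 bits (an assumption about the
    # real hash_code helper).
    h = 0
    for b in key:
        h = (31 * h + b) & 0xFFFFFFFF
    return h

def partitioner(hash_func, total_partitions):
    # Map a serialized key to a partition index in [0, total_partitions).
    return lambda key: hash_func(key) % total_partitions

p = partitioner(hash_func=hash_code, total_partitions=4)
assert p(b"some-key") in range(4)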
def _run_unary(self, func, task, shuffle=False, reduce_op=None):
    input_store_head = task._job._inputs[0]
    output_store_head = task._job._outputs[0]
    input_key_serdes = create_serdes(
        input_store_head._store_locator._serdes)
    input_value_serdes = create_serdes(
        input_store_head._store_locator._serdes)
    output_key_serdes = create_serdes(
        output_store_head._store_locator._serdes)
    output_value_serdes = create_serdes(
        output_store_head._store_locator._serdes)

    if input_key_serdes != output_key_serdes or \
            input_value_serdes != output_value_serdes:
        raise ValueError(
            f"input key-value serdes: {(input_key_serdes, input_value_serdes)} "
            f"differ from output key-value serdes: {(output_key_serdes, output_value_serdes)}")

    if shuffle:
        from eggroll.roll_pair.transfer_pair import TransferPair
        input_total_partitions = \
            input_store_head._store_locator._total_partitions
        output_total_partitions = \
            output_store_head._store_locator._total_partitions
        output_store = output_store_head
        my_server_node_id = get_static_er_conf().get('server_node_id', None)
        shuffler = TransferPair(transfer_id=task._job._id)

        # Only start a store-side broker if this process hosts the
        # output partition.
        if not task._outputs or \
                (my_server_node_id is not None
                 and my_server_node_id != task._outputs[0]._processor._server_node_id):
            store_future = None
        else:
            store_future = shuffler.store_broker(
                store_partition=task._outputs[0],
                is_shuffle=True,
                total_writers=input_total_partitions,
                reduce_op=reduce_op)

        # Likewise, only scatter if this process hosts the input partition.
        if not task._inputs or \
                (my_server_node_id is not None
                 and my_server_node_id != task._inputs[0]._processor._server_node_id):
            scatter_future = None
        else:
            shuffle_broker = FifoBroker()
            write_bb = BatchBroker(shuffle_broker)
            try:
                scatter_future = shuffler.scatter(
                    input_broker=shuffle_broker,
                    partition_function=partitioner(
                        hash_func=hash_code,
                        total_partitions=output_total_partitions),
                    output_store=output_store)
                with create_adapter(task._inputs[0]) as input_db, \
                        input_db.iteritems() as rb:
                    func(rb, input_key_serdes, input_value_serdes, write_bb)
            finally:
                write_bb.signal_write_finish()

        if scatter_future:
            scatter_results = scatter_future.result()
        else:
            scatter_results = 'no scatter for this partition'
        if store_future:
            store_results = store_future.result()
        else:
            store_results = 'no store for this partition'
    else:  # no shuffle
        with create_adapter(task._inputs[0]) as input_db, \
                input_db.iteritems() as rb, \
                create_adapter(task._outputs[0],
                               options=task._job._options) as db, \
                db.new_batch() as wb:
            func(rb, input_key_serdes, input_value_serdes, wb)
    L.trace(f"close_store_adapter: {task._inputs[0]}")
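The new reduce_op parameter is forwarded to store_broker. A minimal sketch of the assumed merge semantics: when a reduce_op is supplied, a value arriving for an already-stored key is folded into the existing value instead of overwriting it (store_with_reduce is illustrative, not an EggRoll API).

def store_with_reduce(pairs, reduce_op=None):
    store = {}
    for k, v in pairs:
        if reduce_op is not None and k in store:
            store[k] = reduce_op(store[k], v)  # merge instead of overwrite
        else:
            store[k] = v
    return store

merged = store_with_reduce(
    [(b"a", 1), (b"b", 2), (b"a", 3)],
    reduce_op=lambda old, new: old + new)
assert merged == {b"a": 4, b"b": 2}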