Example #1
    def recv(self, endpoint: ErEndpoint, tag, broker=FifoBroker()):
        try:

            # Pumps the streaming gRPC response into the broker, then marks the write finished.
            @_exception_logger
            def fill_broker(iterable: Iterable, broker):
                iterator = iter(iterable)
                for e in iterator:
                    broker.put(e)
                broker.signal_write_finish()

            channel = self.__grpc_channel_factory.create_channel(endpoint)

            stub = transfer_pb2_grpc.TransferServiceStub(channel)
            request = transfer_pb2.TransferBatch(
                header=transfer_pb2.TransferHeader(id=1, tag=tag))

            response_iter = stub.recv(request,
                                      metadata=[(TRANSFER_BROKER_NAME, tag)])
            # With no broker supplied, return the raw response iterator; otherwise
            # fill the broker on a background thread and return it immediately.
            if broker is None:
                return response_iter
            else:
                t = Thread(target=fill_broker, args=[response_iter, broker])
                t.start()
            return broker
        except Exception:
            L.error(f'Error calling to {endpoint} in TransferClient.recv')
            raise
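
A minimal consumer sketch for the broker returned by recv, modelled on the polling loop in Example #2; drain_recv_broker and handle_batch are hypothetical names, and the queue.Empty/BrokerClosed behaviour is assumed from that example.

    import queue
    # FifoBroker and BrokerClosed are assumed importable as in the surrounding examples.

    def drain_recv_broker(broker, handle_batch):
        # Poll the broker until every writer has signalled completion.
        while not broker.is_closable():
            try:
                batch = broker.get(block=True, timeout=0.1)
                if batch:
                    handle_batch(batch)  # hypothetical per-batch callback
            except queue.Empty:
                continue
            except BrokerClosed:
                break
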
Example #2
    def transfer_batch_generator_from_broker(broker: FifoBroker, tag):
        # Wrap each piece of data pulled from the broker in a TransferBatch
        # (with an incrementing header id) until the broker becomes closable.
        i = 0
        while not broker.is_closable():
            try:
                data = broker.get(block=True, timeout=0.1)
                if data:
                    header = transfer_pb2.TransferHeader(id=i, tag=tag)
                    batch = transfer_pb2.TransferBatch(header=header, data=data)
                    i += 1

                    yield batch
            except queue.Empty:
                # nothing available within the timeout; poll again
                pass
            except BrokerClosed:
                break
            except Exception as e:
                print(e)
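
For illustration, a hedged sketch of driving this generator locally: fill a FifoBroker with raw byte payloads, finish writing, and iterate the resulting TransferBatch messages. It is called as a plain function here; in the source it may be a static method, and the payloads and tag are placeholders.

    broker = FifoBroker()
    broker.put(b'payload-0')
    broker.put(b'payload-1')
    broker.signal_write_finish()
    # Each yielded batch carries an incrementing header id and the raw bytes.
    for batch in transfer_batch_generator_from_broker(broker, tag='demo'):
        print(batch.header.id, len(batch.data))
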
Example #3
    def get_or_create_broker(key: str, maxsize: int = _DEFAULT_QUEUE_SIZE, write_signals=1):
        # double-checked locking: take the mutex only if the broker may need creating
        if not TransferService.has_broker(key):
            with TransferService.mutex:
                if not TransferService.has_broker(key):
                    L.info(f'creating broker: {key}, write signals: {write_signals}')
                    final_size = maxsize if maxsize > 0 else TransferService._DEFAULT_QUEUE_SIZE
                    TransferService.data_buffer[key] = \
                        FifoBroker(maxsize=final_size, writers=write_signals, name=key)

        return TransferService.data_buffer[key]
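
A hedged writer-side sketch built on this helper: fetch or create the broker for a tag, push a serialized payload, and signal that this writer is finished. The tag and payload are placeholders, and it assumes that write_signals is the number of signal_write_finish calls required before the broker can close (the default of 1 is used here).

    broker = TransferService.get_or_create_broker('job-1-partition-0')  # hypothetical tag
    broker.put(b'some-serialized-batch')
    broker.signal_write_finish()  # the single expected writer is done
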
Example #4
    def put_all(self, items, output=None, options: dict = None):
        if options is None:
            options = {}
        include_key = options.get("include_key", True)
        job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

        # TODO:1: consider multiprocessing scenario. parallel size should be sent to egg_pair to set write signal count
        def send_command():
            job = ErJob(id=job_id,
                        name=RollPair.PUT_ALL,
                        inputs=[self.__store],
                        outputs=[self.__store],
                        functors=[])

            result = self.__command_client.simple_sync_send(
                input=job,
                output_type=ErJob,
                endpoint=self.ctx.get_roll()._command_endpoint,
                command_uri=CommandURI(
                    f'{RollPair.ROLL_PAIR_URI_PREFIX}/{RollPair.RUN_JOB}'),
                serdes_type=SerdesTypes.PROTOBUF)

            return result

        # Run the job command asynchronously while this thread streams items into the broker.
        th = Thread(target=send_command,
                    name=f'roll_pair-send_command-{job_id}')
        th.start()
        populated_store = self.ctx.populate_processor(self.__store)
        shuffler = TransferPair(job_id)
        broker = FifoBroker()
        bb = BatchBroker(broker)
        scatter_future = shuffler.scatter(broker, self.partitioner,
                                          populated_store)

        key_serdes = self.key_serdes
        value_serdes = self.value_serdes
        try:
            if include_key:
                for k, v in items:
                    bb.put(item=(key_serdes.serialize(k),
                                 value_serdes.serialize(v)))
            else:
                for k, v in enumerate(items):
                    bb.put(item=(key_serdes.serialize(k),
                                 value_serdes.serialize(v)))
        finally:
            bb.signal_write_finish()

        scatter_results = scatter_future.result()
        L.debug(f"scatter_results: {scatter_results}")
        th.join()
        return RollPair(populated_store, self.ctx)
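
A minimal usage sketch for put_all, assuming rp is an existing RollPair instance and the data is placeholder strings. With include_key left at its default of True, items must be (key, value) pairs; with include_key=False the keys are generated as a running integer, matching the else branch above.

    rp.put_all([('k1', 'v1'), ('k2', 'v2')])                        # include_key defaults to True
    rp.put_all(['v1', 'v2', 'v3'], options={'include_key': False})  # keys become 0, 1, 2
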
Example #5
    def get_or_create_broker(key: str,
                             maxsize: int = _DEFAULT_QUEUE_SIZE,
                             write_signals=1):
        with TransferService.mutex:
            if not TransferService.has_broker(key):
                L.trace(
                    f'creating broker={key}, write signals={write_signals}')
                final_size = maxsize if maxsize > 0 else TransferService._DEFAULT_QUEUE_SIZE
                TransferService.data_buffer[key] = \
                    FifoBroker(maxsize=final_size, writers=write_signals, name=key)
            # set the per-key event so anything waiting for this broker is released
            if key not in TransferService.event_buffer:
                TransferService.event_buffer[key] = Event()
            TransferService.event_buffer[key].set()

            return TransferService.data_buffer[key]
Example #6
    def put_all(self, items, output=None, options: dict = None):
        if options is None:
            options = {}
        include_key = options.get("include_key", True)
        job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

        # TODO:1: consider multiprocessing scenario. parallel size should be sent to egg_pair to set write signal count
        def send_command():
            job = ErJob(id=job_id,
                        name=RollPair.PUT_ALL,
                        inputs=[self.__store],
                        outputs=[self.__store],
                        functors=[])

            task_results = self._run_job(job)

            return self.__get_output_from_result(task_results)

        th = Thread(target=send_command,
                    name=f'roll_pair-send_command-{job_id}')
        th.start()
        populated_store = self.ctx.populate_processor(self.__store)
        shuffler = TransferPair(job_id)
        fifo_broker = FifoBroker()
        bb = BatchBroker(fifo_broker)
        scatter_future = shuffler.scatter(fifo_broker, self.partitioner,
                                          populated_store)

        key_serdes = self.key_serdes
        value_serdes = self.value_serdes
        try:
            if include_key:
                for k, v in items:
                    bb.put(item=(key_serdes.serialize(k),
                                 value_serdes.serialize(v)))
            else:
                for k, v in enumerate(items):
                    bb.put(item=(key_serdes.serialize(k),
                                 value_serdes.serialize(v)))
        finally:
            bb.signal_write_finish()

        scatter_results = scatter_future.result()
        th.join()
        return RollPair(populated_store, self.ctx)
Example #7
    def scatter(self, input_broker, partition_function, output_store):
        output_partitions = output_store._partitions
        total_partitions = len(output_partitions)
        L.debug(
            f'scatter starts for {self.__transfer_id}, total partitions: {total_partitions}, output_store: {output_store}'
        )
        partitioned_brokers = [FifoBroker() for _ in range(total_partitions)]
        partitioned_bb = [BatchBroker(v) for v in partitioned_brokers]
        futures = []

        @_exception_logger
        def do_partition():
            # Route every (k, v) pair from the input broker to the broker of its
            # target partition, then signal all partition brokers as finished.
            L.debug(f'do_partition start for {self.__transfer_id}')
            done_count = 0
            for k, v in BatchBroker(input_broker):
                partitioned_bb[partition_function(k)].put((k, v))
                done_count += 1
            L.debug(f"do_partition end for transfer id: {self.__transfer_id}, "
                    f"total partitions: {total_partitions}, "
                    f"pairs partitioned: {done_count}")
            for broker in partitioned_bb:
                broker.signal_write_finish()
            return done_count

        futures.append(self._executor_pool.submit(do_partition))
        client = TransferClient()

        def do_send_all():
            # Stream each partition's broker to the processor hosting that partition.
            send_all_futs = []
            for i, part in enumerate(output_partitions):
                tag = self.__generate_tag(i)
                L.debug(f"do_send_all for tag: {tag}, "
                        f"active thread count: {threading.active_count()}")
                fut = client.send(
                    TransferPair.pair_to_bin_batch(
                        BatchBroker(partitioned_brokers[i])),
                    part._processor._transfer_endpoint, tag)
                send_all_futs.append(fut)
            return CompositeFuture(send_all_futs).result()

        futures.append(self._executor_pool.submit(do_send_all))
        return CompositeFuture(futures)
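
scatter only requires partition_function to map a serialized key to an index in [0, total_partitions). A minimal sketch of such a factory, assuming hash_func returns a non-negative integer for a bytes key; Examples #8 and #9 obtain the equivalent via the partitioner helper. simple_partitioner is a hypothetical name.

    def simple_partitioner(hash_func, total_partitions):
        # Returns a function mapping a key to the broker/partition index it belongs to.
        def partition_function(key):
            return hash_func(key) % total_partitions
        return partition_function
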
Example #8
    def _run_unary(self, func, task, shuffle=False):
        key_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
        value_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
        with create_adapter(task._inputs[0]) as input_db:
            L.debug(f"create_store_adapter: {task._inputs[0]}")
            with input_db.iteritems() as rb:
                L.debug(f"create_store_adapter_iter: {task._inputs[0]}")
                from eggroll.roll_pair.transfer_pair import TransferPair, BatchBroker
                if shuffle:
                    total_partitions = task._inputs[
                        0]._store_locator._total_partitions
                    output_store = task._job._outputs[0]
                    shuffle_broker = FifoBroker()
                    write_bb = BatchBroker(shuffle_broker)
                    try:
                        shuffler = TransferPair(transfer_id=task._job._id)
                        store_future = shuffler.store_broker(
                            task._outputs[0], True, total_partitions)
                        scatter_future = shuffler.scatter(
                            shuffle_broker,
                            partitioner(hash_func=hash_code,
                                        total_partitions=total_partitions),
                            output_store)
                        func(rb, key_serdes, value_serdes, write_bb)
                    finally:
                        write_bb.signal_write_finish()
                    scatter_results = scatter_future.result()
                    store_result = store_future.result()
                    L.debug(f"scatter_result:{scatter_results}")
                    L.debug(f"gather_result:{store_result}")
                else:
                    # TODO: modification may be needed when store options finished
                    with create_adapter(task._outputs[0],
                                        options=task._job._options
                                        ) as db, db.new_batch() as wb:
                        func(rb, key_serdes, value_serdes, wb)
                L.debug(f"close_store_adapter:{task._inputs[0]}")
Example #9
    def _run_unary(self, func, task, shuffle=False, reduce_op=None):
        input_store_head = task._job._inputs[0]
        output_store_head = task._job._outputs[0]
        input_key_serdes = create_serdes(
            input_store_head._store_locator._serdes)
        input_value_serdes = create_serdes(
            input_store_head._store_locator._serdes)
        output_key_serdes = create_serdes(
            output_store_head._store_locator._serdes)
        output_value_serdes = create_serdes(
            output_store_head._store_locator._serdes)

        if input_key_serdes != output_key_serdes or \
                input_value_serdes != output_value_serdes:
            raise ValueError(
                f"input key-value serdes:{(input_key_serdes, input_value_serdes)} "
                f"differ from output key-value serdes:{(output_key_serdes, output_value_serdes)}"
            )

        if shuffle:
            from eggroll.roll_pair.transfer_pair import TransferPair
            input_total_partitions = input_store_head._store_locator._total_partitions
            output_total_partitions = output_store_head._store_locator._total_partitions
            output_store = output_store_head

            my_server_node_id = get_static_er_conf().get(
                'server_node_id', None)
            shuffler = TransferPair(transfer_id=task._job._id)
            # create the store-side broker only if the output partition is hosted on this node
            if not task._outputs or \
                    (my_server_node_id is not None
                     and my_server_node_id != task._outputs[0]._processor._server_node_id):
                store_future = None
            else:
                store_future = shuffler.store_broker(
                    store_partition=task._outputs[0],
                    is_shuffle=True,
                    total_writers=input_total_partitions,
                    reduce_op=reduce_op)

            # scatter only if the input partition is hosted on this node
            if not task._inputs or \
                    (my_server_node_id is not None
                     and my_server_node_id != task._inputs[0]._processor._server_node_id):
                scatter_future = None
            else:
                shuffle_broker = FifoBroker()
                write_bb = BatchBroker(shuffle_broker)
                try:
                    scatter_future = shuffler.scatter(
                        input_broker=shuffle_broker,
                        partition_function=partitioner(
                            hash_func=hash_code,
                            total_partitions=output_total_partitions),
                        output_store=output_store)
                    with create_adapter(task._inputs[0]) as input_db, \
                        input_db.iteritems() as rb:
                        func(rb, input_key_serdes, input_value_serdes,
                             write_bb)
                finally:
                    write_bb.signal_write_finish()

            if scatter_future:
                scatter_results = scatter_future.result()
            else:
                scatter_results = 'no scatter for this partition'
            if store_future:
                store_results = store_future.result()
            else:
                store_results = 'no store for this partition'
        else:  # no shuffle
            with create_adapter(task._inputs[0]) as input_db, \
                    input_db.iteritems() as rb, \
                    create_adapter(task._outputs[0], options=task._job._options) as db, \
                    db.new_batch() as wb:
                func(rb, input_key_serdes, input_value_serdes, wb)
            L.trace(f"close_store_adatper:{task._inputs[0]}")
Example #10
    def test_send(self):
        transfer_client = TransferClient()

        broker = FifoBroker()

        broker.put(b'hello')
        broker.put(b'world')
        broker.put(b'this')
        broker.put(b'is')
        broker.put(b'a')
        broker.put(b'test')
        broker.signal_write_finish()
        future = transfer_client.send(broker=broker,
                                      endpoint=ErEndpoint(host='localhost',
                                                          port=transfer_port),
                                      tag='test')
        future.result()
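
A matching receive-side sketch for this test, reusing TransferClient.recv from Example #1. It assumes a transfer service is listening on transfer_port in another process and that the tag matches the one used by send; the drain loop follows Example #2, and the imports for FifoBroker/BrokerClosed are assumed to be the same as in the examples above.

    import queue

    recv_broker = TransferClient().recv(
        endpoint=ErEndpoint(host='localhost', port=transfer_port),
        tag='test',
        broker=FifoBroker())
    while not recv_broker.is_closable():
        try:
            # each item is a TransferBatch streamed back from the peer
            print(recv_broker.get(block=True, timeout=0.1))
        except queue.Empty:
            continue
        except BrokerClosed:
            break
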