Ejemplo n.º 1
0
def get_debug_test_context(is_standalone=False, manager_port=4670, egg_port=20001, transfer_port=20002, session_id='testing'):
    """Build a RollPairContext wired to locally running debug processes.

    The session is assembled by hand from one EGG_PAIR processor and one
    ROLL_PAIR_MASTER processor, both addressed at 127.0.0.1.

    Args:
        is_standalone: when true, the session deploy mode option is set to
            "standalone".
        manager_port: port written to both the cluster manager and the node
            manager options, and used as the roll processor's command port.
        egg_port: command port of the egg processor.
        transfer_port: port written to the transfer service option; also
            used as the egg processor's transfer port.
        session_id: id given to the created ErSession.

    Returns:
        A RollPairContext built on the hand-assembled session.
    """
    local_host = "127.0.0.1"
    self_server_node_id = 2

    options = {}
    if is_standalone:
        options[SessionConfKeys.CONFKEY_SESSION_DEPLOY_MODE] = "standalone"
    options[TransferConfKeys.CONFKEY_TRANSFER_SERVICE_HOST] = local_host
    options[TransferConfKeys.CONFKEY_TRANSFER_SERVICE_PORT] = str(transfer_port)
    # The same port is deliberately written to both manager options.
    options[ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_PORT] = str(manager_port)
    options[NodeManagerConfKeys.CONFKEY_NODE_MANAGER_PORT] = str(manager_port)

    egg = ErProcessor(id=1,
                      server_node_id=self_server_node_id,
                      processor_type=ProcessorTypes.EGG_PAIR,
                      status=ProcessorStatus.RUNNING,
                      command_endpoint=ErEndpoint(local_host, egg_port),
                      transfer_endpoint=ErEndpoint(local_host, transfer_port))

    roll = ErProcessor(id=1,
                       server_node_id=self_server_node_id,
                       processor_type=ProcessorTypes.ROLL_PAIR_MASTER,
                       status=ProcessorStatus.RUNNING,
                       command_endpoint=ErEndpoint(local_host, manager_port))

    session = ErSession(session_id,
                        processors=[egg, roll],
                        options=options)
    return RollPairContext(session)
Ejemplo n.º 2
0
def get_option(role,
               self_party_id=None,
               conf_file=default_props_file,
               deploy_mode=DeployModes.CLUSTER):
    """Assemble roll site options from the 'eggroll' section of a conf file.

    Args:
        role: value stored under 'self_role'.
        self_party_id: value stored under 'self_party_id'; when None the
            party id is read from the conf file instead.
        conf_file: path of the properties file handed to configparser.
        deploy_mode: value stored under the roll site deploy mode key.

    Returns:
        A dict holding 'self_party_id', 'self_role', 'proxy_endpoint' (an
        ErEndpoint built from the configured host and port) and the deploy
        mode entry.
    """
    print(f'conf file: {conf_file}')
    parser = configparser.ConfigParser()
    parser.read(conf_file)
    section = parser['eggroll']

    # The conf file is only consulted for the party id when none is supplied.
    if self_party_id is not None:
        party_id = self_party_id
    else:
        party_id = section[RollSiteConfKeys.EGGROLL_ROLLSITE_PARTY_ID.key]

    proxy_host = section[RollSiteConfKeys.EGGROLL_ROLLSITE_HOST.key]
    proxy_port = int(section[RollSiteConfKeys.EGGROLL_ROLLSITE_PORT.key])

    return {
        'self_party_id': party_id,
        'self_role': role,
        'proxy_endpoint': ErEndpoint(host=proxy_host, port=proxy_port),
        RollSiteConfKeys.EGGROLL_ROLLSITE_DEPLOY_MODE.key: deploy_mode,
    }
Ejemplo n.º 3
0
    def __init__(self, options: dict = None):
        """Bind the adapter to a partition and, if configured, make it writable.

        Args:
            options: must contain 'er_partition'. When 'roll_site_header' is
                present and non-empty, 'proxy_endpoint' and 'obj_type' are
                required as well and the adapter becomes writable.
        """
        options = {} if options is None else options

        super().__init__(options)

        partition = options['er_partition']
        self.partition = partition
        self.store_locator = partition._store_locator
        self.partition_id = partition._id

        self.namespace = self.store_locator._namespace

        # A dedicated ROLLPAIR_ROLLSITE store locator used to be built here;
        # that code is currently disabled.

        self.roll_site_header_string = options.get('roll_site_header', None)
        self.is_writable = False
        if not self.roll_site_header_string:
            # No header supplied: the adapter stays read-only.
            return

        header_bytes = self.roll_site_header_string.encode(stringify_charset)
        self.roll_site_header = ErRollSiteHeader.from_proto_string(header_bytes)
        endpoint_bytes = options['proxy_endpoint'].encode(stringify_charset)
        self.proxy_endpoint = ErEndpoint.from_proto_string(endpoint_bytes)
        self.obj_type = options['obj_type']
        self.is_writable = True

        L.info(
            f"writable RollSiteAdapter: {self.namespace}, {self.partition_id}. proxy_endpoint: {self.proxy_endpoint}, partition: {self.partition}"
        )
Ejemplo n.º 4
0
def init_roll_site_context(runtime_conf, session_id):
    """Create the RollPairContext / RollSiteContext pair for a job.

    Args:
        runtime_conf: mapping with a "local" entry providing "role" and
            "party_id".
        session_id: id passed to the RollSiteContext.

    Returns:
        A tuple ``(rp_context, rs_context)``.
    """
    from eggroll.roll_site.roll_site import RollSiteContext
    from eggroll.roll_pair.roll_pair import RollPairContext
    LOGGER.info("init_roll_site_context runtime_conf: {}".format(runtime_conf))
    fate_session = FateSession.get_instance()
    rp_context = RollPairContext(fate_session._eggroll.get_session())

    local_conf = runtime_conf.get("local")
    role = local_conf.get("role")
    party_id = str(local_conf.get("party_id"))

    # The proxy address comes from the project's server_conf.json.
    conf_path = file_utils.get_project_base_directory() + "/arch/conf/server_conf.json"
    proxy_conf = file_utils.load_json_conf(conf_path).get('servers').get('proxy')
    host = proxy_conf.get("host")
    port = proxy_conf.get("port")

    rs_options = {
        'self_role': role,
        'self_party_id': party_id,
        'proxy_endpoint': ErEndpoint(host, int(port))
    }
    rs_context = RollSiteContext(session_id, rp_ctx=rp_context, options=rs_options)
    LOGGER.info("init_roll_site_context done: {}".format(rs_context.__dict__))
    return rp_context, rs_context
Ejemplo n.º 5
0
    def __init__(self,
                 roll_site_session_id,
                 rp_ctx: RollPairContext,
                 options: dict = None):
        """Set up a roll site context on top of an existing RollPairContext.

        Args:
            roll_site_session_id: id of this roll site session; also the
                prefix of the optional push session id.
            rp_ctx: RollPairContext whose session receives the exit tasks.
            options: must contain 'self_role', 'self_party_id' and
                'proxy_endpoint' (an ErEndpoint or a "host:port" string).

        Raises:
            ValueError: if 'proxy_endpoint' is neither a str nor an ErEndpoint.
        """
        options = {} if options is None else options
        self.roll_site_session_id = roll_site_session_id
        self.rp_ctx = rp_ctx

        push_enabled_key = RollSiteConfKeys.EGGROLL_ROLLSITE_PUSH_SESSION_ENABLED
        self.push_session_enabled = push_enabled_key.get_with(options)
        if not self.push_session_enabled:
            self._push_session = None
            self._push_rp_ctx = None
        else:
            # A separate session is used for pushing roll_pairs and objects.
            self._push_session = ErSession(
                session_id=roll_site_session_id + "_push",
                options=rp_ctx.get_session().get_all_options())
            self._push_rp_ctx = RollPairContext(session=self._push_session)
            L.info(
                f"push_session={self._push_session.get_session_id()} enabled")

        self.role = options["self_role"]
        self.party_id = str(options["self_party_id"])
        self._options = options

        self._registered_comm_types = {}
        self.register_comm_type('grpc', RollSiteGrpc)

        raw_endpoint = options["proxy_endpoint"]
        if isinstance(raw_endpoint, str):
            # "host:port" form: only the first two ':'-separated fields are used.
            fields = raw_endpoint.split(':')
            self.proxy_endpoint = ErEndpoint(host=fields[0].strip(),
                                             port=int(fields[1].strip()))
        elif isinstance(raw_endpoint, ErEndpoint):
            self.proxy_endpoint = raw_endpoint
        else:
            raise ValueError("endpoint only support str and ErEndpoint type")

        deploy_mode = RollSiteConfKeys.EGGROLL_ROLLSITE_DEPLOY_MODE.get_with(
            options)
        self.is_standalone = deploy_mode == "standalone"
        # Creation of a gRPC stub for the proxy endpoint is currently disabled.

        self.pushing_latch = CountDownLatch(0)
        self.rp_ctx.get_session().add_exit_task(self._wait_push_complete)
        if self.push_session_enabled:

            def stop_push_session():
                self._push_session.stop()

            # Registered after the wait task so both run at session exit.
            self.rp_ctx.get_session().add_exit_task(stop_push_session)
        timeout_key = RollSiteConfKeys.EGGROLL_ROLLSITE_PUSH_OVERALL_TIMEOUT_SEC
        self._wait_push_exit_timeout = int(timeout_key.get_with(options))

        L.info(f"inited RollSiteContext: {self.__dict__}")
Ejemplo n.º 6
0
 def __init__(self, options: dict = None):
     """Create a client addressed at the configured node manager.

     Args:
         options: must contain the node manager host and port keys; may
             contain 'serdes_type' (defaults to SerdesTypes.PROTOBUF).
     """
     options = {} if options is None else options
     node_manager_host = options[NodeManagerConfKeys.CONFKEY_NODE_MANAGER_HOST]
     node_manager_port = int(
         options[NodeManagerConfKeys.CONFKEY_NODE_MANAGER_PORT])
     self.__endpoint = ErEndpoint(node_manager_host, node_manager_port)
     self.__serdes_type = options.get('serdes_type', SerdesTypes.PROTOBUF)
     self.__command_client = CommandClient()
Ejemplo n.º 7
0
    def __init__(self, options: dict = None):
        """Bind the adapter to a partition and load its retry/timeout settings.

        Args:
            options: must contain 'er_partition'. When 'roll_site_header' is
                present and non-empty, 'proxy_endpoint' and 'obj_type' are
                required as well and the adapter becomes writable. Retry and
                timeout values are resolved through RollSiteConfKeys.
        """
        options = {} if options is None else options

        super().__init__(options)

        partition = options['er_partition']
        self.partition = partition
        self.store_locator = partition._store_locator
        self.partition_id = partition._id

        self.namespace = self.store_locator._namespace

        # A dedicated ROLLPAIR_ROLLSITE store locator used to be built here;
        # that code is currently disabled.

        self.roll_site_header_string = options.get('roll_site_header', None)
        self.is_writable = False
        if self.roll_site_header_string:
            header_bytes = self.roll_site_header_string.encode(stringify_charset)
            self.roll_site_header = ErRollSiteHeader.from_proto_string(header_bytes)
            # Record which partition this adapter serves in the header options.
            self.roll_site_header._options['partition_id'] = self.partition_id
            endpoint_bytes = options['proxy_endpoint'].encode(stringify_charset)
            self.proxy_endpoint = ErEndpoint.from_proto_string(endpoint_bytes)
            self.obj_type = options['obj_type']
            self.is_writable = True

            L.trace(
                f"writable RollSiteAdapter: {self.namespace}, partition_id={self.partition_id}. proxy_endpoint={self.proxy_endpoint}, partition={self.partition}"
            )

        def int_setting(conf_key):
            # Resolve a conf key against the given options and coerce to int.
            return int(conf_key.get_with(options))

        self.unarycall_max_retry_cnt = int_setting(
            RollSiteConfKeys.EGGROLL_ROLLSITE_UNARYCALL_CLIENT_MAX_RETRY)
        self.push_max_retry_cnt = int_setting(
            RollSiteConfKeys.EGGROLL_ROLLSITE_PUSH_CLIENT_MAX_RETRY)
        self.push_overall_timeout = int_setting(
            RollSiteConfKeys.EGGROLL_ROLLSITE_OVERALL_TIMEOUT_SEC)
        self.push_completion_wait_timeout = int_setting(
            RollSiteConfKeys.EGGROLL_ROLLSITE_COMPLETION_WAIT_TIMEOUT_SEC)
        self.push_packet_interval_timeout = int_setting(
            RollSiteConfKeys.EGGROLL_ROLLSITE_PACKET_INTERVAL_TIMEOUT_SEC)
Ejemplo n.º 8
0
def get_option(role, conf_file=default_props_file):
    """Assemble roll site options from the 'eggroll' section of a conf file.

    Args:
        role: value stored under 'self_role'.
        conf_file: path of the properties file handed to configparser.

    Returns:
        A dict holding 'self_party_id', 'self_role' and 'proxy_endpoint'
        (an ErEndpoint built from the configured host and port).
    """
    print(f'conf file: {conf_file}')
    parser = configparser.ConfigParser()
    parser.read(conf_file)
    section = parser['eggroll']

    party_id = section[RollSiteConfKeys.EGGROLL_ROLLSITE_PARTY_ID.key]
    proxy_host = section[RollSiteConfKeys.EGGROLL_ROLLSITE_HOST.key]
    proxy_port = int(section[RollSiteConfKeys.EGGROLL_ROLLSITE_PORT.key])

    return {
        'self_party_id': party_id,
        'self_role': role,
        'proxy_endpoint': ErEndpoint(host=proxy_host, port=proxy_port),
    }
Ejemplo n.º 9
0
    def test_send(self):
        """Queue six byte chunks in a FifoBroker and send them to localhost."""
        transfer_client = TransferClient()
        broker = FifoBroker()

        for chunk in (b'hello', b'world', b'this', b'is', b'a', b'test'):
            broker.put(chunk)
        broker.signal_write_finish()

        target = ErEndpoint(host='localhost', port=transfer_port)
        future = transfer_client.send(broker=broker, endpoint=target, tag='test')
        # Block until the send completes so a failure surfaces in the test.
        future.result()
Ejemplo n.º 10
0
    def __init__(self, rp_ctx, rs_session_id, party, proxy_endpoint):
        """Create the RollSiteContext backing this federation object.

        Args:
            rp_ctx: roll pair context handed to the RollSiteContext.
            rs_session_id: roll site session id.
            party: object providing ``role`` and ``party_id``.
            proxy_endpoint: an endpoint object, or a "host:port" string that
                is converted to an ErEndpoint.
        """
        LOGGER.debug(f"[federation.eggroll]init federation: "
                     f"rp_session_id={rp_ctx.session_id}, rs_session_id={rs_session_id}, "
                     f"party={party}, proxy_endpoint={proxy_endpoint}")

        from eggroll.core.meta_model import ErEndpoint
        if isinstance(proxy_endpoint, str):
            # Only the first two ':'-separated fields are used.
            fields = proxy_endpoint.split(':')
            proxy_endpoint = ErEndpoint(host=fields[0].strip(),
                                        port=int(fields[1].strip()))

        rs_options = dict(self_role=party.role,
                          self_party_id=party.party_id,
                          proxy_endpoint=proxy_endpoint)
        self._rsc = RollSiteContext(rs_session_id, rp_ctx=rp_ctx, options=rs_options)
        LOGGER.debug("[federation.eggroll]init federation context done")
Ejemplo n.º 11
0
    def __init__(self,
                 roll_site_session_id,
                 rp_ctx: RollPairContext,
                 options: dict = None):
        """Set up a roll site context on top of an existing RollPairContext.

        Args:
            roll_site_session_id: id of this roll site session.
            rp_ctx: RollPairContext whose session receives the exit task.
            options: must contain 'self_role', 'self_party_id' and
                'proxy_endpoint' (an ErEndpoint or a "host:port" string).

        Raises:
            ValueError: if 'proxy_endpoint' is neither a str nor an ErEndpoint.
        """
        options = {} if options is None else options
        self.roll_site_session_id = roll_site_session_id
        self.rp_ctx = rp_ctx

        self.role = options["self_role"]
        self.party_id = str(options["self_party_id"])
        self._options = options

        raw_endpoint = options["proxy_endpoint"]
        if isinstance(raw_endpoint, str):
            # "host:port" form: only the first two ':'-separated fields are used.
            fields = raw_endpoint.split(':')
            self.proxy_endpoint = ErEndpoint(host=fields[0].strip(),
                                             port=int(fields[1].strip()))
        elif isinstance(raw_endpoint, ErEndpoint):
            self.proxy_endpoint = raw_endpoint
        else:
            raise ValueError("endpoint only support str and ErEndpoint type")

        deploy_mode = RollSiteConfKeys.EGGROLL_ROLLSITE_DEPLOY_MODE.get_with(
            options)
        self.is_standalone = deploy_mode == "standalone"
        if not self.is_standalone:
            # Cluster mode talks to the proxy through a gRPC stub.
            channel = self.grpc_channel_factory.create_channel(
                self.proxy_endpoint)
            self.stub = proxy_pb2_grpc.DataTransferServiceStub(channel)
        else:
            self.stub = None

        self.pushing_latch = CountDownLatch(0)
        self.rp_ctx.get_session().add_exit_task(self._wait_push_complete)
        timeout_key = RollSiteConfKeys.EGGROLL_ROLLSITE_PUSH_OVERALL_TIMEOUT_SEC
        self._wait_push_exit_timeout = int(timeout_key.get_with(options))

        L.info(f"inited RollSiteContext: {self.__dict__}")
Ejemplo n.º 12
0
    def __init__(self, options=None):
        """Create a client addressed at the configured cluster manager.

        Host and port are taken from ``options`` first and fall back to the
        static conf returned by ``get_static_er_conf()``.

        Args:
            options: optional dict; may contain the cluster manager host and
                port keys and 'serdes_type' (defaults to SerdesTypes.PROTOBUF).

        Raises:
            ValueError: if no host or no port can be resolved.
        """
        options = {} if options is None else options
        static_er_conf = get_static_er_conf()

        host_key = ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_HOST
        port_key = ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_PORT
        host = options.get(host_key, static_er_conf.get(host_key))
        port = options.get(port_key, static_er_conf.get(port_key))

        if not (host and port):
            raise ValueError(
                f'failed to load host or port in creating cluster manager client. host: {host}, port: {port}'
            )

        self.__endpoint = ErEndpoint(host, int(port))
        self.__serdes_type = options.get('serdes_type', SerdesTypes.PROTOBUF)
        self.__command_client = CommandClient()
Ejemplo n.º 13
0
    options['self_party_id'] = party_id
    options['self_role'] = role

    options['proxy_endpoint'] = \
        ErEndpoint(host=eggroll_configs[RollSiteConfKeys.EGGROLL_ROLLSITE_HOST.key],
                   port=int(eggroll_configs[RollSiteConfKeys.EGGROLL_ROLLSITE_PORT.key]))

    return options


# Both parties run on the local machine in these fixtures.
host_ip = 'localhost'
guest_ip = 'localhost'

# Roll site options for the "host" party.
host_options = dict(
    self_role='host',
    self_party_id=10001,
    proxy_endpoint=ErEndpoint(host=host_ip, port=9395),
)

# Roll site options for the "guest" party.
guest_options = dict(
    self_role='guest',
    self_party_id=10002,
    proxy_endpoint=ErEndpoint(host=guest_ip, port=9396),
)

# LevelDB-backed store fixture.
_er_store1_locator = ErStoreLocator(store_type=StoreTypes.ROLLPAIR_LEVELDB,
                                    namespace="namespace",
                                    name="name")
ER_STORE1 = ErStore(store_locator=_er_store1_locator)

roll_site_session_id = 'atest'

Ejemplo n.º 14
0
def _egg_pair_grpc_server_options():
    """Return the gRPC channel arguments used by the egg_pair servers.

    Every value is read from ``CoreConfKeys`` at call time. The command
    server and the dedicated transfer server are both built from this list,
    so the two cannot drift apart.
    """
    return [
        ('grpc.max_metadata_size',
         int(CoreConfKeys.
             EGGROLL_CORE_GRPC_SERVER_CHANNEL_MAX_INBOUND_METADATA_SIZE.get())),
        ('grpc.max_send_message_length',
         int(CoreConfKeys.
             EGGROLL_CORE_GRPC_SERVER_CHANNEL_MAX_INBOUND_MESSAGE_SIZE.get())),
        ('grpc.max_receive_message_length',
         int(CoreConfKeys.
             EGGROLL_CORE_GRPC_SERVER_CHANNEL_MAX_INBOUND_MESSAGE_SIZE.get())),
        # Keepalive values are configured in seconds; gRPC takes milliseconds.
        ('grpc.keepalive_time_ms',
         int(CoreConfKeys.CONFKEY_CORE_GRPC_CHANNEL_KEEPALIVE_TIME_SEC.get())
         * 1000),
        ('grpc.keepalive_timeout_ms',
         int(CoreConfKeys.
             CONFKEY_CORE_GRPC_SERVER_CHANNEL_KEEPALIVE_TIMEOUT_SEC.get())
         * 1000),
        ('grpc.keepalive_permit_without_calls',
         int(CoreConfKeys.
             CONFKEY_CORE_GRPC_SERVER_CHANNEL_KEEPALIVE_WITHOUT_CALLS_ENABLED.
             get())),
        ('grpc.per_rpc_retry_buffer_size',
         int(CoreConfKeys.CONFKEY_CORE_GRPC_SERVER_CHANNEL_RETRY_BUFFER_SIZE.
             get())),
        ('grpc.so_reuseport', False),
    ]


def serve(args):
    """Run the egg_pair gRPC service until SIGTERM or SIGINT is received.

    Starts a command server and a transfer service (sharing the command
    server when ``args.transfer_port`` is "-1", otherwise on its own server).
    If ``args.cluster_manager`` is set, it reports a RUNNING heartbeat on
    start and a STOPPED heartbeat on exit.

    Args:
        args: parsed arguments providing ``data_dir``, ``port``,
            ``transfer_port``, ``cluster_manager`` ("host:port" or falsy),
            ``session_id``, ``server_node_id`` and ``processor_id``.

    Raises:
        ValueError: if a cluster manager is given but ``session_id`` is
            missing.
    """
    prefix = 'v1/egg-pair'

    set_data_dir(args.data_dir)

    CommandRouter.get_instance().register(
        service_name=f"{prefix}/runTask",
        route_to_module_name="eggroll.roll_pair.egg_pair",
        route_to_class_name="EggPair",
        route_to_method_name="run_task")

    max_workers = int(
        RollPairConfKeys.
        EGGROLL_ROLLPAIR_EGGPAIR_SERVER_EXECUTOR_POOL_MAX_SIZE.get())
    executor_pool_type = CoreConfKeys.EGGROLL_CORE_DEFAULT_EXECUTOR_POOL.get()
    command_server = grpc.server(
        create_executor_pool(canonical_name=executor_pool_type,
                             max_workers=max_workers,
                             thread_name_prefix="eggpair-command-server"),
        options=_egg_pair_grpc_server_options())

    command_servicer = CommandServicer()
    command_pb2_grpc.add_CommandServiceServicer_to_server(
        command_servicer, command_server)

    transfer_servicer = GrpcTransferServicer()

    port = args.port
    transfer_port = args.transfer_port

    # add_insecure_port returns the port actually bound.
    port = command_server.add_insecure_port(f'[::]:{port}')

    if transfer_port == "-1":
        # "-1" means: serve transfers from the command server itself.
        transfer_server = command_server
        transfer_port = port
        transfer_pb2_grpc.add_TransferServiceServicer_to_server(
            transfer_servicer, transfer_server)
    else:
        transfer_server_max_workers = int(
            RollPairConfKeys.
            EGGROLL_ROLLPAIR_EGGPAIR_DATA_SERVER_EXECUTOR_POOL_MAX_SIZE.get())
        # Fix: the keepalive time used to be derived from the
        # "keepalive without calls enabled" flag here; it now matches the
        # command server through the shared options helper.
        transfer_server = grpc.server(
            create_executor_pool(canonical_name=executor_pool_type,
                                 max_workers=transfer_server_max_workers,
                                 thread_name_prefix="transfer_server"),
            options=_egg_pair_grpc_server_options())
        transfer_port = transfer_server.add_insecure_port(
            f'[::]:{transfer_port}')
        transfer_pb2_grpc.add_TransferServiceServicer_to_server(
            transfer_servicer, transfer_server)
        transfer_server.start()
    pid = os.getpid()

    L.info(
        f"starting egg_pair service, port: {port}, transfer port: {transfer_port}, pid: {pid}"
    )
    command_server.start()

    cluster_manager = args.cluster_manager
    myself = None
    cluster_manager_client = None
    if cluster_manager:
        session_id = args.session_id
        server_node_id = int(args.server_node_id)
        static_er_conf = get_static_er_conf()
        static_er_conf['server_node_id'] = server_node_id

        if not session_id:
            raise ValueError('session id is missing')
        options = {SessionConfKeys.CONFKEY_SESSION_ID: args.session_id}
        myself = ErProcessor(id=int(args.processor_id),
                             server_node_id=server_node_id,
                             processor_type=ProcessorTypes.EGG_PAIR,
                             command_endpoint=ErEndpoint(host='localhost',
                                                         port=port),
                             transfer_endpoint=ErEndpoint(host='localhost',
                                                          port=transfer_port),
                             pid=pid,
                             options=options,
                             status=ProcessorStatus.RUNNING)

        cluster_manager_host, cluster_manager_port = cluster_manager.strip(
        ).split(':')

        L.info(f'egg_pair cluster_manager: {cluster_manager}')
        cluster_manager_client = ClusterManagerClient(
            options={
                ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_HOST:
                cluster_manager_host,
                ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_PORT:
                cluster_manager_port
            })
        cluster_manager_client.heartbeat(myself)

        if platform.system() == "Windows":
            # On Windows a helper thread running stop_processor is started.
            t1 = threading.Thread(target=stop_processor,
                                  args=[cluster_manager_client, myself])
            t1.start()

    L.info(f'egg_pair started at port={port}, transfer_port={transfer_port}')

    run = True

    def exit_gracefully(signum, frame):
        nonlocal run
        run = False
        # Fix: log the received signal number. signal.getsignal(signum)
        # returns the installed handler object, not the signal.
        L.info(
            f'egg_pair {args.processor_id} at port={port}, transfer_port={transfer_port}, pid={pid} receives signum={signum}, stopping gracefully.'
        )

    signal.signal(signal.SIGTERM, exit_gracefully)
    signal.signal(signal.SIGINT, exit_gracefully)

    # Idle until a signal handler clears the flag.
    while run:
        time.sleep(1)

    L.info(f'sending exit heartbeat to cm')
    if cluster_manager:
        myself._status = ProcessorStatus.STOPPED
        cluster_manager_client.heartbeat(myself)

    GrpcChannelFactory.shutdown_all_now()

    L.info(f'closing RocksDB open dbs')
    #todo:1: move to RocksdbAdapter and provide a cleanup method
    from eggroll.core.pair_store.rocksdb import RocksdbAdapter
    # NOTE(review): `del db` only unbinds the loop variable; db_dict still
    # references every db, so this may not release them - confirm.
    for path, db in RocksdbAdapter.db_dict.items():
        del db

    gc.collect()

    L.info(f'system metric at exit: {get_system_metric(1)}')
    L.info(
        f'egg_pair {args.processor_id} at port={port}, transfer_port={transfer_port}, pid={pid} stopped gracefully'
    )
Ejemplo n.º 15
0
#  See the License for the specific language governing permissions and
#  limitations under the License.

import queue
import threading
import unittest
from concurrent.futures import ThreadPoolExecutor

from eggroll.core.conf_keys import TransferConfKeys
from eggroll.core.datastructure.broker import FifoBroker, BrokerClosed
from eggroll.core.meta_model import ErEndpoint
from eggroll.core.transfer.transfer_service import TransferService, \
    GrpcTransferService, TransferClient

# Port used by the transfer tests in this module.
transfer_port = 20002
# Endpoint on localhost at transfer_port.
# NOTE(review): the name is misspelled ("endpont"); it is kept unchanged
# because other code may reference it.
transfer_endpont = ErEndpoint('localhost', transfer_port)


class TestTransfer(unittest.TestCase):
    def setUp(self) -> None:
        """Create a five-worker thread pool for the test case."""
        self.__executor_pool = ThreadPoolExecutor(max_workers=5)

    def test_recv(self):
        def start_server():
            transfer_service = GrpcTransferService()

            options = {
                TransferConfKeys.CONFKEY_TRANSFER_SERVICE_PORT: transfer_port
            }
            transfer_service.start(options=options)
Ejemplo n.º 16
0
def serve(args):
    """Run the egg_pair gRPC service until SIGTERM or SIGINT is received.

    Starts a command server and a transfer service (sharing the command
    server when ``args.transfer_port`` is "-1", otherwise on its own server).
    If ``args.cluster_manager`` is set, it reports a RUNNING heartbeat on
    start and a STOPPED heartbeat on exit.

    Args:
        args: parsed arguments providing ``data_dir``, ``port``,
            ``transfer_port``, ``cluster_manager`` ("host:port" or falsy),
            ``session_id``, ``server_node_id`` and ``processor_id``.

    Raises:
        ValueError: if a cluster manager is given but ``session_id`` is
            missing.
    """
    import time

    prefix = 'v1/egg-pair'

    # Fix: `2 << 30 - 1` parses as `2 << 29` because `-` binds tighter than
    # `<<`. The parentheses give the intended 2**31 - 1 limit.
    max_message_size = (2 << 30) - 1
    max_metadata_size = 32 << 20

    set_data_dir(args.data_dir)

    CommandRouter.get_instance().register(
        service_name=f"{prefix}/runTask",
        route_to_module_name="eggroll.roll_pair.egg_pair",
        route_to_class_name="EggPair",
        route_to_method_name="run_task")

    command_server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=500,
                                   thread_name_prefix="grpc_server"),
        options=[("grpc.max_metadata_size", max_metadata_size),
                 (cygrpc.ChannelArgKey.max_send_message_length,
                  max_message_size),
                 (cygrpc.ChannelArgKey.max_receive_message_length,
                  max_message_size)])

    command_servicer = CommandServicer()
    command_pb2_grpc.add_CommandServiceServicer_to_server(
        command_servicer, command_server)

    transfer_servicer = GrpcTransferServicer()

    port = args.port
    transfer_port = args.transfer_port

    # add_insecure_port returns the port actually bound.
    port = command_server.add_insecure_port(f'[::]:{port}')

    if transfer_port == "-1":
        # "-1" means: serve transfers from the command server itself.
        transfer_server = command_server
        transfer_port = port
        transfer_pb2_grpc.add_TransferServiceServicer_to_server(
            transfer_servicer, transfer_server)
    else:
        transfer_server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=500,
                                       thread_name_prefix="transfer_server"),
            options=[
                (cygrpc.ChannelArgKey.max_send_message_length,
                 max_message_size),
                (cygrpc.ChannelArgKey.max_receive_message_length,
                 max_message_size),
                ('grpc.max_metadata_size', max_metadata_size)
            ])
        transfer_port = transfer_server.add_insecure_port(
            f'[::]:{transfer_port}')
        transfer_pb2_grpc.add_TransferServiceServicer_to_server(
            transfer_servicer, transfer_server)
        transfer_server.start()

    L.info(
        f"starting egg_pair service, port:{port}, transfer port: {transfer_port}"
    )
    command_server.start()

    cluster_manager = args.cluster_manager
    myself = None
    cluster_manager_client = None
    if cluster_manager:
        session_id = args.session_id

        if not session_id:
            raise ValueError('session id is missing')
        options = {SessionConfKeys.CONFKEY_SESSION_ID: args.session_id}
        myself = ErProcessor(id=int(args.processor_id),
                             server_node_id=int(args.server_node_id),
                             processor_type=ProcessorTypes.EGG_PAIR,
                             command_endpoint=ErEndpoint(host='localhost',
                                                         port=port),
                             transfer_endpoint=ErEndpoint(host='localhost',
                                                          port=transfer_port),
                             pid=os.getpid(),
                             options=options,
                             status=ProcessorStatus.RUNNING)

        cluster_manager_host, cluster_manager_port = cluster_manager.strip(
        ).split(':')

        L.info(f'cluster_manager: {cluster_manager}')
        cluster_manager_client = ClusterManagerClient(
            options={
                ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_HOST:
                cluster_manager_host,
                ClusterManagerConfKeys.CONFKEY_CLUSTER_MANAGER_PORT:
                cluster_manager_port
            })
        cluster_manager_client.heartbeat(myself)

    L.info(f'egg_pair started at port {port}, transfer_port {transfer_port}')

    run = True

    def exit_gracefully(signum, frame):
        nonlocal run
        run = False

    signal.signal(signal.SIGTERM, exit_gracefully)
    signal.signal(signal.SIGINT, exit_gracefully)

    # Idle until a signal handler clears the flag.
    while run:
        time.sleep(1)

    if cluster_manager:
        myself._status = ProcessorStatus.STOPPED
        cluster_manager_client.heartbeat(myself)

    L.info(
        f'egg_pair at port {port}, transfer_port {transfer_port} stopped gracefully'
    )