Esempio n. 1
0
    def __init__(self, eggroll_session):
        """Create the process-wide eggroll client and register its session.

        Reads the roll host/port and the chunk size from ``eggroll_session``,
        opens one insecure gRPC channel with unlimited message sizes, builds
        the KV / processor / session stubs on it, publishes the object as
        ``_EggRoll.instance`` and calls ``getOrCreateSession`` on the server.

        If ``getOrCreateSession`` fails, the singleton registration is rolled
        back and the channel is closed before the error is re-raised, so that
        a later initialization attempt is not rejected with "initialized only
        once" because of a half-built instance.

        Args:
            eggroll_session: session object providing ``get_conf``,
                ``get_chunk_size``, ``get_session_id`` and ``to_protobuf``.

        Raises:
            EnvironmentError: if an instance has already been registered.
        """
        if _EggRoll.instance is not None:
            raise EnvironmentError("eggroll should be initialized only once")

        host = eggroll_session.get_conf(EGGROLL_ROLL_HOST)
        port = eggroll_session.get_conf(EGGROLL_ROLL_PORT)
        self.chunk_size = eggroll_session.get_chunk_size()
        self.host = host
        self.port = port

        # -1 removes gRPC's default message size limits in both directions.
        self.channel = grpc.insecure_channel(
            target="{}:{}".format(host, port),
            options=[('grpc.max_send_message_length', -1),
                     ('grpc.max_receive_message_length', -1)])
        self.session_id = eggroll_session.get_session_id()
        self.kv_stub = kv_pb2_grpc.KVServiceStub(self.channel)
        self.proc_stub = processor_pb2_grpc.ProcessServiceStub(self.channel)
        self.session_stub = node_manager_pb2_grpc.SessionServiceStub(
            self.channel)
        self.eggroll_session = eggroll_session
        _EggRoll.instance = self

        try:
            self.session_stub.getOrCreateSession(
                self.eggroll_session.to_protobuf())
        except Exception:
            # Undo the registration (it was None before, see guard above) and
            # release the channel so initialization can be retried.
            _EggRoll.instance = None
            self.channel.close()
            raise

        # todo: move to eggrollSession
        try:
            self.host_name = socket.gethostname()
            self.host_ip = socket.gethostbyname(self.host_name)
        except socket.gaierror:
            # Name resolution is best-effort; fall back to placeholders.
            self.host_name = 'unknown'
            self.host_ip = 'unknown'
Esempio n. 2
0
    def init():
        """Populate the EggRoll stub registries from the mock roll config.

        Does nothing when ``EggRoll.init_flag`` is already set. Otherwise it
        loads ``eggroll/conf/mock_roll.json`` and, for every id listed under
        ``eggs``:

        * opens a channel to the address found under ``storage`` for that id
          and appends a KV stub for it to ``EggRoll.egg_list``;
        * opens one channel per address listed under ``procs`` for that id,
          appends ``(channel, stub)`` to ``EggRoll.proc_list`` and records
          ``int(egg_id) - 1`` in ``EggRoll.proc_egg_map`` under the processor's
          position in ``proc_list``.

        Finally sets ``EggRoll.init_flag``.
        """
        if EggRoll.init_flag:
            return

        def _open_channel(address):
            # Every channel uses unlimited (-1) send/receive message sizes.
            return grpc.insecure_channel(
                address,
                options=[('grpc.max_send_message_length', -1),
                         ('grpc.max_receive_message_length', -1)])

        config = file_utils.load_json_conf('eggroll/conf/mock_roll.json')

        for egg_id in config.get('eggs'):
            storage_address = config.get('storage').get(egg_id)
            storage_stub = kv_pb2_grpc.KVServiceStub(
                _open_channel(storage_address))
            EggRoll.egg_list.append(storage_stub)

            for proc_address in config.get('procs').get(egg_id):
                proc_channel = _open_channel(proc_address)
                proc_stub = processor_pb2_grpc.ProcessServiceStub(proc_channel)
                # Key is the index this processor is about to occupy in
                # proc_list; presumably egg ids are 1-based, hence the "- 1".
                proc_index = len(EggRoll.proc_list)
                EggRoll.proc_egg_map[proc_index] = int(egg_id) - 1
                EggRoll.proc_list.append((proc_channel, proc_stub))

        EggRoll.init_flag = True
Esempio n. 3
0
    def action(_table, host, port, chunked_iter, use_serialize):
        """Send one chunk of key/value pairs to ``_table`` over a fresh channel.

        Disables gc on ``_table``, replaces the singleton's channel with a new
        insecure channel to ``host:port``, rebuilds the KV and processor stubs
        on it, then builds the operand from ``chunked_iter`` and sends it with
        ``putAll``. The channel is used as a context manager, so it is closed
        when the call finishes.

        Args:
            _table: destination table; also used to build the call metadata.
            host: roll service host.
            port: roll service port.
            chunked_iter: the key/value pairs of this chunk.
            use_serialize: forwarded to the operand generator.
        """
        _table.set_gc_disable()

        roll = _EggRoll.get_instance()
        address = "{}:{}".format(host, port)
        roll.channel = grpc.insecure_channel(
            target=address,
            options=[('grpc.max_send_message_length', -1),
                     ('grpc.max_receive_message_length', -1)])

        with roll.get_channel() as child_channel:
            roll.kv_stub = kv_pb2_grpc.KVServiceStub(child_channel)
            roll.proc_stub = processor_pb2_grpc.ProcessServiceStub(
                child_channel)

            payload = roll.__generate_operand(chunked_iter, use_serialize)
            roll.kv_stub.putAll(payload, metadata=_get_meta(_table))
Esempio n. 4
0
    def __init__(self, job_id, host, port, eggroll_context):
        """Create the process-wide eggroll client for ``job_id``.

        Opens an insecure gRPC channel to ``host:port`` with unlimited message
        sizes, builds the KV and processor stubs on it, and publishes the new
        object as ``_EggRoll.instance``. The local host name and IP are
        recorded, falling back to ``'unknown'`` when resolution fails.

        Args:
            job_id: identifier stored on the instance as ``job_id``.
            host: roll service host.
            port: roll service port.
            eggroll_context: context object stored as ``eggroll_context``.

        Raises:
            EnvironmentError: if an instance has already been registered.
        """
        if _EggRoll.instance is not None:
            raise EnvironmentError("eggroll should be initialized only once")

        address = "{}:{}".format(host, port)
        # -1 lifts gRPC's default message size limits.
        unlimited_messages = [('grpc.max_send_message_length', -1),
                              ('grpc.max_receive_message_length', -1)]
        self.channel = grpc.insecure_channel(target=address,
                                             options=unlimited_messages)

        self.job_id = job_id
        self.kv_stub = kv_pb2_grpc.KVServiceStub(self.channel)
        self.proc_stub = processor_pb2_grpc.ProcessServiceStub(self.channel)
        self.eggroll_context = eggroll_context
        _EggRoll.instance = self

        # todo: move to eggrollContext
        try:
            self.host_name = socket.gethostname()
            self.host_ip = socket.gethostbyname(self.host_name)
        except socket.gaierror:
            self.host_name = 'unknown'
            self.host_ip = 'unknown'
Esempio n. 5
0
    def put_all(self,
                _table,
                data: Iterable,
                use_serialize=True,
                chunk_size=100000,
                skip_chunk=0,
                include_key=True,
                single_process=False):
        """Write all items of ``data`` into ``_table`` in chunks via ``putAll``.

        The module-level ``gc_tag`` flag is set to ``False`` for the duration
        of the call and is always set back to ``True`` on exit, including when
        an exception is raised.

        In single-process mode each chunk is sent through ``self.kv_stub``.
        Otherwise the singleton's channel is closed, chunks are handed to
        ``_EggRoll.action`` in a ``ProcessPoolExecutor`` with ``cpu_count()``
        workers, and afterwards (even on error) a fresh channel and fresh
        KV / processor / session stubs are installed on the singleton.
        Failures raised inside workers are logged, not raised.

        Args:
            _table: destination table.
            data: items to store; key/value pairs when ``include_key`` is
                true, otherwise values that get keys from ``enumerate``.
            use_serialize: forwarded to the operand generator / workers.
            chunk_size: NOTE: currently ignored; ``self.chunk_size`` is used
                instead, replaced by ``CHUNK_SIZE_DEFAULT`` when it is below
                ``CHUNK_SIZE_MIN``.
            skip_chunk: number of leading chunks not to send. Applied here
                for the single-process and ``Sequence`` paths; forwarded to
                ``split_every_generator`` / ``split_every`` for the others.
            include_key: whether ``data`` already carries keys.
            single_process: send chunks from the current process when
                ``True``.
        """
        global gc_tag
        gc_tag = False
        try:
            # Equality / identity checks are kept as in the original so that
            # non-bool arguments keep selecting the same branch.
            if include_key == True:
                kvs = data
            else:
                kvs = enumerate(data)

            chunk_size = self.chunk_size
            if chunk_size < CHUNK_SIZE_MIN:
                chunk_size = CHUNK_SIZE_DEFAULT

            host = self.host
            port = self.port

            if single_process is True:
                chunks = split_every_yield(kvs, chunk_size=chunk_size)
                for chunk_index, chunked_iter in enumerate(chunks):
                    if chunk_index < skip_chunk:
                        continue
                    self.kv_stub.putAll(
                        self.__generate_operand(chunked_iter,
                                                use_serialize=use_serialize),
                        metadata=_get_meta(_table))
                return

            roll = _EggRoll.get_instance()
            # The parent's channel is closed before worker processes are
            # started; each worker opens its own channel in action().
            roll.get_channel().close()
            futures = []
            try:
                with ProcessPoolExecutor(cpu_count()) as executor:

                    def submit(chunk):
                        futures.append(
                            executor.submit(_EggRoll.action, _table, host,
                                            port, chunk, use_serialize))

                    if isinstance(kvs, Sequence):  # Sequence
                        chunks = split_every_yield(kvs, chunk_size)
                        for chunk_index, chunked_iter in enumerate(chunks):
                            if chunk_index >= skip_chunk:
                                submit(chunked_iter)
                    elif isinstance(kvs, Generator) or isinstance(
                            data, Generator):
                        while True:
                            chunked_iter = split_every_generator(
                                kvs, chunk_size, skip_chunk)
                            if chunked_iter == []:
                                break
                            submit(chunked_iter)
                    else:  # other Iterable types
                        try:
                            index = 0
                            while True:
                                chunked_iter = split_every(
                                    kvs, index, chunk_size, skip_chunk)
                                # Probe a copy so the real iterator is not
                                # consumed; StopIteration means no more data.
                                probe = copy.deepcopy(chunked_iter)
                                next(probe)
                                submit(chunked_iter)
                                index += 1
                        except StopIteration:
                            LOGGER.debug("StopIteration")

                # Leaving the ``with`` block waited for all workers. Their
                # outcome used to be discarded; log failures so that lost
                # chunks are visible. Not re-raised, to keep the previous
                # exception behaviour for callers.
                for future in futures:
                    error = future.exception()
                    if error is not None:
                        LOGGER.error("put_all worker failed: %s", error)
            finally:
                # Always give the singleton a usable channel and stubs again,
                # even if chunking or submission raised.
                roll.channel = grpc.insecure_channel(
                    target="{}:{}".format(host, port),
                    options=[('grpc.max_send_message_length', -1),
                             ('grpc.max_receive_message_length', -1)])
                roll.kv_stub = kv_pb2_grpc.KVServiceStub(roll.get_channel())
                roll.proc_stub = processor_pb2_grpc.ProcessServiceStub(
                    roll.get_channel())
                roll.session_stub = node_manager_pb2_grpc.SessionServiceStub(
                    roll.get_channel())
        finally:
            gc_tag = True