def __init__(self, eggroll_session):
    """Build the single ``_EggRoll`` client from an eggroll session.

    Reads the roll host/port and chunk size from ``eggroll_session``, opens
    an insecure gRPC channel to ``host:port``, creates the KV, processor and
    session stubs on it, publishes ``self`` as ``_EggRoll.instance`` and
    registers the session through ``getOrCreateSession``.

    :param eggroll_session: session object providing ``get_conf``,
        ``get_chunk_size``, ``get_session_id`` and ``to_protobuf``.
    :raises EnvironmentError: if ``_EggRoll.instance`` is already set.
    :raises Exception: whatever ``getOrCreateSession`` raises; in that case
        the singleton slot is cleared and the channel closed before the
        error propagates, so initialization can be retried.
    """
    if _EggRoll.instance is not None:
        raise EnvironmentError("eggroll should be initialized only once")

    host = eggroll_session.get_conf(EGGROLL_ROLL_HOST)
    port = eggroll_session.get_conf(EGGROLL_ROLL_PORT)
    self.chunk_size = eggroll_session.get_chunk_size()
    self.host = host
    self.port = port
    self.channel = grpc.insecure_channel(
        target="{}:{}".format(host, port),
        options=[('grpc.max_send_message_length', -1),
                 ('grpc.max_receive_message_length', -1)])
    self.session_id = eggroll_session.get_session_id()
    self.kv_stub = kv_pb2_grpc.KVServiceStub(self.channel)
    self.proc_stub = processor_pb2_grpc.ProcessServiceStub(self.channel)
    self.session_stub = node_manager_pb2_grpc.SessionServiceStub(
        self.channel)
    self.eggroll_session = eggroll_session

    _EggRoll.instance = self
    try:
        self.session_stub.getOrCreateSession(
            self.eggroll_session.to_protobuf())
    except Exception:
        # Do not leave a half-initialized singleton behind: otherwise every
        # retry would fail with "initialized only once" and mask this error.
        _EggRoll.instance = None
        self.channel.close()
        raise

    # todo: move to eggrollSession
    try:
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
    except socket.gaierror:
        # Address resolution failed: both fields fall back to a placeholder.
        self.host_name = 'unknown'
        self.host_ip = 'unknown'
def init():
    """Fill the ``EggRoll`` class-level registries from the mock roll config.

    Does nothing when ``EggRoll.init_flag`` is already set. Otherwise loads
    ``eggroll/conf/mock_roll.json`` and, for every id under ``eggs``:

    * opens a channel to that egg's ``storage`` address and appends a KV
      stub to ``EggRoll.egg_list``;
    * opens a channel per address listed under ``procs`` for that egg,
      appends ``(channel, stub)`` to ``EggRoll.proc_list`` and records
      ``int(egg_id) - 1`` in ``EggRoll.proc_egg_map`` under the entry's
      position in ``proc_list``.

    Finally sets ``EggRoll.init_flag``.
    """
    if EggRoll.init_flag:
        return

    def _open_channel(address):
        # One fresh options list per channel, as each call site had before.
        return grpc.insecure_channel(
            address,
            options=[('grpc.max_send_message_length', -1),
                     ('grpc.max_receive_message_length', -1)])

    config = file_utils.load_json_conf('eggroll/conf/mock_roll.json')

    for egg_id in config.get('eggs'):
        storage_address = config.get('storage').get(egg_id)
        storage_channel = _open_channel(storage_address)
        EggRoll.egg_list.append(kv_pb2_grpc.KVServiceStub(storage_channel))

        for proc_address in config.get('procs').get(egg_id):
            proc_channel = _open_channel(proc_address)
            proc_stub = processor_pb2_grpc.ProcessServiceStub(proc_channel)
            # The map key is the index the entry is about to occupy.
            slot = len(EggRoll.proc_list)
            EggRoll.proc_egg_map[slot] = int(egg_id) - 1
            EggRoll.proc_list.append((proc_channel, proc_stub))

    EggRoll.init_flag = True
def __init__(self, options):
    """Initialise the base class and open a KV stub for ``options``.

    :param options: mapping forwarded to the parent constructor; must
        contain ``"host"`` and ``"port"``, which form the channel target.
    """
    super().__init__(options)
    self.options = options

    address = "{}:{}".format(options["host"], options["port"])
    message_limits = [('grpc.max_send_message_length', -1),
                      ('grpc.max_receive_message_length', -1)]
    self.channel = grpc.insecure_channel(target=address,
                                         options=message_limits)
    self.kv_stub = kv_pb2_grpc.KVServiceStub(self.channel)
def action(_table, host, port, chunked_iter, use_serialize):
    """Send one chunk to ``_table`` over a freshly opened channel.

    Submitted to a process pool by ``put_all``. It disables gc on
    ``_table``, replaces the singleton's channel with a new one to
    ``host:port``, rebinds the KV and processor stubs to it, and issues a
    single ``putAll`` for ``chunked_iter``. The channel is used as a
    context manager, so it is released when the call finishes.

    :param _table: table whose metadata accompanies the request.
    :param host: roll host to connect to.
    :param port: roll port to connect to.
    :param chunked_iter: the chunk of entries to upload.
    :param use_serialize: forwarded to the operand generator.
    """
    _table.set_gc_disable()

    roll = _EggRoll.get_instance()
    roll.channel = grpc.insecure_channel(
        target="{}:{}".format(host, port),
        options=[('grpc.max_send_message_length', -1),
                 ('grpc.max_receive_message_length', -1)])

    with roll.get_channel() as worker_channel:
        roll.kv_stub = kv_pb2_grpc.KVServiceStub(worker_channel)
        roll.proc_stub = processor_pb2_grpc.ProcessServiceStub(worker_channel)
        request_iter = roll.__generate_operand(chunked_iter, use_serialize)
        roll.kv_stub.putAll(request_iter, metadata=_get_meta(_table))
def __init__(self, job_id, host, port, eggroll_context):
    """Build the single ``_EggRoll`` client for ``job_id``.

    Opens an insecure gRPC channel to ``host:port``, creates the KV and
    processor stubs on it, publishes ``self`` as ``_EggRoll.instance`` and
    records the local host name and IP.

    :param job_id: identifier stored on the instance.
    :param host: roll host used for the channel target.
    :param port: roll port used for the channel target.
    :param eggroll_context: context object stored on the instance.
    :raises EnvironmentError: if ``_EggRoll.instance`` is already set.
    """
    if _EggRoll.instance is not None:
        raise EnvironmentError("eggroll should be initialized only once")

    address = "{}:{}".format(host, port)
    message_limits = [('grpc.max_send_message_length', -1),
                      ('grpc.max_receive_message_length', -1)]
    self.channel = grpc.insecure_channel(target=address,
                                         options=message_limits)
    self.job_id = job_id
    self.kv_stub = kv_pb2_grpc.KVServiceStub(self.channel)
    self.proc_stub = processor_pb2_grpc.ProcessServiceStub(self.channel)
    self.eggroll_context = eggroll_context
    _EggRoll.instance = self

    # todo: move to eggrollContext
    try:
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
    except socket.gaierror:
        # Resolution failed: both fields fall back to the same placeholder.
        self.host_name = self.host_ip = 'unknown'
def put_all(self, _table, data: Iterable, use_serialize=True,
            chunk_size=100000, skip_chunk=0, include_key=True,
            single_process=False):
    """Upload ``data`` into ``_table`` chunk by chunk via ``putAll``.

    The module-level ``gc_tag`` flag is cleared for the duration of the call
    and is always set back to ``True`` on exit, including on error.

    With ``single_process=True`` the chunks are sent sequentially over this
    instance's own KV stub. Otherwise the singleton's channel is closed,
    chunks are submitted to a ``ProcessPoolExecutor`` running
    ``_EggRoll.action``, and afterwards (also on error) a new channel and
    fresh KV / processor / session stubs are installed on the singleton.

    :param _table: destination table; its metadata accompanies each request.
    :param data: entries to upload. Treated as key/value pairs when
        ``include_key`` is true, otherwise wrapped in ``enumerate``.
    :param use_serialize: forwarded to the operand generator.
    :param chunk_size: accepted for interface compatibility only; the value
        actually used is ``self.chunk_size`` (or ``CHUNK_SIZE_DEFAULT`` when
        that is below ``CHUNK_SIZE_MIN``).
    :param skip_chunk: number of leading chunks to skip; counted here for
        the single-process and ``Sequence`` paths, and handed to the split
        helpers for the other paths.
    :param include_key: whether ``data`` already carries keys.
    :param single_process: send from this process, with no worker pool.
    """
    global gc_tag
    gc_tag = False
    try:
        skipped_chunk = 0
        # Kept as an equality test on purpose: only True/1 select the
        # "keys included" path, exactly as before.
        if include_key == True:  # noqa: E712
            kvs = data
        else:
            kvs = enumerate(data)

        # The configured chunk size overrides the argument.
        chunk_size = self.chunk_size
        if chunk_size < CHUNK_SIZE_MIN:
            chunk_size = CHUNK_SIZE_DEFAULT
        host = self.host
        port = self.port
        process_pool_size = cpu_count()

        if single_process is True:
            for chunked_iter in split_every_yield(kvs, chunk_size=chunk_size):
                if skipped_chunk < skip_chunk:
                    skipped_chunk += 1
                else:
                    self.kv_stub.putAll(
                        self.__generate_operand(
                            chunked_iter, use_serialize=use_serialize),
                        metadata=_get_meta(_table))
        else:
            instance = _EggRoll.get_instance()
            # The parent's channel is closed before worker processes start;
            # each worker opens its own channel in ``action``.
            instance.get_channel().close()
            try:
                # NOTE(review): the futures returned by ``submit`` are not
                # inspected, so an exception raised inside a worker is not
                # surfaced here - confirm whether that is intended.
                # Leaving the ``with`` block waits for all submitted work.
                with ProcessPoolExecutor(process_pool_size) as executor:
                    if isinstance(kvs, Sequence):
                        for chunked_iter in split_every_yield(kvs, chunk_size):
                            if skipped_chunk < skip_chunk:
                                skipped_chunk += 1
                            else:
                                executor.submit(_EggRoll.action, _table, host,
                                                port, chunked_iter,
                                                use_serialize)
                    elif isinstance(kvs, Generator) or isinstance(data, Generator):
                        while True:
                            chunked_iter = split_every_generator(
                                kvs, chunk_size, skip_chunk)
                            if chunked_iter == []:
                                break
                            executor.submit(_EggRoll.action, _table, host,
                                            port, chunked_iter, use_serialize)
                    else:
                        # Other iterable types: request chunk ``index`` until
                        # probing a copy of the chunk raises StopIteration.
                        try:
                            index = 0
                            while True:
                                chunked_iter = split_every(
                                    kvs, index, chunk_size, skip_chunk)
                                chunked_iter_ = copy.deepcopy(chunked_iter)
                                next(chunked_iter_)
                                executor.submit(_EggRoll.action, _table, host,
                                                port, chunked_iter,
                                                use_serialize)
                                index += 1
                        except StopIteration:
                            LOGGER.debug("StopIteration")
            finally:
                # Always give the singleton a usable channel again, even
                # when chunking or submission failed part-way through.
                instance.channel = grpc.insecure_channel(
                    target="{}:{}".format(host, port),
                    options=[('grpc.max_send_message_length', -1),
                             ('grpc.max_receive_message_length', -1)])
                instance.kv_stub = kv_pb2_grpc.KVServiceStub(
                    instance.get_channel())
                instance.proc_stub = processor_pb2_grpc.ProcessServiceStub(
                    instance.get_channel())
                instance.session_stub = node_manager_pb2_grpc.SessionServiceStub(
                    instance.get_channel())
    finally:
        gc_tag = True