def init(job_id=None, mode: WorkMode = WorkMode.STANDALONE, naming_policy: NamingPolicy = NamingPolicy.DEFAULT):
    """Initialise the process-wide eggroll runtime (legacy, context-based API).

    No-op when a runtime has already been created. When *job_id* is omitted a
    fresh uuid1 id is generated and the default log directory is used;
    otherwise logs go under ``<project>/logs/<job_id>``.
    """
    if RuntimeInstance.EGGROLL:
        # Already initialised — keep the existing runtime untouched.
        return
    if job_id is None:
        job_id = str(uuid.uuid1())
        LoggerFactory.setDirectory()
    else:
        log_dir = os.path.join(file_utils.get_project_base_directory(), 'logs', job_id)
        LoggerFactory.setDirectory(log_dir)
    RuntimeInstance.MODE = mode
    context = EggRollContext(naming_policy=naming_policy)
    if mode == WorkMode.STANDALONE:
        from eggroll.api.standalone.eggroll import Standalone
        runtime = Standalone(job_id=job_id, eggroll_context=context)
    elif mode == WorkMode.CLUSTER:
        from eggroll.api.cluster.eggroll import _EggRoll
        from eggroll.api.cluster.eggroll import init as c_init
        c_init(job_id, eggroll_context=context)
        runtime = _EggRoll.get_instance()
    else:
        # Fallback path: simple_roll-based runtime.
        from eggroll.api.cluster import simple_roll
        simple_roll.init(job_id)
        runtime = simple_roll.EggRoll.get_instance()
    RuntimeInstance.EGGROLL = runtime
    # Pre-create the cluster-communication table used by the federation layer.
    RuntimeInstance.EGGROLL.table("__clustercomm__", job_id, partition=10)
def build_eggroll_runtime(work_mode: WorkMode, eggroll_session: EggrollSession):
    """Construct and return an eggroll runtime for *work_mode*.

    Raises:
        ValueError: when *work_mode* is neither standalone nor cluster.

    NOTE(review): in cluster mode, when an ``_EggRoll`` instance already
    exists this also falls through to the ValueError — confirm that this
    "already initialised" path is intended to fail rather than return the
    existing instance.
    """
    if work_mode.is_standalone():
        from eggroll.api.standalone.eggroll import Standalone
        return Standalone(eggroll_session)
    if work_mode.is_cluster():
        from eggroll.api.cluster.eggroll import eggroll_init, _EggRoll
        if _EggRoll.instance is None:
            return eggroll_init(eggroll_session)
    raise ValueError(f"work_mode: {work_mode} not supported!")
def maybe_create_eggroll_client():
    """
    a tricky way to set eggroll client which may be used by spark tasks.
    WARM: This may be removed or adjusted in future!
    """
    import pickle
    from pyspark.taskcontext import TaskContext
    # SECURITY NOTE(review): pickle.loads on data carried in a spark local
    # property — acceptable only because the property is set by our own
    # driver process; never allow untrusted input to reach this.
    mode, eggroll_session = pickle.loads(
        bytes.fromhex(TaskContext.get().getLocalProperty(_EGGROLL_CLIENT)))
    if mode == 1:
        # Fix: _EggRoll was previously imported twice (outside and inside
        # the instance check); a single import suffices.
        from eggroll.api import ComputingEngine
        from eggroll.api.cluster.eggroll import _EggRoll
        if _EggRoll.instance is None:
            # First use on this worker: build the cluster runtime and
            # register it on the session for later lookup.
            eggroll_runtime = _EggRoll(eggroll_session=eggroll_session)
            eggroll_session.set_runtime(ComputingEngine.EGGROLL_DTABLE, eggroll_runtime)
    else:
        from eggroll.api.standalone.eggroll import Standalone
        Standalone(eggroll_session)
def init(session_id=None, mode: WorkMode = WorkMode.STANDALONE,
         server_conf_path="eggroll/conf/server_conf.json",
         eggroll_session: EggrollSession = None,
         computing_engine_conf=None, naming_policy=NamingPolicy.DEFAULT,
         tag=None, job_id=None, chunk_size=100000):
    """Initialise the process-wide eggroll runtime (session-based API).

    No-op when a runtime already exists. Generates a uuid1 *session_id* when
    none is given; *job_id* defaults to *session_id*. Logs are written under
    ``<project>/logs/<session_id>``.

    BUG FIX: a caller-supplied *eggroll_session* was previously accepted but
    silently discarded and rebuilt; it is now reused when provided (the
    default of None preserves the old behavior for all existing callers).

    Note: *chunk_size* is accepted for interface compatibility but is not
    used anywhere in this function.
    """
    if RuntimeInstance.EGGROLL:
        # Already initialised — keep the existing runtime untouched.
        return
    if not session_id:
        session_id = str(uuid.uuid1())
    LoggerFactory.setDirectory(
        os.path.join(file_utils.get_project_base_directory(), 'logs', session_id))
    if not job_id:
        job_id = session_id
    RuntimeInstance.MODE = mode
    if eggroll_session is None:
        eggroll_session = EggrollSession(session_id=session_id, naming_policy=naming_policy)
    if mode == WorkMode.STANDALONE:
        from eggroll.api.standalone.eggroll import Standalone
        RuntimeInstance.EGGROLL = Standalone(eggroll_session=eggroll_session)
    elif mode == WorkMode.CLUSTER:
        from eggroll.api.cluster.eggroll import _EggRoll
        from eggroll.api.cluster.eggroll import init as c_init
        c_init(session_id=session_id,
               server_conf_path=server_conf_path,
               computing_engine_conf=computing_engine_conf,
               naming_policy=naming_policy,
               tag=tag,
               job_id=job_id)
        RuntimeInstance.EGGROLL = _EggRoll.get_instance()
    else:
        # Fallback path: simple_roll-based runtime.
        from eggroll.api.cluster import simple_roll
        simple_roll.init(job_id)
        RuntimeInstance.EGGROLL = simple_roll.EggRoll.get_instance()
    # Pre-create the cluster-communication table used by the federation layer.
    RuntimeInstance.EGGROLL.table("__clustercomm__", job_id, partition=10)