def test():
    # Smoke test for the shuffle machinery (Python 2 era: print statement,
    # cPickle, iterator .next()).  Exercises heap-merge of shuffled runs,
    # a hand-written map-output file, fetching, and the MapOutputTracker.
    from dpark.util import compress
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start(True)
    # Build ten independently shuffled (key, value) runs and merge them
    # with value-combining (x + y) under the shuffle memory limit.
    l = []
    for i in range(10):
        d = zip(range(10000), range(10000))
        random.shuffle(d)
        l.append(SortedItems(d))
    hl = heap_merged(l, lambda x, y: x + y, MAX_SHUFFLE_MEMORY)
    for i in range(10):
        print i, hl.next()
    # Write one fake map output by hand: a 'p' flag byte, a 4-byte packed
    # length covering header + payload (5 + len(d)), then the compressed
    # pickled payload.
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(cPickle.dumps({'key': 'value'}, -1))
    f = open(path, 'w')
    f.write('p' + struct.pack('I', 5 + len(d)) + d)
    f.close()
    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        # The fetched iterator must yield exactly the pair written above.
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)
    # Registration/lookup round trip on a standalone tracker.
    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
def start(self):
    """Start this context exactly once.

    Initialises internal state, brings up the shared dpark env and the
    scheduler, registers a stop hook with atexit, and installs
    best-effort signal handlers that shut the scheduler down cleanly.
    Idempotent: returns immediately if already started.
    """
    if self.started:
        return
    self.init()
    env.start(True, isLocal=self.isLocal)
    self.scheduler.start()
    self.started = True
    atexit.register(self.stop)

    def handler(signm, frame):
        # signm is the numeric signal; shut the scheduler down on receipt.
        logger.error("got signal %d, exit now", signm)
        self.scheduler.shutdown()

    try:
        signal.signal(signal.SIGTERM, handler)
        signal.signal(signal.SIGHUP, handler)
        signal.signal(signal.SIGABRT, handler)
        signal.signal(signal.SIGQUIT, handler)
    except Exception:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt /
        # SystemExit are not swallowed.  Registration is best-effort; it
        # raises ValueError when not running on the main thread.
        pass

    # Optional debugging console; ignore if rfoo is not installed.
    try:
        from rfoo.utils import rconsole
        rconsole.spawn_server(locals(), 0)
    except ImportError:
        pass
def test():
    """Smoke test for the shuffle path.

    Writes one fake map-output file, fetches it back through
    SimpleShuffleFetcher, and exercises the MapOutputTracker
    registration/lookup round trip.
    """
    from dpark.utils import compress
    import logging
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start()
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(pickle.dumps({'key': 'value'}, -1))
    # BUG FIX: the payload is bytes (compressed pickle), so the file must
    # be opened in binary mode -- text mode 'w' raises TypeError on
    # Python 3.  `with` also guarantees the handle is closed on error.
    with open(path, 'wb') as f:
        f.write(pack_header(len(d), False, False) + d)
    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        # The fetched stream must yield exactly the pair written above.
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)
    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
def setUp(self):
    # NOTE(review): the early `return` below disables this fixture -- the
    # MesosScheduler/MockDriver wiring after it is unreachable.  It looks
    # deliberately parked (presumably to avoid contacting a live mesos
    # master at localhost:5050); confirm intent before removing either part.
    return env.start(True)
    self.sched = MesosScheduler('mesos://localhost:5050', parse_options())
    self.driver = MockDriver()
    self.sched.driver = self.driver
    self.sched.start()
def test():
    # Smoke test, Python 2 era (print statement, cPickle, .next()).
    # Merges ten pre-sorted runs, writes a raw pickled map output, fetches
    # it back, and checks tracker lookup in both tracker modes.
    l = []
    for i in range(10):
        # py2 zip returns a list, consumed directly by sorted_items.
        d = zip(range(10000), range(10000))
        l.append(sorted_items(d))
    hl = heap_merged(l, lambda x, y: x + y)
    for i in range(10):
        print i, hl.next()
    import logging
    logging.basicConfig(level=logging.INFO)
    from dpark.env import env
    import cPickle
    env.start(True)
    # Map output here is a bare pickled list of pairs -- no compression
    # or length header in this variant.
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    f = open(path, 'w')
    f.write(cPickle.dumps([('key', 'value')], -1))
    f.close()
    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(k, v):
        # Fetch callback receives each pair; only one was written.
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)
    # Both the registering tracker (True) and a second tracker (False)
    # must resolve the same server URIs.
    tracker = MapOutputTracker(True)
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    ntracker = MapOutputTracker(False)
    assert ntracker.getServerUris(2) == [None, uri, None, None, None]
    ntracker.stop()
    tracker.stop()
def start(self):
    """Start this context exactly once.

    Initialises internal state, brings up the shared dpark env and the
    scheduler, registers a stop hook with atexit, and installs
    best-effort signal handlers that shut the scheduler down cleanly.
    Idempotent: returns immediately if already started.
    """
    if self.started:
        return
    self.init()
    env.start(True)
    self.scheduler.start()
    self.started = True
    atexit.register(self.stop)

    def handler(signm, frame):
        # signm is the numeric signal; shut the scheduler down on receipt.
        logger.error("got signal %d, exit now", signm)
        self.scheduler.shutdown()

    try:
        signal.signal(signal.SIGTERM, handler)
        signal.signal(signal.SIGHUP, handler)
        signal.signal(signal.SIGABRT, handler)
        signal.signal(signal.SIGQUIT, handler)
    except Exception:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt /
        # SystemExit are not swallowed.  Registration is best-effort; it
        # raises ValueError when not running on the main thread.
        pass

    # Optional debugging console; ignore if rfoo is not installed.
    try:
        from rfoo.utils import rconsole
        rconsole.spawn_server(locals(), 0)
    except ImportError:
        pass
def worker(procname, q, task_id_value, task_data):
    """Worker entry point for one task.

    Labels the current thread and process for observability, brings up
    the per-process dpark env, runs the task, and reports the result on
    queue `q` tagged with the task id.
    """
    label = "task %s" % (task_id_value,)
    threading.current_thread().name = label
    setproctitle(procname)
    # Make this process a preferred OOM-killer victim.
    set_oom_score(100)
    env.start()
    result = run_task(task_data)
    q.put((task_id_value, result))
def worker(procname, q, task_id_value, task_data):
    """Run one task in this worker process and report its result.

    Sets the thread name and process title for observability, marks the
    process as a preferred OOM-killer victim, starts the per-process
    dpark env, then pushes (task_id_value, result) onto `q`.
    """
    threading.current_thread().name = "task %s" % (task_id_value, )
    setproctitle(procname)
    set_oom_score(100)
    env.start()
    q.put((task_id_value, run_task(task_data)))
def run_task_in_process(task, tid, environ):
    """Execute `task` (attempt `tid`) inside this subprocess.

    Starts the per-process dpark env in non-master mode with the
    environment handed down from the parent, then delegates to
    run_task.  A KeyboardInterrupt terminates the process cleanly.
    """
    from dpark.env import env
    env.start(False, environ)
    logger.debug("run task in process %s %s", task, tid)
    try:
        result = run_task(task, tid)
    except KeyboardInterrupt:
        sys.exit(0)
    else:
        return result
def run_task_in_process(task, tid, environ):
    """Execute `task` (attempt `tid`) inside this subprocess.

    Points SERVER_URI at the first workdir (file:// scheme) so outputs
    are served locally, starts the per-process dpark env in non-master
    mode, then delegates to run_task.  A KeyboardInterrupt terminates
    the process cleanly.
    """
    from dpark.env import env
    # assumes environ['WORKDIR'] is a non-empty sequence of paths --
    # TODO(review) confirm against the caller.
    environ['SERVER_URI'] = 'file://%s' % environ.get('WORKDIR')[0]
    env.start(False, environ)
    logger.debug("run task in process %s %s", task, tid)
    try:
        result = run_task(task, tid)
    except KeyboardInterrupt:
        sys.exit(0)
    else:
        return result
def start(self):
    """Start this context exactly once.

    Initialises state, brings up the shared env and the scheduler,
    registers a shutdown hook (which also stops the optional web UI),
    and spawns the remote console.  Idempotent: returns immediately if
    already started.
    """
    if self.started:
        return

    def shutdown():
        # Stop the context first, then the web UI if dpark.web exists.
        self.stop()
        try:
            import dpark.web
            dpark.web.stop(self.web_port)
        except ImportError:
            pass

    self.init()
    env.start()
    self.scheduler.start()
    self.started = True
    _shutdown_handlers.append(shutdown)
    spawn_rconsole(locals())
def test():
    # Smoke test for the shuffle machinery (Python 2 era: print statement,
    # cPickle, iterator .next()).  Exercises heap-merge of shuffled runs,
    # a hand-written map-output file, fetching, and the MapOutputTracker.
    from dpark.util import compress
    import logging
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start(True)
    # Build ten independently shuffled (key, value) runs and merge them
    # with value-combining (x + y) under the shuffle memory limit.
    l = []
    for i in range(10):
        d = zip(range(10000), range(10000))
        random.shuffle(d)
        l.append(SortedItems(d))
    hl = heap_merged(l, lambda x, y: x + y, MAX_SHUFFLE_MEMORY)
    for i in range(10):
        print i, hl.next()
    # Write one fake map output by hand: a 'p' flag byte, a 4-byte packed
    # length covering header + payload (5 + len(d)), then the compressed
    # pickled payload.
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(cPickle.dumps({'key': 'value'}, -1))
    f = open(path, 'w')
    f.write('p' + struct.pack('I', 5 + len(d)) + d)
    f.close()
    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        # The fetched iterator must yield exactly the pair written above.
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)
    # Registration/lookup round trip on a standalone tracker.
    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
def start(self):
    """Start this context exactly once.

    Initialises state, brings up the shared env and the scheduler,
    registers a shutdown hook (which also stops the optional web UI),
    and spawns the optional rfoo remote console.  Idempotent: returns
    immediately if already started.
    """
    if self.started:
        return

    def shutdown():
        # Stop the context first, then the web UI if dpark.web exists.
        self.stop()
        try:
            import dpark.web
            dpark.web.stop(self.web_port)
        except ImportError:
            pass

    self.init()
    env.start()
    self.scheduler.start()
    self.started = True
    _shutdown_handlers.append(shutdown)
    # Optional debugging console; ignore if rfoo is not installed.
    try:
        from rfoo.utils import rconsole
        rconsole.spawn_server(locals(), 0)
    except ImportError:
        pass
def init_env(args):
    # Worker-process initializer: bring up the per-process dpark env in
    # non-master mode with `args` handed down from the parent process.
    env.start(False, args)
def init_env(args):
    # Worker-process initializer: advertise an idle process title, then
    # bring up the per-process dpark env in non-master mode with `args`
    # handed down from the parent process.
    setproctitle('dpark worker: idle')
    env.start(False, args)
def init_env(args, workdir):
    """Initialize this worker process.

    Advertises an idle process title, starts the per-process dpark env
    in non-master mode with `args` from the parent, and kicks off a
    background thread that cleans up `workdir`.
    """
    setproctitle('dpark worker: idle')
    env.start(False, args)
    cleaner = threading.Thread(target=cleanup, args=[workdir])
    cleaner.start()
def init_env(args, port):
    # Worker-process initializer: advertise an idle process title, then
    # bring up the per-process dpark env in non-master mode with `args`
    # from the parent, listening on the given `port`.
    setproctitle('dpark worker: idle')
    env.start(False, args, port=port)
def worker(name, q, task_id_value, task_data):
    """Run one task in this worker process.

    Sets the process title, starts the per-process dpark env, executes
    the task, and reports (task_id_value, result) on queue `q`.
    """
    setproctitle(name)
    env.start()
    result = run_task(task_data)
    q.put((task_id_value, result))