Example #1
def test():
    import logging
    import random
    import struct
    import cPickle  # Python 2 snippet: this version of dpark predates the py3 port
    from dpark.util import compress
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start(True)

    l = []
    for i in range(10):
        d = zip(range(10000), range(10000))
        random.shuffle(d)
        l.append(SortedItems(d))
    hl = heap_merged(l, lambda x, y: x + y, MAX_SHUFFLE_MEMORY)
    for i in range(10):
        print i, hl.next()

    # Write one shuffle block by hand: a 'p' flag byte plus an unsigned-int
    # length that counts the 5-byte header as well as the compressed payload.
    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(cPickle.dumps({'key': 'value'}, -1))
    f = open(path, 'wb')  # binary mode: the block is raw bytes
    f.write('p' + struct.pack('I', 5 + len(d)) + d)
    f.close()

    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)

    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
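
To sanity-check the hand-written block above, it can be read back with the same header layout. A minimal sketch, assuming dpark.util also exposes a decompress() that mirrors compress():

def read_block_back(path):
    import struct
    import cPickle
    from dpark.util import decompress  # assumed counterpart of compress
    f = open(path, 'rb')
    flag = f.read(1)                        # 'p' marks a compressed pickle
    size, = struct.unpack('I', f.read(4))   # length includes the 5-byte header
    payload = f.read(size - 5)
    f.close()
    assert flag == 'p'
    return cPickle.loads(decompress(payload))

# read_block_back(LocalFileShuffle.getOutputFile(1, 0, 0)) == {'key': 'value'}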
Example #2
    def start(self):
        if self.started:
            return

        self.init()

        env.start(True, isLocal=self.isLocal)
        self.scheduler.start()
        self.started = True
        atexit.register(self.stop)

        def handler(signm, frame):
            logger.error("got signal %d, exit now", signm)
            self.scheduler.shutdown()

        try:
            signal.signal(signal.SIGTERM, handler)
            signal.signal(signal.SIGHUP, handler)
            signal.signal(signal.SIGABRT, handler)
            signal.signal(signal.SIGQUIT, handler)
        except Exception:
            # some signals are unavailable on certain platforms or outside
            # the main thread; run on without those handlers
            pass

        try:
            from rfoo.utils import rconsole
            rconsole.spawn_server(locals(), 0)
        except ImportError:
            pass
Example #3
def test():
    import pickle
    import logging
    from dpark.utils import compress
    logging.basicConfig(level=logging.DEBUG)
    from dpark.env import env
    env.start()

    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    d = compress(pickle.dumps({'key': 'value'}, -1))
    f = open(path, 'wb')  # binary mode: the header and payload are bytes
    f.write(pack_header(len(d), False, False) + d)
    f.close()

    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()

    def func(it):
        k, v = next(it)
        assert k == 'key'
        assert v == 'value'

    fetcher.fetch(1, 0, func)

    tracker = MapOutputTracker()
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    tracker.stop()
Example #4
    def setUp(self):
        # The early return deliberately disables this setup; the code below
        # is kept for reference but never runs.
        return
        env.start(True)
        self.sched = MesosScheduler('mesos://localhost:5050', parse_options())
        self.driver = MockDriver()
        self.sched.driver = self.driver
        self.sched.start()
Example #5
def test():
    # Merge ten pre-sorted streams, summing values that share a key.
    l = []
    for i in range(10):
        d = zip(range(10000), range(10000))
        l.append(sorted_items(d))
    hl = heap_merged(l, lambda x, y: x + y)
    for i in range(10):
        print i, hl.next()

    import logging
    logging.basicConfig(level=logging.INFO)
    from dpark.env import env
    import cPickle
    env.start(True)

    path = LocalFileShuffle.getOutputFile(1, 0, 0)
    f = open(path, 'wb')  # binary mode for the pickled payload
    f.write(cPickle.dumps([('key', 'value')], -1))
    f.close()

    uri = LocalFileShuffle.getServerUri()
    env.mapOutputTracker.registerMapOutputs(1, [uri])
    fetcher = SimpleShuffleFetcher()
    def func(k, v):
        assert k == 'key'
        assert v == 'value'
    fetcher.fetch(1, 0, func)

    tracker = MapOutputTracker(True)
    tracker.registerMapOutputs(2, [None, uri, None, None, None])
    assert tracker.getServerUris(2) == [None, uri, None, None, None]
    ntracker = MapOutputTracker(False)
    assert ntracker.getServerUris(2) == [None, uri, None, None, None]
    ntracker.stop()
    tracker.stop()
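
For context, heap_merged above lazily merges several pre-sorted (key, value) streams, combining the values of equal keys. This standalone sketch shows the idea with the standard library; it is illustrative only, not dpark's implementation:

import heapq
from itertools import groupby
from operator import itemgetter

def merge_combine(streams, combine):
    # streams must already be sorted by key; heapq.merge keeps the merge
    # lazy, and groupby folds together runs of equal keys
    for key, group in groupby(heapq.merge(*streams), key=itemgetter(0)):
        values = iter(group)
        total = next(values)[1]
        for _, v in values:
            total = combine(total, v)
        yield key, total

streams = [sorted(zip(range(5), range(5))) for _ in range(3)]
for k, v in merge_combine(streams, lambda x, y: x + y):
    print k, v  # each key once, its values summed across the three streams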
Example #6
    def start(self):
        if self.started:
            return

        self.init()

        env.start(True)
        self.scheduler.start()
        self.started = True
        atexit.register(self.stop)

        def handler(signm, frame):
            logger.error("got signal %d, exit now", signm)
            self.scheduler.shutdown()
        try:
            signal.signal(signal.SIGTERM, handler)
            signal.signal(signal.SIGHUP, handler)
            signal.signal(signal.SIGABRT, handler)
            signal.signal(signal.SIGQUIT, handler)
        except Exception:
            # some signals are unavailable on certain platforms; skip them
            pass

        try:
            from rfoo.utils import rconsole
            rconsole.spawn_server(locals(), 0)
        except ImportError:
            pass
Example #7
def worker(procname, q, task_id_value, task_data):
    # Runs in a child process: label the thread and the process for ps/top,
    # lower the OOM priority, start the dpark environment, then hand the
    # task result back through the queue.
    task_id_str = "task %s" % (task_id_value,)
    threading.current_thread().name = task_id_str
    setproctitle(procname)
    set_oom_score(100)
    env.start()
    q.put((task_id_value, run_task(task_data)))
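
A function with this shape is meant to be the target of a child process. A minimal sketch of the call site, in which the task payload and the literal arguments are placeholders rather than dpark code:

from multiprocessing import Process, Queue

q = Queue()
task_data = None  # placeholder: real payloads come from the scheduler
p = Process(target=worker, args=('dpark worker: busy', q, 42, task_data))
p.start()
task_id, result = q.get()  # blocks until the worker publishes its result
p.join()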
Example #8
def run_task_in_process(task, tid, environ):
    import sys
    from dpark.env import env
    # Start the environment with the settings inherited from the master.
    env.start(False, environ)

    logger.debug("run task in process %s %s", task, tid)
    try:
        return run_task(task, tid)
    except KeyboardInterrupt:
        sys.exit(0)
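
run_task_in_process fits naturally behind a process pool, with environ carrying whatever env.start() needs in the child. A minimal sketch of that wiring; the pool size and the task, tid, and environ values are placeholders, not dpark's actual setup:

from multiprocessing import Pool

pool = Pool(processes=4)
# task, tid, and environ would come from the scheduler in real use
async_result = pool.apply_async(run_task_in_process, (task, tid, environ))
result = async_result.get()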
Example #9
def run_task_in_process(task, tid, environ):
    import sys
    from dpark.env import env
    # Serve this worker's shuffle output directly from its first working
    # directory over a file:// URI.
    workdir = environ.get('WORKDIR')
    environ['SERVER_URI'] = 'file://%s' % workdir[0]
    env.start(False, environ)

    logger.debug("run task in process %s %s", task, tid)
    try:
        return run_task(task, tid)
    except KeyboardInterrupt:
        sys.exit(0)
Example #10
    def start(self):
        def shutdown():
            self.stop()
            try:
                import dpark.web
                dpark.web.stop(self.web_port)
            except ImportError:
                pass

        if self.started:
            return

        self.init()

        env.start()
        self.scheduler.start()
        self.started = True
        _shutdown_handlers.append(shutdown)

        spawn_rconsole(locals())
Example #11
    def start(self):
        def shutdown():
            self.stop()
            try:
                import dpark.web
                dpark.web.stop(self.web_port)
            except ImportError:
                pass

        if self.started:
            return

        self.init()

        env.start()
        self.scheduler.start()
        self.started = True
        _shutdown_handlers.append(shutdown)

        try:
            from rfoo.utils import rconsole
            rconsole.spawn_server(locals(), 0)
        except ImportError:
            pass
Example #12
def init_env(args):
    env.start(False, args)
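
This signature matches a multiprocessing pool initializer: each child runs init_env once before it receives any work. A minimal sketch of the wiring, where the pool and the args dict are assumptions rather than dpark's actual setup:

from multiprocessing import Pool

args = {}  # placeholder for the environ dict the master would pass down
pool = Pool(processes=4, initializer=init_env, initargs=(args,))
# every child has now run env.start(False, args) before its first task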
Example #13
def init_env(args):
    setproctitle('dpark worker: idle')
    env.start(False, args)
Example #14
def init_env(args, workdir):
    import threading
    setproctitle('dpark worker: idle')
    env.start(False, args)
    # Sweep stale files out of the working directory in the background.
    threading.Thread(target=cleanup, args=[workdir]).start()
Example #15
def init_env(args, port):
    setproctitle('dpark worker: idle')
    env.start(False, args, port=port)
Example #16
def worker(name, q, task_id_value, task_data):
    setproctitle(name)
    env.start()
    q.put((task_id_value, run_task(task_data)))