예제 #1
0
파일: env.py 프로젝트: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        """Bring up the per-process environment; a no-op once already started.

        On the master this derives the work directories from
        ``conf.DPARK_WORK_DIR`` (a list, or a comma-separated string),
        creates them and records ``WORKDIR`` and ``COMPRESS`` in
        ``self.environ``.  On a non-master the supplied ``environ`` is merged
        into ``self.environ`` and its ``COMPRESS`` value is checked against
        the local ``util.COMPRESS``.

        Afterwards, in both roles, a zmq context, the cache tracker, the map
        output tracker, the shuffle fetcher and the broadcast layer are
        initialised, and ``self.started`` is set to True.

        Args:
            isMaster: True when this process is the master.
            environ: mapping merged into ``self.environ`` when not master.
                It is only read here, never mutated.
            isLocal: True to use the ``Local*`` tracker implementations; on
                the master it also makes sure the first work root exists.

        Raises:
            Exception: on a non-master whose ``COMPRESS`` entry differs from
                ``util.COMPRESS``.
        """
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(",")
            if isLocal:
                root = roots[0]  # for local mode
                if not os.path.exists(root):
                    # ``0o777`` is accepted by Python 2.6+ and Python 3; the
                    # bare ``0777`` spelling is a syntax error on Python 3.
                    os.mkdir(root, 0o777)
                    os.chmod(root, 0o777)  # because of umask

            name = "%s-%s-%d" % (time.strftime("%Y%m%d-%H%M%S"), socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(r, name) for r in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError as e:
                        # Still best-effort: a concurrent creator or an
                        # unusable root must not abort start-up.  But if the
                        # directory really is missing, say so instead of
                        # swallowing the error silently.
                        if not os.path.isdir(d):
                            logger.warning("failed to create workdir %s: %s", d, e)
            self.environ["WORKDIR"] = self.workdir
            self.environ["COMPRESS"] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ["COMPRESS"] != util.COMPRESS:
                raise Exception("no %s available" % self.environ["COMPRESS"])

        self.ctx = zmq.Context()

        # Project imports are kept function-local, as in the original.
        from dpark.cache import CacheTracker, LocalCacheTracker

        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker

        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher

        # Alternative fetcher: SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast

        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
예제 #2
0
파일: env.py 프로젝트: XuYong/dpark
    def stop(self):
        """Tear down a started environment; does nothing if it is not started.

        Stops the shuffle fetcher, the cache tracker and the map output
        tracker (in that order), shuts the broadcast layer down, removes every
        directory listed in ``self.workdir`` (errors ignored) and finally
        clears ``self.started``.
        """
        running = getattr(self, 'started', False)
        if not running:
            return

        logger.debug("stop env in %s", os.getpid())

        # Attributes are resolved one at a time so each component is looked
        # up only right before its own stop() call.
        for attr_name in ('shuffleFetcher', 'cacheTracker', 'mapOutputTracker'):
            getattr(self, attr_name).stop()

        from dpark.broadcast import TheBroadcast
        TheBroadcast.shutdown()

        logger.debug("cleaning workdir ...")
        for path in self.workdir:
            shutil.rmtree(path, ignore_errors=True)
        logger.debug("done.")

        self.started = False
예제 #3
0
파일: env.py 프로젝트: wtzhuque/dpark
    def stop(self):
        """Tear down a started environment; does nothing if it is not started.

        Stops the shuffle fetcher, the cache tracker and the map output
        tracker (in that order), then the tracker server when this process is
        the master.  After that the broadcast layer is shut down, every
        directory listed in ``self.workdir`` is removed (errors ignored) and
        ``self.started`` is cleared.
        """
        running = getattr(self, 'started', False)
        if not running:
            return

        logger.debug("stop env in %s", os.getpid())

        # Attributes are resolved one at a time so each component is looked
        # up only right before its own stop() call.
        for attr_name in ('shuffleFetcher', 'cacheTracker', 'mapOutputTracker'):
            getattr(self, attr_name).stop()

        if self.isMaster:
            # Only the master owns a tracker server.
            self.trackerServer.stop()

        from dpark.broadcast import TheBroadcast
        TheBroadcast.shutdown()

        logger.debug("cleaning workdir ...")
        for path in self.workdir:
            shutil.rmtree(path, ignore_errors=True)
        logger.debug("done.")

        self.started = False
예제 #4
0
 def broadcast(self, v):
     """Return ``v`` wrapped in a ``TheBroadcast`` object.

     ``self.start()`` is called first; the wrapper is then built from the
     value and this object's ``isLocal`` flag.
     """
     self.start()  # runs before the broadcast module is imported and used
     from dpark.broadcast import TheBroadcast
     local_mode = self.isLocal
     wrapped = TheBroadcast(v, local_mode)
     return wrapped
예제 #5
0
파일: env.py 프로젝트: wtzhuque/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        """Bring up the per-process environment; a no-op once already started.

        On the master this derives the work directories from
        ``conf.DPARK_WORK_DIR`` (a list, or a comma-separated string),
        creates them and records ``WORKDIR`` and ``COMPRESS`` in
        ``self.environ``.  On a non-master the supplied ``environ`` is merged
        into ``self.environ`` and its ``COMPRESS`` value is checked against
        the local ``util.COMPRESS``.

        Afterwards a zmq context is created.  The master starts a
        ``TrackerServer`` and registers its address under ``'TrackerAddr'``;
        a non-master looks that address up.  In both roles a
        ``TrackerClient`` for the address is created, followed by the cache
        tracker, the map output tracker, the shuffle fetcher and the
        broadcast layer; finally ``self.started`` is set to True.

        Args:
            isMaster: True when this process is the master.
            environ: mapping merged into ``self.environ`` when not master.
                It is only read here, never mutated.
            isLocal: True to use the ``Local*`` tracker implementations; on
                the master it also makes sure the first work root exists.

        Raises:
            Exception: on a non-master whose ``COMPRESS`` entry differs from
                ``util.COMPRESS``.
        """
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            if isLocal:
                root = roots[0]  # for local mode
                if not os.path.exists(root):
                    # ``0o777`` is accepted by Python 2.6+ and Python 3; the
                    # bare ``0777`` spelling is a syntax error on Python 3.
                    os.mkdir(root, 0o777)
                    os.chmod(root, 0o777)  # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(r, name) for r in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError as e:
                        # Still best-effort: a concurrent creator or an
                        # unusable root must not abort start-up.  But if the
                        # directory really is missing, say so instead of
                        # swallowing the error silently.
                        if not os.path.isdir(d):
                            logger.warning("failed to create workdir %s: %s", d, e)
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        # Project imports are kept function-local, as in the original.
        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            # NOTE(review): ``env`` is a name defined outside this method
            # (presumably the module-level environment object) -- confirm.
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker()
        else:
            self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker()
        else:
            self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        # Alternative fetcher: SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")