Ejemplo n.º 1
0
    def start(self):
        if self.started:
            return
        self.started = True
        logger.debug("start env in %s", os.getpid())
        for d in self.workdir:
            util.mkdir_p(d)

        if 'TRACKER_ADDR' not in self.environ:
            from dpark.tracker import TrackerServer
            trackerServer = self.trackerServer = TrackerServer()
            self.trackerServer.start()
            self.register('TRACKER_ADDR', trackerServer.addr)

        from dpark.tracker import TrackerClient
        addr = self.get('TRACKER_ADDR')
        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import MapOutputTracker
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_guide_manager, GUIDE_ADDR
        if GUIDE_ADDR not in self.environ:
            start_guide_manager()

        logger.debug("env started")
Ejemplo n.º 2
0
    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")
Ejemplo n.º 3
0
Archivo: env.py Proyecto: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            if isLocal:
                root = roots[0] # for local mode 
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777) # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try: os.makedirs(d)
                    except OSError: pass
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Ejemplo n.º 4
0
    def start_slave(self):
        """Called after env is updated."""
        if self.slave_started:
            return
        self.slave_started = True

        self._start_common()

        compress = self.get(self.COMPRESS)
        if compress != utils.COMPRESS:
            raise Exception("no %s available" % compress)

        # init clients
        from dpark.tracker import TrackerClient
        self.trackerClient = TrackerClient(self.get(self.TRACKER_ADDR))

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        self.workdir.init(env.get(self.DPARK_ID))
        logger.debug("env started")