Esempio n. 1
0
    def start(self, isMaster, environ={}, isLocal=False):
        """Set up the per-process environment; later calls are no-ops.

        On the master a fresh working directory is created and recorded in
        ``self.environ['WORKDIR']``; otherwise the supplied ``environ`` is
        merged into ``self.environ``.  After that the cache tracker, the
        shuffle components and the broadcast layer are initialised.

        :param isMaster: true when running as the master; it is also passed
            on to every tracker/initializer called below.
        :param environ: mapping merged into ``self.environ`` when
            ``isMaster`` is false.  It is only read here, never mutated.
        :param isLocal: when true the root directory is ``/tmp/dpark``;
            otherwise it is taken from ``$DPARK_SHARE_DIR``.
        :raises Exception: if no root directory could be determined.
        :raises OSError: if the root or working directory cannot be created.
        """
        if getattr(self, 'started', False):
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        if isMaster:
            root = '/tmp/dpark'
            if not isLocal:
                root = os.environ.get("DPARK_SHARE_DIR")
            if not root:
                raise Exception("no shuffle dir exists")
            # Create the root without a check-then-create race: another
            # process may create it between an exists() test and mkdir().
            try:
                os.mkdir(root, 0o777)
            except OSError:
                if not os.path.isdir(root):
                    raise
            else:
                os.chmod(root, 0o777)  # because of umask
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = os.path.join(root, name)
            os.makedirs(self.workdir)
            self.environ['WORKDIR'] = self.workdir
        else:
            self.environ.update(environ)

        from cache import CacheTracker
        self.cacheTracker = CacheTracker(isMaster)

        from shuffle import LocalFileShuffle, MapOutputTracker, SimpleShuffleFetcher
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker(isMaster)
        self.shuffleFetcher = SimpleShuffleFetcher()

        from broadcast import Broadcast
        Broadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Esempio n. 2
0
    def start(self, isMaster, environ={}, isLocal=False):
        """Set up the per-process environment; later calls are no-ops.

        Records ``isMaster``/``isLocal`` on the instance.  On the master a
        fresh working directory is created and ``WORKDIR`` and ``COMPRESS``
        are stored in ``self.environ``; otherwise ``environ`` is merged into
        ``self.environ`` and its ``COMPRESS`` entry must equal
        ``util.COMPRESS``.  Then a zmq context, the cache tracker, the
        shuffle components and the broadcast layer are created/initialised.

        :param isMaster: true when running as the master; also passed on to
            every tracker/initializer called below.
        :param environ: mapping merged into ``self.environ`` when
            ``isMaster`` is false.  It is only read here, never mutated.
        :param isLocal: selects the ``Local*`` tracker classes instead of the
            regular ones.
        :raises Exception: if ``self.environ['COMPRESS']`` differs from
            ``util.COMPRESS`` on a non-master.
        :raises OSError: if the root or working directory cannot be created.
        """
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                     isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            if 'DPARK_WORK_DIR' in os.environ:
                root = os.environ['DPARK_WORK_DIR']
            else:
                root = '/tmp/dpark'

            # Create the root without a check-then-create race: another
            # process may create it between an exists() test and mkdir().
            try:
                os.mkdir(root, 0o777)
            except OSError:
                if not os.path.isdir(root):
                    raise
            else:
                os.chmod(root, 0o777)  # because of umask
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = os.path.join(root, name)
            os.makedirs(self.workdir)
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        # SimpleShuffleFetcher is imported but currently unused; the parallel
        # fetcher is the one installed.
        from shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from broadcast import Broadcast
        Broadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Esempio n. 3
0
    def start(self, isMaster, environ={}, isLocal=False):
        """Bring the environment up for this process (idempotent).

        Stores ``isMaster`` and ``isLocal`` on the instance.  A master
        creates a new working directory under ``$DPARK_WORK_DIR`` (or
        ``/tmp/dpark`` when that variable is unset) and stores ``WORKDIR``
        and ``COMPRESS`` in ``self.environ``.  A non-master merges
        ``environ`` into ``self.environ`` and requires its ``COMPRESS``
        entry to equal ``util.COMPRESS``.  Afterwards a zmq context, the
        cache tracker, the shuffle components and the broadcast layer are
        created/initialised.

        :param isMaster: true when running as the master; forwarded to each
            tracker/initializer called below.
        :param environ: mapping merged into ``self.environ`` when
            ``isMaster`` is false.  Read-only in this method.
        :param isLocal: picks the ``Local*`` tracker classes when true.
        :raises Exception: on a ``COMPRESS`` mismatch for a non-master.
        :raises OSError: if the root or working directory cannot be created.
        """
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            # Membership test instead of has_key(), which Python 3 removed.
            if 'DPARK_WORK_DIR' in os.environ:
                root = os.environ['DPARK_WORK_DIR']
            else:
                root = '/tmp/dpark'

            # mkdir first and tolerate "already a directory": an
            # exists()-then-mkdir() sequence can lose a race with a
            # concurrently starting process.
            try:
                os.mkdir(root, 0o777)
            except OSError:
                if not os.path.isdir(root):
                    raise
            else:
                os.chmod(root, 0o777)  # because of umask
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = os.path.join(root, name)
            os.makedirs(self.workdir)
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        # SimpleShuffleFetcher is imported but not used at present; only the
        # parallel fetcher is installed.
        from shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from broadcast import Broadcast
        Broadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Esempio n. 4
0
    def start(self, isMaster, environ={}, isLocal=False, port=None):
        """Set up the per-process environment; later calls are no-ops.

        On the master the root directory is chosen in this order: the
        literal ``/tmp/dpark`` when ``isLocal`` is true, else
        ``$DPARK_SHARE_DIR``, else ``$DPARK_WORK_DIR``.  ``self.dfs`` is set
        to True for the first two choices and False for the last.  A fresh
        working directory is created below the root, and ``WORKDIR`` and
        ``DPARK_HAS_DFS`` (``str(self.dfs)``) are stored in
        ``self.environ``.  A non-master merges ``environ`` into
        ``self.environ`` and derives ``self.dfs`` by comparing
        ``DPARK_HAS_DFS`` with the string ``'True'``.

        :param isMaster: true when running as the master; also passed on to
            every tracker/initializer called below.
        :param environ: mapping merged into ``self.environ`` when
            ``isMaster`` is false.  It is only read here, never mutated.
        :param isLocal: forces the ``/tmp/dpark`` root on the master.
        :param port: forwarded unchanged to ``LocalFileShuffle.initialize``.
        :raises Exception: if none of the root directory sources applies.
        :raises OSError: if the root or working directory cannot be created.
        """
        if getattr(self, 'started', False):
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                     isMaster, environ)
        if isMaster:
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            if isLocal:
                root = '/tmp/dpark'
                self.dfs = True
            elif 'DPARK_SHARE_DIR' in os.environ:
                root = os.environ['DPARK_SHARE_DIR']
                self.dfs = True
            elif 'DPARK_WORK_DIR' in os.environ:
                root = os.environ['DPARK_WORK_DIR']
                self.dfs = False
            else:
                raise Exception("no shuffle dir exists")
            # Create the root without a check-then-create race: another
            # process may create it between an exists() test and mkdir().
            try:
                os.mkdir(root, 0o777)
            except OSError:
                if not os.path.isdir(root):
                    raise
            else:
                os.chmod(root, 0o777)  # because of umask
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = os.path.join(root, name)
            os.makedirs(self.workdir)
            self.environ['WORKDIR'] = self.workdir
            self.environ['DPARK_HAS_DFS'] = str(self.dfs)
        else:
            self.environ.update(environ)
            self.dfs = (self.environ['DPARK_HAS_DFS'] == 'True')

        from cache import CacheTracker
        self.cacheTracker = CacheTracker(isMaster)

        from shuffle import LocalFileShuffle, MapOutputTracker, SimpleShuffleFetcher
        LocalFileShuffle.initialize(isMaster, port)
        self.mapOutputTracker = MapOutputTracker(isMaster)
        self.shuffleFetcher = SimpleShuffleFetcher()

        from broadcast import Broadcast
        Broadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Esempio n. 5
0
File: env.py Progetto: haiger/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        """Bring the environment up for this process (idempotent).

        A master creates a new working directory and records it in
        ``self.environ["WORKDIR"]``; a non-master merges ``environ`` into
        ``self.environ``.  The cache tracker, the shuffle components and the
        broadcast layer are then initialised.

        :param isMaster: true when running as the master; forwarded to each
            tracker/initializer called below.
        :param environ: mapping merged into ``self.environ`` when
            ``isMaster`` is false.  Read-only in this method.
        :param isLocal: when true the root directory is ``/tmp/dpark``;
            otherwise it comes from ``$DPARK_SHARE_DIR``.
        :raises Exception: if no root directory could be determined.
        :raises OSError: if the root or working directory cannot be created.
        """
        if getattr(self, "started", False):
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        if isMaster:
            root = "/tmp/dpark"
            if not isLocal:
                root = os.environ.get("DPARK_SHARE_DIR")
            if not root:
                raise Exception("no shuffle dir exists")
            # mkdir first and tolerate "already a directory": an
            # exists()-then-mkdir() sequence can lose a race with a
            # concurrently starting process.
            try:
                os.mkdir(root, 0o777)
            except OSError:
                if not os.path.isdir(root):
                    raise
            else:
                os.chmod(root, 0o777)  # because of umask
            name = "%s-%s-%d" % (time.strftime("%Y%m%d-%H%M%S"), socket.gethostname(), os.getpid())
            self.workdir = os.path.join(root, name)
            os.makedirs(self.workdir)
            self.environ["WORKDIR"] = self.workdir
        else:
            self.environ.update(environ)

        from cache import CacheTracker

        self.cacheTracker = CacheTracker(isMaster)

        from shuffle import LocalFileShuffle, MapOutputTracker, SimpleShuffleFetcher

        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker(isMaster)
        self.shuffleFetcher = SimpleShuffleFetcher()

        from broadcast import Broadcast

        Broadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")