def _get_path(self):
    """Return the directory backing this mutable dict, creating it if needed.

    Prefers ``<workdir[0]>/mutable_dict``.  If the primary workdir is low
    on free space (<66% blocks free), the data is placed on a fallback
    workdir and exposed through a symlink at the primary location.

    Raises:
        RuntimeError: when no workdir is configured at all.
    """
    dirs = env.get('WORKDIR')
    if not dirs:
        raise RuntimeError('No available workdir')
    path = os.path.join(dirs[0], 'mutable_dict')
    if os.path.exists(path):
        return path
    # Check free-space ratio of the primary workdir before committing to it.
    st = os.statvfs(dirs[0])
    ratio = st.f_bfree * 1.0 / st.f_blocks
    if ratio >= 0.66:
        mkdir_p(path)
        return path
    # Primary disk too full: create the dir on a fallback workdir and
    # point a symlink at it from the primary location.
    for d in dirs[1:]:
        p = os.path.join(d, 'mutable_dict')
        try:
            os.makedirs(p)
            os.symlink(p, path)
        except OSError:
            # Best-effort: dir/symlink may already exist or be unwritable.
            pass
        # NOTE(review): returns after the FIRST fallback dir regardless of
        # whether makedirs/symlink succeeded, so the RuntimeError below is
        # unreachable whenever dirs[1:] is non-empty — confirm intended.
        return path
    raise RuntimeError('Cannot find suitable workdir')
def __init__(self, tracker, path):
    """Bind this object to *tracker* with *path* as its root directory.

    Creating *path* is best-effort: filesystem failures (permissions,
    creation races) are deliberately ignored here and will surface on
    first actual use of the directory.
    """
    try:
        mkdir_p(path)
    except OSError:
        # Narrowed from a bare `except`, which also swallowed
        # KeyboardInterrupt/SystemExit; only FS errors are expected.
        pass
    self.tracker = tracker
    self.root = path
def start(self):
    """Start the per-process environment services (idempotent).

    Brings up, in order: workdirs, the tracker server (master only) and
    client, cache tracker, map-output tracker, shuffle fetcher, and the
    broadcast guide manager.  Imports are deferred to avoid import cycles.
    """
    if self.started:
        return
    self.started = True
    logger.debug("start env in %s", os.getpid())
    for d in self.workdir:
        utils.mkdir_p(d)
    # No TRACKER_ADDR registered yet means we are the master process:
    # start the server locally and publish its address.
    if 'TRACKER_ADDR' not in self.environ:
        from dpark.tracker import TrackerServer
        self.trackerServer = TrackerServer()
        self.trackerServer.start()
        self.register('TRACKER_ADDR', self.trackerServer.addr)
    from dpark.tracker import TrackerClient
    addr = self.get('TRACKER_ADDR')
    self.trackerClient = TrackerClient(addr)
    from dpark.cache import CacheTracker
    self.cacheTracker = CacheTracker()
    from dpark.shuffle import MapOutputTracker
    self.mapOutputTracker = MapOutputTracker()
    from dpark.shuffle import ParallelShuffleFetcher
    self.shuffleFetcher = ParallelShuffleFetcher(2)
    from dpark.broadcast import start_guide_manager, GUIDE_ADDR
    # Only start the guide manager once per process.
    if GUIDE_ADDR not in self.environ:
        start_guide_manager()
    logger.debug("env started")
def getOutputFile(cls, shuffle_id, input_id, output_id, datasize=0):
    """
    datasize < 0: disk first
    datasize > 0: memfirst
    datasize = 0: read only, use link
    """
    shuffleDir = env.get('WORKDIR')
    # Primary location: <workdir[0]>/<shuffle_id>/<input_id>/<output_id>
    path = os.path.join(shuffleDir[0], str(shuffle_id), str(input_id))
    mkdir_p(path)
    p = os.path.join(path, str(output_id))
    if datasize != 0 and len(shuffleDir) > 1:
        use_disk = datasize < 0
        if datasize > 0:
            # Spill to a secondary disk when the primary is short on
            # space: less than max(datasize, 1GB) free, or <66% free.
            st = os.statvfs(path)
            free = st.f_bfree * st.f_bsize
            ratio = st.f_bfree * 1.0 / st.f_blocks
            use_disk = free < max(datasize, 1 << 30) or ratio < 0.66
        if use_disk:
            # Write the real file on a random secondary workdir and leave
            # a symlink at the primary location pointing to it.
            d2 = os.path.join(random.choice(shuffleDir[1:]), str(shuffle_id), str(input_id))
            mkdir_p(d2)
            p2 = os.path.join(d2, str(output_id))
            if os.path.exists(p):
                os.remove(p)
            os.symlink(p2, p)
            # If p2 is itself a link, the chosen dir coincides with the
            # primary and we just created a self-referential symlink.
            if os.path.islink(p2):
                os.unlink(p2)  # p == p2
            return p2
    return p
def gen_broadcast_path(work_dirs, uuid):
    """Build a per-process file path for broadcast data.

    The file lives under ``<chosen workdir>/broadcast`` and is named
    ``<uuid>_<pid>`` so that concurrent processes sharing a workdir never
    collide on the same broadcast file.
    """
    base = os.path.join(decide_dir(work_dirs), 'broadcast')
    mkdir_p(base)
    return os.path.join(base, '%s_%d' % (uuid, os.getpid()))
def prepare_file_open(base, subpath):
    """Return ``base/subpath`` ready to be created fresh by the caller.

    Any stale file at that path is removed; otherwise the parent
    directory is created so a subsequent open() for writing succeeds.
    """
    target = os.path.join(base, subpath)
    if not os.path.exists(target):
        # Fresh path: make sure its parent directory exists.
        utils.mkdir_p(os.path.dirname(target))
    else:
        # Stale file in the way: drop it so the caller can recreate it.
        os.remove(target)
    return target
def getOutputFile(cls, shuffle_id, input_id, output_id, datasize=0):
    """Pick the file path for one shuffle output partition.

    datasize < 0: disk first
    datasize > 0: memfirst
    datasize = 0: read only, use link
    """
    work_dirs = env.get('WORKDIR')
    primary_dir = os.path.join(work_dirs[0], str(shuffle_id), str(input_id))
    mkdir_p(primary_dir)
    primary = os.path.join(primary_dir, str(output_id))
    if datasize == 0 or len(work_dirs) <= 1:
        return primary
    spill = datasize < 0
    if datasize > 0:
        # Prefer the primary dir unless it is short on space: either
        # less than max(datasize, 1GB) free, or under 66% blocks free.
        stats = os.statvfs(primary_dir)
        free_bytes = stats.f_bfree * stats.f_bsize
        free_ratio = stats.f_bfree * 1.0 / stats.f_blocks
        spill = free_bytes < max(datasize, 1 << 30) or free_ratio < 0.66
    if not spill:
        return primary
    # Write the real file on a random secondary workdir and leave a
    # symlink at the primary location pointing to it.
    alt_dir = os.path.join(
        random.choice(work_dirs[1:]), str(shuffle_id), str(input_id))
    mkdir_p(alt_dir)
    alt = os.path.join(alt_dir, str(output_id))
    if os.path.exists(primary):
        os.remove(primary)
    os.symlink(alt, primary)
    if os.path.islink(alt):
        # alt and primary are the same path: we just made a
        # self-referential symlink — undo it.
        os.unlink(alt)
    return alt
def export(self, tmppath, subpath):
    """Publish *tmppath* under the main workdir as *subpath* via a symlink.

    Removes any stale file already at the destination and creates parent
    directories as needed.

    Returns:
        The destination path.
    Raises:
        Exception: when *tmppath* does not exist.
    """
    if not os.path.exists(tmppath):
        # Bug fix: the old code passed logging-style varargs to
        # Exception(...) with a malformed "%" placeholder, so the message
        # was never interpolated; format it explicitly instead.
        raise Exception("tmppath %s for %s not exists" % (tmppath, subpath))
    path = os.path.join(self.main, subpath)
    if os.path.exists(path):
        logger.warning("rm old localfile %s", path)
        os.remove(path)
    dirpath = os.path.dirname(path)
    # Loop guards against a concurrent rmdir between the check and mkdir.
    while not os.path.exists(dirpath):
        utils.mkdir_p(dirpath)
    logger.debug("export %s %s", tmppath, path)
    os.symlink(tmppath, path)
    return path
def _choose_disk_workdir(self):
    """Pick a random usable disk workdir from ``self.workdirs[1:]``.

    Creates the directory if missing.  Falls back to the primary workdir
    (``workdirs[0]``) when no disk dir is configured or usable.
    """
    disk_dirs = list(self.workdirs[1:])
    if not disk_dirs:
        return self.workdirs[0]
    random.shuffle(disk_dirs)
    for d in disk_dirs:
        try:
            if not os.path.exists(d):
                utils.mkdir_p(d)
            return d
        except Exception:
            # This dir is unusable (bad mount, permissions): try the next.
            # Narrowed from a bare `except`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            continue
    # Note: the original used `for/else`, but with no `break` in the loop
    # the `else` clause always ran on fall-through — flattened here.
    logger.warning("_choose_disk_workdir fail")
    return self.workdirs[0]
def init(self, dpark_id):
    """Initialise workdirs for this dpark run (idempotent).

    Each comma-separated root in ``conf.DPARK_WORK_DIR`` gets a
    ``<root>/<dpark_id>`` workdir.  Roots that cannot be created are
    collected and only reported if NONE succeeded.
    """
    if self.inited:
        return
    roots = conf.DPARK_WORK_DIR.split(",")
    self.workdirs = []
    es = {}
    for i, root in enumerate(roots):
        try:
            # Loop guards against a concurrent rmdir between the
            # existence check and makedirs.
            while not os.path.exists(root):
                os.makedirs(root)
            os.chmod(root, 0o777)  # because umask
            workdir = os.path.join(root, dpark_id)
            self.workdirs.append(workdir)
        except Exception as e:
            # Best-effort per root; remember the failure for the error
            # message below.
            es[root] = e
    if not self.workdirs:
        raise Exception("workdirs not available: {}".format(es))
    utils.mkdir_p(self.main)  # executor will loc it
    self.inited = True
def registered(self, driver, executorInfo, frameworkInfo, agent_info):
    """Mesos callback: executor registered with the agent.

    Unpacks the driver-supplied bootstrap payload, sets up logging/stdio
    redirection, workdirs, the local web server, and background helper
    greenlets.  Any failure is logged with a full traceback and re-raised
    so Mesos sees the executor as failed.
    """
    try:
        global Script
        # Bootstrap payload marshalled by the scheduler side.
        (Script, cwd, python_path, osenv, self.parallel, out_logger,
         err_logger, logLevel, use_color,
         args) = marshal.loads(decode_data(executorInfo.data))
        sys.path = python_path
        os.environ.update(osenv)
        setproctitle('[Executor]' + Script)
        prefix = formatter_message(
            '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
            use_color)
        init_dpark_logger(logLevel, use_color=use_color)
        logging.root.setLevel(logLevel)
        # Redirect fd 1/2 so task output is captured by the out/err loggers.
        r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
        sys.stdout = r1.pipe_wfile
        r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
        sys.stderr = r2.pipe_wfile
        spawn_rconsole(locals())
        if os.path.exists(cwd):
            try:
                os.chdir(cwd)
            except Exception as e:
                logger.warning('change cwd to %s failed: %s', cwd, e)
        else:
            logger.warning('cwd (%s) not exists', cwd)
        self.workdir = args['WORKDIR']
        main_workdir = self.workdir[0]
        root = os.path.dirname(main_workdir)
        if not os.path.exists(root):
            os.mkdir(root)
            os.chmod(root, 0o777)  # because umask
        mkdir_p(main_workdir)
        # Hold a lock file so cleaners know this workdir is in use.
        self._try_flock(main_workdir)
        args['SERVER_URI'] = startWebServer(main_workdir)
        if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
            setup_cleaner_process(self.workdir)
        spawn(self.check_alive, driver)
        spawn(self.replier, driver)
        env.environ.update(args)
        from dpark.broadcast import start_download_manager
        start_download_manager()
        logger.debug('executor started at %s', agent_info.hostname)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        logger.error('init executor failed: %s', msg)
        raise
def get_tmp(cls):
    """Return a unique temp-file path for shuffle spill data.

    Prefers a random secondary workdir when one is configured, otherwise
    the primary; the chosen directory is created if missing.
    """
    work_dirs = env.get('WORKDIR')
    secondaries = work_dirs[1:]
    if secondaries:
        chosen = random.choice(secondaries)
    else:
        chosen = work_dirs[0]
    mkdir_p(chosen)
    return os.path.join(chosen, 'shuffle-%s.tmp' % uuid.uuid4().hex)
def registered(self, driver, executorInfo, frameworkInfo, agent_info):
    """Mesos callback: executor registered with the agent.

    Unpacks the driver-supplied bootstrap payload, sets up logging/stdio
    redirection, workdirs, the local web server, and background helper
    greenlets.  Any failure is logged with a full traceback and re-raised
    so Mesos sees the executor as failed.
    """
    try:
        global Script
        # Bootstrap payload marshalled by the scheduler side.
        (
            Script, cwd, python_path, osenv, self.parallel,
            out_logger, err_logger, logLevel, use_color, args
        ) = marshal.loads(decode_data(executorInfo.data))
        sys.path = python_path
        os.environ.update(osenv)
        setproctitle('[Executor]' + Script)
        prefix = formatter_message(
            '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
            use_color
        )
        init_dpark_logger(logLevel, use_color=use_color)
        logging.root.setLevel(logLevel)
        # Redirect fd 1/2 so task output is captured by the out/err loggers.
        r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
        sys.stdout = r1.pipe_wfile
        r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
        sys.stderr = r2.pipe_wfile
        spawn_rconsole(locals())
        if os.path.exists(cwd):
            try:
                os.chdir(cwd)
            except Exception as e:
                logger.warning('change cwd to %s failed: %s', cwd, e)
        else:
            logger.warning('cwd (%s) not exists', cwd)
        self.workdir = args['WORKDIR']
        main_workdir = self.workdir[0]
        root = os.path.dirname(main_workdir)
        if not os.path.exists(root):
            os.mkdir(root)
            os.chmod(root, 0o777)  # because umask
        mkdir_p(main_workdir)
        # Hold a lock file so cleaners know this workdir is in use.
        self._try_flock(main_workdir)
        args['SERVER_URI'] = startWebServer(main_workdir)
        if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
            setup_cleaner_process(self.workdir)
        spawn(self.check_alive, driver)
        spawn(self.replier, driver)
        env.environ.update(args)
        from dpark.broadcast import start_download_manager
        start_download_manager()
        logger.debug('executor started at %s', agent_info.hostname)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        logger.error('init executor failed: %s', msg)
        raise