def registered(self, driver, executorInfo, frameworkInfo, agent_info):
    """Initialise this executor from the payload in ``executorInfo.data``.

    Presumably invoked as the Mesos executor "registered" callback --
    confirm against the driver that owns this object.

    Args:
        driver: handed to the ``check_alive`` and ``replier`` threads.
        executorInfo: its ``data`` field is decoded with ``decode_data`` and
            unmarshalled into the ten-element startup tuple.
        frameworkInfo: not used by this method.
        agent_info: only ``agent_info.hostname`` is read, for the final
            debug log line.

    Raises:
        Exception: any failure during initialisation is logged with its
            traceback and then re-raised unchanged.
    """
    try:
        # ``Script`` is published as a module-level global.
        global Script
        (
            Script, cwd, python_path, osenv, self.parallel,
            out_logger, err_logger, logLevel, use_color, dpark_env
        ) = marshal.loads(decode_data(executorInfo.data))

        # Adopt the import path and environment shipped in the payload.
        sys.path = python_path
        os.environ.update(osenv)
        setproctitle('[Executor]' + Script)

        # Every forwarded output line is tagged with this host's name.
        prefix = formatter_message(
            '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
            use_color
        )

        init_dpark_logger(logLevel, use_color=use_color)
        logging.root.setLevel(logLevel)

        # Redirect fd 1 / fd 2 and point sys.stdout / sys.stderr at the
        # write ends exposed by the Redirect objects.
        r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
        sys.stdout = r1.pipe_wfile
        r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
        sys.stderr = r2.pipe_wfile

        # NOTE: the whole local namespace is handed over here, so renaming
        # or removing a local above changes what spawn_rconsole receives.
        spawn_rconsole(locals())

        # A missing or unusable cwd is only warned about, never fatal.
        if os.path.exists(cwd):
            try:
                os.chdir(cwd)
            except Exception as e:
                logger.warning('change cwd to %s failed: %s', cwd, e)
        else:
            logger.warning('cwd (%s) not exists', cwd)

        env.workdir.init(dpark_env.get(env.DPARK_ID))
        self._try_flock(env.workdir.main)
        dpark_env['SERVER_URI'] = startWebServer(env.workdir.main)
        if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
            env.workdir.setup_cleaner_process()

        # Background threads that receive the driver.
        spawn(self.check_alive, driver)
        spawn(self.replier, driver)

        # dpark_env now also carries SERVER_URI (set above).
        env.environ.update(dpark_env)
        # Imported at call time rather than at module import.
        from dpark.broadcast import start_download_manager
        start_download_manager()

        logger.debug('executor started at %s', agent_info.hostname)

    except Exception as e:
        # Log the full traceback, then let the original exception propagate.
        import traceback
        msg = traceback.format_exc()
        logger.error('init executor failed: %s', msg)
        raise
def registered(self, driver, executorInfo, frameworkInfo, agent_info):
    """Set up the executor process using the data in ``executorInfo.data``.

    Presumably the Mesos executor "registered" callback -- confirm against
    the code that drives this object.

    Args:
        driver: passed on to the ``check_alive`` and ``replier`` threads.
        executorInfo: ``executorInfo.data`` is run through ``decode_data``
            and ``marshal.loads`` to obtain the startup tuple.
        frameworkInfo: unused here.
        agent_info: ``agent_info.hostname`` is logged once startup succeeds.

    Raises:
        Exception: whatever went wrong is logged (with traceback) and then
            re-raised as-is.
    """
    try:
        # The script name is stored in the module-level ``Script`` global.
        global Script
        (Script, cwd, python_path, osenv, self.parallel,
         out_logger, err_logger, logLevel, use_color,
         dpark_env) = marshal.loads(decode_data(executorInfo.data))

        # Replace sys.path and extend os.environ from the payload.
        sys.path = python_path
        os.environ.update(osenv)
        setproctitle('[Executor]' + Script)

        # Prefix for forwarded output: the hostname, padded to 10 columns.
        prefix = formatter_message(
            '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
            use_color)

        init_dpark_logger(logLevel, use_color=use_color)
        logging.root.setLevel(logLevel)

        # fd 1 and fd 2 are redirected; sys.stdout / sys.stderr are rebound
        # to the Redirect objects' write-side file objects.
        r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
        sys.stdout = r1.pipe_wfile
        r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
        sys.stderr = r2.pipe_wfile

        # NOTE: passes every local defined so far; keep local names stable.
        spawn_rconsole(locals())

        # Failing to enter cwd is logged but does not abort startup.
        if os.path.exists(cwd):
            try:
                os.chdir(cwd)
            except Exception as e:
                logger.warning('change cwd to %s failed: %s', cwd, e)
        else:
            logger.warning('cwd (%s) not exists', cwd)

        env.workdir.init(dpark_env.get(env.DPARK_ID))
        self._try_flock(env.workdir.main)
        dpark_env['SERVER_URI'] = startWebServer(env.workdir.main)
        if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
            env.workdir.setup_cleaner_process()

        # Both helper threads get the driver as their argument.
        spawn(self.check_alive, driver)
        spawn(self.replier, driver)

        # Publish dpark_env (including SERVER_URI added above).
        env.environ.update(dpark_env)
        # Function-scope import: resolved only when this method runs.
        from dpark.broadcast import start_download_manager
        start_download_manager()

        logger.debug('executor started at %s', agent_info.hostname)

    except Exception as e:
        # Record the traceback, then propagate the same exception.
        import traceback
        msg = traceback.format_exc()
        logger.error('init executor failed: %s', msg)
        raise
def startReceiver(self):
    """Spawn a thread that drains ``self.func()`` into ``self._messages``.

    The thread repeatedly calls ``self.func()`` to obtain an iterable and
    appends every message it yields to ``self._messages`` while holding
    ``self._lock``. When the iterable is exhausted, or iterating it raises
    an ordinary exception (which is logged), a fresh iterable is requested.
    The thread returns once a message arrives while
    ``self.ssc.sc.started`` is false.
    """
    def _run():
        while True:
            # Called outside the ``try``: an error raised by ``self.func()``
            # itself is not retried and ends the thread.
            generator = self.func()
            try:
                for message in generator:
                    if not self.ssc.sc.started:
                        return
                    with self._lock:
                        self._messages.append(message)
            except Exception:
                # Only ordinary errors are logged and retried. A bare
                # ``except:`` would also swallow SystemExit /
                # KeyboardInterrupt / GeneratorExit and keep looping.
                logger.exception('fail to receive')

    spawn(_run)
def startReceiver(self):
    """Start the background receiver thread.

    The spawned thread loops forever: it obtains an iterable from
    ``self.func()``, appends each yielded message to ``self._messages``
    under ``self._lock``, and starts over when the iterable ends or fails
    with an ordinary exception (logged via ``logger.exception``). It stops
    when a message is received while ``self.ssc.sc.started`` is false.
    """
    def _run():
        while True:
            # Not covered by the ``try`` below: if ``self.func()`` raises,
            # the thread terminates.
            generator = self.func()
            try:
                for message in generator:
                    if not self.ssc.sc.started:
                        return
                    with self._lock:
                        self._messages.append(message)
            except Exception:
                # Narrowed from a bare ``except:`` so that interpreter-exit
                # signals (SystemExit, KeyboardInterrupt, GeneratorExit)
                # are no longer swallowed and retried.
                logger.exception('fail to receive')

    spawn(_run)
def start(self):
    """Create the request/result queues and spawn the fetch threads.

    Does nothing when ``self._started`` is already true. ``self.results``
    is bounded to ``self.nthreads`` entries; ``self.requests`` is unbounded.
    """
    if not self._started:
        self._started = True
        self.requests = queue.Queue()
        self.results = queue.Queue(self.nthreads)

        # One worker per configured thread, all running ``_fetch_thread``.
        workers = []
        for _ in range(self.nthreads):
            workers.append(spawn(self._fetch_thread))
        self.threads = workers
def start_driver(self):
    """Create and start the Mesos scheduler driver plus its watchdog thread.

    The framework name is built from the command line (truncated to 256
    characters followed by ``'...'``). After the driver is started, a
    thread is spawned that, once per second and under ``self.lock``,
    stops the scheduler when it has had no active tasksets for more than
    ``MAX_IDLE_TIME`` seconds, and otherwise requests more resources for
    every taskset whose ``check_task_timeout()`` is true.

    Raises:
        Exception: if the current user is ``root``.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_args = ' '.join(sys.argv[1:])
    name = '[dpark] ' + script_path + ' ' + script_args
    if len(name) > 256:
        name = name[:256] + '...'

    framework = Dict()
    framework.user = getuser()
    if framework.user == 'root':
        raise Exception('dpark is not allowed to run as \'root\'')
    framework.name = name
    # role and webui_url are only set when configured (truthy).
    if self.role:
        framework.role = self.role
    framework.hostname = socket.gethostname()
    if self.webui_url:
        framework.webui_url = self.webui_url

    self.driver = MesosSchedulerDriver(
        self, framework, self.master, use_addict=True
    )
    self.driver.start()
    # NOTE(review): 'Scheudler' is misspelled in this message; kept as-is.
    logger.debug('Mesos Scheudler driver started')

    self.started = True
    self.last_finish_time = time.time()

    def check():
        while self.started:
            with self.lock:
                now = time.time()
                idle_seconds = now - self.last_finish_time
                if not self.active_tasksets and idle_seconds > MAX_IDLE_TIME:
                    logger.info(
                        'stop mesos scheduler after %d seconds idle',
                        idle_seconds)
                    self.stop()
                    break

                for taskset in self.active_tasksets.values():
                    if taskset.check_task_timeout():
                        self.requestMoreResources()
            # Sleep with the lock released.
            time.sleep(1)

    spawn(check)
def _createThriftServer(self):
    """Build a non-blocking Thrift server for scribe and start serving.

    Returns:
        tuple: ``(server, port, buf_que)`` -- the server object, the port
        reported by the listening socket, and the deque handed to the
        ``ScribeHandler``.
    """
    buf_que = deque()
    transport = TSocket.TServerSocket(host='0.0.0.0')
    server = TNonblockingServer.TNonblockingServer(
        Processor(ScribeHandler(buf_que)),
        transport,
        TBinaryProtocol.TBinaryProtocolFactory(False, False),
    )
    server._stop = False

    # Keep retrying until prepare() succeeds and the bound port is known;
    # socket errors are ignored and the attempt is repeated immediately.
    while True:
        try:
            server.prepare()
            port = transport.handle.getsockname()[1]
            logger.info('get scribe port succeed: %d', port)
        except socket.error:
            continue
        break

    spawn(server.serve)
    return server, port, buf_que
def start(self):
    """Start the fetch workers once; later calls are no-ops.

    Sets ``self._started``, creates an unbounded ``self.requests`` queue
    and a ``self.results`` queue bounded to ``self.nthreads``, then spawns
    ``self.nthreads`` threads running ``self._fetch_thread``.
    """
    if not self._started:
        self._started = True
        self.requests = queue.Queue()
        self.results = queue.Queue(self.nthreads)

        spawned = []
        for _ in range(self.nthreads):
            spawned.append(spawn(self._fetch_thread))
        self.threads = spawned
def start(self):
    """Bind a zmq PULL socket on a random port and collect log lines.

    A spawned thread polls the socket (1000 ms timeout) while
    ``self._started`` is true, writing each received message to
    ``self.output``; once the flag is cleared it closes the socket and
    destroys the context. The public address is stored in ``self.addr``.
    """
    context = zmq.Context()
    pull_sock = context.socket(zmq.PULL)
    port = pull_sock.bind_to_random_port('tcp://0.0.0.0')
    self._started = True

    def collect_log():
        while self._started:
            # Nothing arrived within the poll timeout: re-check the flag.
            if not pull_sock.poll(1000, zmq.POLLIN):
                continue
            self.output.write(pull_sock.recv())
        pull_sock.close()
        context.destroy()

    spawn(collect_log)

    self.addr = 'tcp://%s:%d' % (socket.gethostname(), port)
    logger.debug('log collecter start at %s', self.addr)
def _createThriftServer(self):
    """Create the scribe Thrift server, wait until it is bound, serve it.

    Returns:
        tuple: ``(server, port, buf_que)`` where ``buf_que`` is the deque
        given to the ``ScribeHandler`` and ``port`` is read from the
        listening socket after ``server.prepare()`` succeeds.
    """
    buf_que = deque()
    scribe_handler = ScribeHandler(buf_que)
    binary_factory = TBinaryProtocol.TBinaryProtocolFactory(False, False)
    listen_socket = TSocket.TServerSocket(host='0.0.0.0')
    server = TNonblockingServer.TNonblockingServer(
        Processor(scribe_handler), listen_socket, binary_factory)
    server._stop = False

    # Retry on socket errors until the server is prepared and the port
    # has been read back from the listening socket.
    while True:
        try:
            server.prepare()
            port = listen_socket.handle.getsockname()[1]
            logger.info('get scribe port succeed: %d', port)
        except socket.error:
            continue
        break

    spawn(server.serve)
    return server, port, buf_que
def start_guide(self):
    """Bind the guide REP socket and serve guide requests in a new thread.

    The socket is bound to a random TCP port and the resulting address is
    stored in ``self.guide_addr``. The spawned loop polls with a 1000 ms
    timeout while ``self._started`` is true and answers exactly one reply
    per request:

    * ``GUIDE_STOP``        -- replies ``0`` and leaves the loop.
    * ``GUIDE_GET_SOURCES`` -- replies with ``self.guides[uuid]`` or
      ``None`` when the uuid is unknown.
    * ``GUIDE_SET_SOURCES`` -- records ``bitmap`` for ``addr`` when the
      bitmap has any truthy entry; replies ``None``.
    * ``GUIDE_REPORT_BAD``  -- removes ``addr`` from the uuid's sources
      unless it is the registering address; replies ``None``.
    * anything else         -- logged as an error; replies ``None``.

    Returns:
        Whatever ``spawn`` returns for the serving thread.
    """
    sock = self.ctx.socket(zmq.REP)
    port = sock.bind_to_random_port('tcp://0.0.0.0')
    self.guide_addr = 'tcp://%s:%d' % (self.host, port)

    def run():
        logger.debug("guide start at %s", self.guide_addr)

        while self._started:
            if not sock.poll(1000, zmq.POLLIN):
                continue

            type_, msg = sock.recv_pyobj()
            if type_ == GUIDE_STOP:
                sock.send_pyobj(0)
                break
            elif type_ == GUIDE_GET_SOURCES:
                uuid = msg
                sources = None
                if uuid in self.guides:
                    sources = self.guides[uuid]
                else:
                    logger.warning(
                        'uuid %s NOT REGISTERED in guide server', uuid)
                sock.send_pyobj(sources)
            elif type_ == GUIDE_SET_SOURCES:
                uuid, addr, bitmap = msg
                if any(bitmap):
                    sources = None
                    if uuid in self.guides:
                        sources = self.guides[uuid]
                    if sources:
                        sources[addr] = bitmap
                    else:
                        # First source seen for this uuid: remember it as
                        # the registering address.
                        self.guides[uuid] = {addr: bitmap}
                        self.register_addr[uuid] = addr
                sock.send_pyobj(None)
            elif type_ == GUIDE_REPORT_BAD:
                uuid, addr = msg
                # An unknown uuid must not raise here: a KeyError would
                # end this thread and leave the REP request unanswered.
                sources = self.guides[uuid] if uuid in self.guides else None
                if sources is None:
                    logger.warning(
                        'uuid %s NOT REGISTERED in guide server', uuid)
                elif addr in sources:
                    if addr != self.register_addr[uuid]:
                        del sources[addr]
                    else:
                        logger.warning(
                            'The addr %s to delete is the register Quit!!!',
                            addr)
                sock.send_pyobj(None)
            else:
                logger.error('Unknown guide message: %s %s', type_, msg)
                sock.send_pyobj(None)

    return spawn(run)
def startWebServer(path):
    """Return an HTTP URI under which the directory ``path`` is served.

    A probe file named ``test`` containing ``path`` is written into the
    directory (which is created if missing). If fetching that file from
    this host on ``DEFAULT_WEB_PORT`` returns the same content, the
    default server's URI is returned. Otherwise a ``TCPServer`` using
    ``LocalizedHTTP`` is started on a system-chosen port, serving from the
    parent of ``path``, and its URI is returned.

    Args:
        path: directory to expose.

    Returns:
        str: ``http://<hostname>:<port>/<basename of path>``.
    """
    # check the default web server
    if not os.path.exists(path):
        os.makedirs(path)
    probe_file = os.path.join(path, 'test')
    with open(probe_file, 'w') as f:
        f.write(path)

    dirname = os.path.basename(path)
    default_uri = 'http://%s:%d/%s' % (
        socket.gethostname(), DEFAULT_WEB_PORT, dirname)
    try:
        data = urllib.request.urlopen(default_uri + '/' + 'test').read()
        if data == path.encode('utf-8'):
            return default_uri
    except IOError:
        # Default server unreachable: fall through and start our own.
        pass

    logger.warning('default webserver at %s not available', DEFAULT_WEB_PORT)
    LocalizedHTTP.basedir = os.path.dirname(path)
    # Port 0 lets the OS pick a free port.
    ss = socketserver.TCPServer(('0.0.0.0', 0), LocalizedHTTP)
    spawn(ss.serve_forever)
    return 'http://%s:%d/%s' % (
        socket.gethostname(), ss.server_address[1], dirname)
def start_guide(self):
    """Start the guide server: a zmq REP socket served by a spawned thread.

    Binds to a random TCP port, stores the address in ``self.guide_addr``
    and returns the result of ``spawn(run)``. While ``self._started`` is
    true the loop polls (1000 ms) and sends exactly one reply per request:

    * ``GUIDE_STOP``        -- reply ``0`` and exit the loop.
    * ``GUIDE_GET_SOURCES`` -- reply the sources dict for the uuid, or
      ``None`` if the uuid is not registered.
    * ``GUIDE_SET_SOURCES`` -- store ``bitmap`` under ``addr`` if the
      bitmap contains a truthy value; reply ``None``.
    * ``GUIDE_REPORT_BAD``  -- drop ``addr`` from the uuid's sources unless
      it equals the registering address; reply ``None``.
    * unknown type          -- log an error and reply ``None``.
    """
    sock = self.ctx.socket(zmq.REP)
    port = sock.bind_to_random_port('tcp://0.0.0.0')
    self.guide_addr = 'tcp://%s:%d' % (self.host, port)

    def run():
        logger.debug("guide start at %s", self.guide_addr)

        while self._started:
            if not sock.poll(1000, zmq.POLLIN):
                continue

            type_, msg = sock.recv_pyobj()
            if type_ == GUIDE_STOP:
                sock.send_pyobj(0)
                break
            elif type_ == GUIDE_GET_SOURCES:
                uuid = msg
                sources = None
                if uuid in self.guides:
                    sources = self.guides[uuid]
                else:
                    logger.warning('uuid %s NOT REGISTERED in guide server',
                                   uuid)
                sock.send_pyobj(sources)
            elif type_ == GUIDE_SET_SOURCES:
                uuid, addr, bitmap = msg
                if any(bitmap):
                    sources = None
                    if uuid in self.guides:
                        sources = self.guides[uuid]
                    if sources:
                        sources[addr] = bitmap
                    else:
                        # No usable entry yet: create it and record this
                        # address as the registering one.
                        self.guides[uuid] = {addr: bitmap}
                        self.register_addr[uuid] = addr
                sock.send_pyobj(None)
            elif type_ == GUIDE_REPORT_BAD:
                uuid, addr = msg
                # Guard the lookup: indexing an unregistered uuid would
                # raise KeyError, killing this thread before the reply.
                sources = self.guides[uuid] if uuid in self.guides else None
                if sources is None:
                    logger.warning('uuid %s NOT REGISTERED in guide server',
                                   uuid)
                elif addr in sources:
                    if addr != self.register_addr[uuid]:
                        del sources[addr]
                    else:
                        logger.warning('The addr %s to delete is the register Quit!!!', addr)
                sock.send_pyobj(None)
            else:
                logger.error('Unknown guide message: %s %s', type_, msg)
                sock.send_pyobj(None)

    return spawn(run)
def startWebServer(path):
    """Make ``path`` reachable over HTTP and return the URI to use.

    Writes a ``test`` file holding ``path`` into the directory (creating
    the directory first if needed) and tries to read it back through the
    web server on ``DEFAULT_WEB_PORT``. On a match the default server's
    URI is returned; on a mismatch or ``IOError`` a ``LocalizedHTTP``
    ``TCPServer`` is started on an OS-assigned port and its URI returned.

    Args:
        path: directory to serve.

    Returns:
        str: URI of the form ``http://<hostname>:<port>/<basename>``.
    """
    # check the default web server
    if not os.path.exists(path):
        os.makedirs(path)
    with open(os.path.join(path, 'test'), 'w') as probe:
        probe.write(path)

    leaf = os.path.basename(path)
    default_uri = 'http://%s:%d/%s' % (
        socket.gethostname(), DEFAULT_WEB_PORT, leaf)
    try:
        served = urllib.request.urlopen(default_uri + '/' + 'test').read()
        if served == path.encode('utf-8'):
            return default_uri
    except IOError:
        # Could not reach the default server; start a private one below.
        pass

    logger.warning('default webserver at %s not available', DEFAULT_WEB_PORT)
    LocalizedHTTP.basedir = os.path.dirname(path)
    # Binding to port 0 asks the OS for any free port.
    ss = socketserver.TCPServer(('0.0.0.0', 0), LocalizedHTTP)
    spawn(ss.serve_forever)
    uri = 'http://%s:%d/%s' % (
        socket.gethostname(), ss.server_address[1], leaf)
    return uri
def __init__(self, fd, addr, prefix):
    """Replace file descriptor ``fd`` with the write end of a new pipe.

    Args:
        fd: descriptor number to take over.
        addr: stored on the instance; not used in this method (presumably
            consumed by ``_forward`` -- confirm).
        prefix: stored on the instance; not used in this method.
    """
    self.fd = fd
    self.addr = addr
    self.prefix = prefix

    # Duplicate the descriptor before it is closed and replaced below.
    self.fd_dup = os.dup(self.fd)
    self.origin_wfile = None

    # Pipe whose read side is wrapped for binary reads and whose write
    # side is wrapped as an unbuffered binary file (buffering=0).
    self.pipe_rfd, self.pipe_wfd = os.pipe()
    self.pipe_rfile = os.fdopen(self.pipe_rfd, 'rb')
    self.pipe_wfile = os.fdopen(self.pipe_wfd, 'wb', 0)

    # From here on, ``fd`` refers to the pipe's write end.
    os.close(self.fd)
    os.dup2(self.pipe_wfd, self.fd)
    # assert os.dup(self.pipe_wfd) == self.fd, 'redirect io failed'

    self.ctx = zmq.Context()
    self._shutdown = False
    self.thread = None
    self.sock = None

    # Started last, after every attribute above has been initialised.
    self.thread = spawn(self._forward)
def __init__(self, fd, addr, prefix):
    """Redirect descriptor ``fd`` into a freshly created pipe.

    Args:
        fd: the descriptor number being redirected.
        addr: kept as ``self.addr``; unused here (presumably read by
            ``_forward`` -- confirm).
        prefix: kept as ``self.prefix``; unused here.
    """
    self.fd = fd
    self.addr = addr
    self.prefix = prefix

    # Take a duplicate of ``fd`` while it still refers to its original
    # target; ``fd`` itself is closed and re-pointed further down.
    self.fd_dup = os.dup(self.fd)
    self.origin_wfile = None

    # Read side: binary file object. Write side: binary, unbuffered.
    self.pipe_rfd, self.pipe_wfd = os.pipe()
    self.pipe_rfile = os.fdopen(self.pipe_rfd, 'rb')
    self.pipe_wfile = os.fdopen(self.pipe_wfd, 'wb', 0)

    # After dup2, ``fd`` is another handle on the pipe's write end.
    os.close(self.fd)
    os.dup2(self.pipe_wfd, self.fd)
    # assert os.dup(self.pipe_wfd) == self.fd, 'redirect io failed'

    self.ctx = zmq.Context()
    self._shutdown = False
    self.thread = None
    self.sock = None

    # The forwarding thread is spawned only once all state is in place.
    self.thread = spawn(self._forward)
def start(self, start):
    """Schedule the first tick after ``start`` and launch the timer thread.

    ``self.nextTime`` becomes the first multiple of ``self.period`` that is
    strictly greater than ``start``.

    Args:
        start: reference time, in the same unit as ``self.period``.
    """
    periods_elapsed = int(start // self.period)
    self.nextTime = (periods_elapsed + 1) * self.period
    self.stopped = False
    self.thread = spawn(self.run)
    logger.debug("RecurringTimer started, nextTime is %d", self.nextTime)
def start(self, start):
    """Arm the timer and spawn the thread that runs ``self.run``.

    ``self.nextTime`` is set to the next multiple of ``self.period``
    strictly after ``start``; ``self.stopped`` is cleared.

    Args:
        start: reference time, in the same unit as ``self.period``.
    """
    whole_periods = int(start // self.period)
    self.nextTime = (whole_periods + 1) * self.period
    self.stopped = False
    self.thread = spawn(self.run)
    logger.debug("RecurringTimer started, nextTime is %d", self.nextTime)
def run():
    """Serve broadcast requests on ``sock`` until stopped.

    Polls ``sock`` with a 1000 ms timeout while ``self._started`` is true
    and sends exactly one reply per request:

    * ``SERVER_STOP``       -- reply ``None`` and leave the loop.
    * ``SERVER_FETCH``      -- reply ``(SERVER_FETCH_OK, (indices, blocks))``
      with blocks taken from ``self.master_broadcast_blocks`` or read from
      the file recorded in ``self.uuid_state_dict``; reply
      ``(SERVER_FETCH_FAIL, None)`` if the uuid is unknown or any index is
      out of range.
    * ``DATA_GET``          -- reply ``DATA_GET_OK`` when the uuid's state
      flag is set; otherwise start a download thread if none exists and
      reply ``DATA_DOWNLOADING`` (``DATA_GET_FAIL`` if no sources found).
    * ``SERVER_CLEAR_ITEM`` -- clear the uuid and reply ``None``.
    * anything else         -- log an error and reply ``None``.

    On exit the socket is closed and every uuid in
    ``self.uuid_state_dict`` is cleared.
    """
    logger.debug("server started at %s", server_addr)

    while self._started:
        if not sock.poll(1000, zmq.POLLIN):
            continue

        type_, msg = sock.recv_pyobj()
        logger.debug('server recv: %s %s', type_, msg)
        if type_ == SERVER_STOP:
            sock.send_pyobj(None)
            break
        elif type_ == SERVER_FETCH:
            uuid, indices, client_addr = msg
            if uuid in self.master_broadcast_blocks:
                blocks = self.master_broadcast_blocks[uuid]
                block_num = len(blocks)
                # Validate first so that a bad index yields a single FAIL
                # reply; a REP socket must not send twice per request.
                if any(index >= block_num for index in indices):
                    logger.warning('input index too big %s for '
                                   'len of blocks %d from host %s',
                                   str(indices), block_num, client_addr)
                    sock.send_pyobj((SERVER_FETCH_FAIL, None))
                else:
                    bls = [blocks[index] for index in indices]
                    sock.send_pyobj((SERVER_FETCH_OK, (indices, bls)))
            elif uuid in self.uuid_state_dict:
                bitmap = self.uuid_map_dict[uuid]
                block_num = len(bitmap)
                if any(index >= block_num for index in indices):
                    logger.warning('input index too big %s for '
                                   'len of blocks %d from host %s',
                                   str(indices), block_num, client_addr)
                    sock.send_pyobj((SERVER_FETCH_FAIL, None))
                else:
                    fd = os.open(self.uuid_state_dict[uuid][0], os.O_RDONLY)
                    try:
                        mmfp = mmap.mmap(fd, 0, access=ACCESS_READ)
                    finally:
                        # The mapping stays valid after the fd is closed.
                        os.close(fd)
                    try:
                        bls = []
                        for index in indices:
                            # bitmap[index] is indexed as (offset, length).
                            mmfp.seek(bitmap[index][0])
                            bls.append(mmfp.read(bitmap[index][1]))
                    finally:
                        mmfp.close()
                    sock.send_pyobj((SERVER_FETCH_OK, (indices, bls)))
            else:
                logger.warning('server fetch failed for uuid %s '
                               'not exists in server %s from host %s',
                               uuid, socket.gethostname(), client_addr)
                sock.send_pyobj((SERVER_FETCH_FAIL, None))
        elif type_ == DATA_GET:
            uuid, compressed_size = msg
            if uuid not in self.uuid_state_dict or not self.uuid_state_dict[uuid][1]:
                if uuid not in self.download_threads:
                    sources = self._get_sources(uuid, guide_sock)
                    if not sources:
                        logger.warning('get sources from guide server failed in host %s',
                                       socket.gethostname())
                        sock.send_pyobj(DATA_GET_FAIL)
                        continue
                    self.download_threads[uuid] = spawn(
                        self._download_blocks, sources, uuid, compressed_size)
                    sock.send_pyobj(DATA_DOWNLOADING)
                else:
                    # A download for this uuid is already in flight.
                    sock.send_pyobj(DATA_DOWNLOADING)
            else:
                sock.send_pyobj(DATA_GET_OK)
        elif type_ == SERVER_CLEAR_ITEM:
            uuid = msg
            self.clear(uuid)
            sock.send_pyobj(None)
        else:
            logger.error('Unknown server message: %s %s', type_, msg)
            sock.send_pyobj(None)

    sock.close()
    logger.debug("stop Broadcast server %s", server_addr)
    # Iterate over a snapshot of the keys while clearing each uuid.
    for uuid in list(self.uuid_state_dict.keys()):
        self.clear(uuid)
def start(self):
    """Spawn ``self.run`` and block until ``self.addr`` has been set.

    A zmq context is created first if ``self.ctx`` is ``None``. The wait
    polls ``self.addr`` every 10 ms and has no timeout; the address is
    presumably published by ``run()`` -- confirm.
    """
    if self.ctx is None:
        self.ctx = zmq.Context()

    self.thread = spawn(self.run)

    while True:
        if self.addr is not None:
            break
        time.sleep(0.01)