예제 #1
0
파일: executor.py 프로젝트: douban/dpark
    def registered(self, driver, executorInfo, frameworkInfo, agent_info):
        """Set up this executor process from the payload in ``executorInfo``.

        ``executorInfo.data`` is decoded and unmarshalled into a ten-element
        tuple (script name, working directory, module search path,
        environment variables, parallelism, the two values handed to the
        stdout/stderr ``Redirect`` objects, log level, colour flag and the
        dpark environment mapping).  Those values are applied to the current
        process, the work directory and web server are prepared, and the
        ``check_alive`` and ``replier`` workers are spawned with ``driver``.

        ``frameworkInfo`` is not read here; ``agent_info.hostname`` is used
        only in the final debug message.  Any exception raised during setup
        is logged together with its traceback and then re-raised.
        """
        try:
            # ``Script`` is rebound at module level, not as a local.
            global Script
            # NOTE(review): marshal is not hardened against malicious input;
            # this assumes the payload comes from a trusted scheduler.
            (
                Script, cwd, python_path, osenv, self.parallel,
                out_logger, err_logger, logLevel, use_color, dpark_env
            ) = marshal.loads(decode_data(executorInfo.data))

            # Replace the module search path wholesale and merge the received
            # variables into the process environment.
            sys.path = python_path
            os.environ.update(osenv)
            setproctitle('[Executor]' + Script)

            # Prefix built from the host name left-justified to 10 characters.
            prefix = formatter_message(
                '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
                use_color
            )

            init_dpark_logger(logLevel, use_color=use_color)
            logging.root.setLevel(logLevel)

            # File descriptors 1 and 2 go through Redirect objects, and
            # sys.stdout / sys.stderr are pointed at their ``pipe_wfile``.
            r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
            sys.stdout = r1.pipe_wfile

            r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
            sys.stderr = r2.pipe_wfile

            # Every local defined up to this point is handed to
            # spawn_rconsole (defined elsewhere).
            spawn_rconsole(locals())

            # A missing or unusable working directory is only logged; setup
            # continues in the current directory.
            if os.path.exists(cwd):
                try:
                    os.chdir(cwd)
                except Exception as e:
                    logger.warning('change cwd to %s failed: %s', cwd, e)
            else:
                logger.warning('cwd (%s) not exists', cwd)

            env.workdir.init(dpark_env.get(env.DPARK_ID))
            # presumably takes a file lock on the main work dir -- confirm
            # against _try_flock.
            self._try_flock(env.workdir.main)
            # SERVER_URI is added before dpark_env is merged into env.environ
            # further down, so it ends up in env.environ as well.
            dpark_env['SERVER_URI'] = startWebServer(env.workdir.main)
            if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
                env.workdir.setup_cleaner_process()

            spawn(self.check_alive, driver)
            spawn(self.replier, driver)

            env.environ.update(dpark_env)
            # Imported at call time rather than at module level
            # (presumably to avoid an import cycle -- TODO confirm).
            from dpark.broadcast import start_download_manager
            start_download_manager()

            logger.debug('executor started at %s', agent_info.hostname)

        except Exception as e:
            # Log the full traceback, then let the caller see the failure.
            import traceback
            msg = traceback.format_exc()
            logger.error('init executor failed: %s', msg)
            raise
예제 #2
0
    def registered(self, driver, executorInfo, frameworkInfo, agent_info):
        """Configure the executor process using the data in ``executorInfo``.

        The method unmarshals ``executorInfo.data`` (after ``decode_data``)
        into ten values, applies the received search path and environment,
        redirects stdout/stderr, changes to the requested working directory
        when possible, prepares the work directory and web server, and
        spawns the ``check_alive`` and ``replier`` workers with ``driver``.

        ``frameworkInfo`` is unused in this method.  On any exception the
        traceback is logged and the exception is re-raised.
        """
        try:
            # Assigns the module-level ``Script`` name.
            global Script
            # NOTE(review): marshal.loads must only see trusted data; this
            # assumes the payload originates from the scheduler.
            (Script, cwd, python_path, osenv, self.parallel, out_logger,
             err_logger, logLevel, use_color,
             dpark_env) = marshal.loads(decode_data(executorInfo.data))

            # sys.path is replaced, os.environ is updated in place.
            sys.path = python_path
            os.environ.update(osenv)
            setproctitle('[Executor]' + Script)

            # Host name padded to at least 10 characters, passed through
            # formatter_message together with the colour flag.
            prefix = formatter_message(
                '{MAGENTA}[%s]{RESET} ' % socket.gethostname().ljust(10),
                use_color)

            init_dpark_logger(logLevel, use_color=use_color)
            logging.root.setLevel(logLevel)

            # Descriptor 1 -> Redirect; sys.stdout becomes its pipe_wfile.
            r1 = self.stdout_redirect = Redirect(1, out_logger, prefix)
            sys.stdout = r1.pipe_wfile

            # Descriptor 2 -> Redirect; sys.stderr becomes its pipe_wfile.
            r2 = self.stderr_redirect = Redirect(2, err_logger, prefix)
            sys.stderr = r2.pipe_wfile

            # Passes a snapshot of the locals defined so far.
            spawn_rconsole(locals())

            # Failure to enter ``cwd`` is logged as a warning, not raised.
            if os.path.exists(cwd):
                try:
                    os.chdir(cwd)
                except Exception as e:
                    logger.warning('change cwd to %s failed: %s', cwd, e)
            else:
                logger.warning('cwd (%s) not exists', cwd)

            env.workdir.init(dpark_env.get(env.DPARK_ID))
            # NOTE(review): looks like a lock on the main work dir -- verify
            # in _try_flock.
            self._try_flock(env.workdir.main)
            # Stored in dpark_env so the later env.environ.update() carries
            # the URI along.
            dpark_env['SERVER_URI'] = startWebServer(env.workdir.main)
            if 'MESOS_SLAVE_PID' in os.environ:  # make unit test happy
                env.workdir.setup_cleaner_process()

            spawn(self.check_alive, driver)
            spawn(self.replier, driver)

            env.environ.update(dpark_env)
            # Function-scope import (reason not visible here; possibly an
            # import cycle -- TODO confirm).
            from dpark.broadcast import start_download_manager
            start_download_manager()

            logger.debug('executor started at %s', agent_info.hostname)

        except Exception as e:
            # Record the traceback before propagating the error.
            import traceback
            msg = traceback.format_exc()
            logger.error('init executor failed: %s', msg)
            raise
예제 #3
0
파일: dstream.py 프로젝트: szkb/dpark
    def startReceiver(self):
        """Spawn a worker that buffers the messages produced by ``self.func``.

        The worker repeatedly calls ``self.func()`` and iterates the result,
        appending each message to ``self._messages`` while holding
        ``self._lock``.  It returns as soon as a message arrives while
        ``self.ssc.sc.started`` is false.  When iteration ends or fails, a
        new generator is requested and the loop continues.
        """
        def _run():
            while True:
                generator = self.func()
                try:
                    for message in generator:
                        if not self.ssc.sc.started:
                            return
                        with self._lock:
                            self._messages.append(message)
                except Exception:
                    # Deliberately not a bare ``except:``: SystemExit,
                    # KeyboardInterrupt and GeneratorExit must propagate
                    # instead of being logged and retried forever.
                    logger.exception('fail to receive')

        spawn(_run)
예제 #4
0
파일: dstream.py 프로젝트: douban/dpark
    def startReceiver(self):
        """Start a background worker that collects messages from ``self.func``.

        Each call to ``self.func()`` yields an iterable of messages; every
        message is appended to ``self._messages`` under ``self._lock``.  The
        worker exits when a message is received after
        ``self.ssc.sc.started`` has become false.  Ordinary errors raised
        while iterating are logged and the generator is recreated.
        """
        def _run():
            while True:
                generator = self.func()
                try:
                    for message in generator:
                        if not self.ssc.sc.started:
                            return
                        with self._lock:
                            self._messages.append(message)
                except Exception:
                    # Only ordinary errors are logged and retried; a bare
                    # ``except:`` here would also trap SystemExit,
                    # KeyboardInterrupt and GeneratorExit.
                    logger.exception('fail to receive')

        spawn(_run)
예제 #5
0
    def start(self):
        """Create the request/result queues and launch the fetch workers.

        Calling this on an instance whose ``_started`` flag is already set
        is a no-op.  Otherwise the flag is set, ``requests`` becomes an
        unbounded queue, ``results`` a queue bounded by ``nthreads``, and
        ``threads`` holds what ``spawn`` returned for each of the
        ``nthreads`` workers running ``_fetch_thread``.
        """
        if not self._started:
            self._started = True
            self.requests = queue.Queue()
            self.results = queue.Queue(self.nthreads)

            workers = []
            for _ in range(self.nthreads):
                workers.append(spawn(self._fetch_thread))
            self.threads = workers
예제 #6
0
파일: schedule.py 프로젝트: posens/dpark
    def start_driver(self):
        """Register with Mesos and start the idle/timeout watchdog.

        Builds a framework description (user, name, optional role, host
        name, optional web UI URL), creates and starts a
        ``MesosSchedulerDriver`` stored on ``self.driver``, marks the
        scheduler as started and spawns the ``check`` worker.

        Raises:
            Exception: if the current user is ``root``.
        """
        script_path = os.path.abspath(sys.argv[0])
        name = '[dpark] %s %s' % (script_path, ' '.join(sys.argv[1:]))
        # Names longer than 256 characters are cut and marked with dots.
        if len(name) > 256:
            name = name[:256] + '...'

        framework = Dict()
        framework.user = getuser()
        if framework.user == 'root':
            raise Exception('dpark is not allowed to run as \'root\'')
        framework.name = name
        if self.role:
            framework.role = self.role
        framework.hostname = socket.gethostname()
        if self.webui_url:
            framework.webui_url = self.webui_url

        self.driver = MesosSchedulerDriver(
            self, framework, self.master, use_addict=True)
        self.driver.start()
        logger.debug('Mesos Scheudler driver started')

        self.started = True
        self.last_finish_time = time.time()

        def check():
            # Runs once per second while the scheduler is started.
            while self.started:
                with self.lock:
                    now = time.time()
                    if not self.active_tasksets:
                        idle_for = now - self.last_finish_time
                        if idle_for > MAX_IDLE_TIME:
                            logger.info(
                                'stop mesos scheduler after %d seconds idle',
                                idle_for)
                            self.stop()
                            # Nothing follows the loop, so leaving the
                            # function here ends the watchdog.
                            return

                    for active in self.active_tasksets.values():
                        if active.check_task_timeout():
                            self.requestMoreResources()
                time.sleep(1)

        spawn(check)
예제 #7
0
파일: dstream.py 프로젝트: douban/dpark
 def _createThriftServer(self):
     """Build a non-blocking Thrift server for scribe messages and start it.

     ``server.prepare()`` is retried for as long as it raises
     ``socket.error``; once it succeeds the server's ``serve`` method is
     handed to ``spawn``.

     Returns:
         A ``(server, port, buf_que)`` tuple: the server object, the port
         number reported by the listening socket, and the deque that was
         given to the ``ScribeHandler``.
     """
     messages = deque()
     transport = TSocket.TServerSocket(host='0.0.0.0')
     server = TNonblockingServer.TNonblockingServer(
         Processor(ScribeHandler(messages)),
         transport,
         TBinaryProtocol.TBinaryProtocolFactory(False, False),
     )
     server._stop = False

     port = None
     while port is None:
         try:
             server.prepare()
             port = transport.handle.getsockname()[1]
         except socket.error:
             # Socket errors are ignored and the attempt is repeated.
             continue
         logger.info('get scribe port succeed: %d', port)

     spawn(server.serve)
     return server, port, messages
예제 #8
0
    def start(self):
        """Set up the work queues and spawn ``nthreads`` fetch workers.

        Returns immediately when ``_started`` is already true.  Otherwise
        sets the flag, creates an unbounded ``requests`` queue and a
        ``results`` queue limited to ``nthreads`` entries, and stores the
        values returned by ``spawn(self._fetch_thread)`` in ``threads``.
        """
        if not self._started:
            self._started = True
            self.requests = queue.Queue()
            self.results = queue.Queue(self.nthreads)

            spawned = []
            remaining = self.nthreads
            while remaining > 0:
                spawned.append(spawn(self._fetch_thread))
                remaining -= 1
            self.threads = spawned
예제 #9
0
파일: schedule.py 프로젝트: posens/dpark
    def start(self):
        """Bind a PULL socket on a random port and forward input to ``output``.

        A worker is spawned that, while ``self._started`` is true, polls the
        socket with a 1000 ms timeout and writes every received message to
        ``self.output``.  When the flag is cleared the worker closes the
        socket and destroys the context.  The address other processes can
        push to is stored in ``self.addr`` as ``tcp://<hostname>:<port>``.
        """
        context = zmq.Context()
        pull_sock = context.socket(zmq.PULL)
        bound_port = pull_sock.bind_to_random_port('tcp://0.0.0.0')
        self._started = True

        def collect_log():
            while self._started:
                if not pull_sock.poll(1000, zmq.POLLIN):
                    continue
                received = pull_sock.recv()
                self.output.write(received)

            # Release the zmq resources once the collector is stopped.
            pull_sock.close()
            context.destroy()

        spawn(collect_log)

        self.addr = 'tcp://%s:%d' % (socket.gethostname(), bound_port)
        logger.debug('log collecter start at %s', self.addr)
예제 #10
0
파일: dstream.py 프로젝트: szkb/dpark
 def _createThriftServer(self):
     """Create the scribe Thrift server, prepare it and start serving.

     Returns:
         ``(server, port, buf_que)`` -- the ``TNonblockingServer``, the
         port read from the listening socket after a successful
         ``prepare()``, and the deque shared with the ``ScribeHandler``.
     """
     received = deque()
     processor = Processor(ScribeHandler(received))
     factory = TBinaryProtocol.TBinaryProtocolFactory(False, False)
     transport = TSocket.TServerSocket(host='0.0.0.0')
     server = TNonblockingServer.TNonblockingServer(
         processor, transport, factory)
     server._stop = False

     # Keep trying until prepare() and the port lookup succeed; socket
     # errors are silently retried.
     while True:
         try:
             server.prepare()
             port = transport.handle.getsockname()[1]
         except socket.error:
             continue
         else:
             logger.info('get scribe port succeed: %d', port)
             break

     spawn(server.serve)
     return server, port, received
예제 #11
0
    def start_guide(self):
        """Bind the guide REP socket on a random port and serve it in a worker.

        The bound address is stored in ``self.guide_addr``.  The worker
        polls the socket (1000 ms timeout) while ``self._started`` is true
        and answers each request with exactly one reply:

        * ``GUIDE_STOP`` -- replies ``0`` and ends the loop.
        * ``GUIDE_GET_SOURCES`` -- replies with the sources registered for
          the uuid, or ``None`` (with a warning) when it is unknown.
        * ``GUIDE_SET_SOURCES`` -- records ``addr -> bitmap`` for the uuid
          when the bitmap has any truthy entry; the first address recorded
          for a uuid is remembered in ``self.register_addr``.
        * ``GUIDE_REPORT_BAD`` -- removes ``addr`` from the uuid's sources
          unless it is the registering address.
        * anything else -- logged as an error.

        Returns:
            Whatever ``spawn`` returns for the worker.
        """
        sock = self.ctx.socket(zmq.REP)
        port = sock.bind_to_random_port('tcp://0.0.0.0')
        self.guide_addr = 'tcp://%s:%d' % (self.host, port)

        def run():
            logger.debug("guide start at %s", self.guide_addr)

            while self._started:
                if not sock.poll(1000, zmq.POLLIN):
                    continue

                type_, msg = sock.recv_pyobj()
                if type_ == GUIDE_STOP:
                    sock.send_pyobj(0)
                    break
                elif type_ == GUIDE_GET_SOURCES:
                    uuid = msg
                    sources = None
                    if uuid in self.guides:
                        sources = self.guides[uuid]
                    else:
                        logger.warning(
                            'uuid %s NOT REGISTERED in guide server', uuid)
                    sock.send_pyobj(sources)
                elif type_ == GUIDE_SET_SOURCES:
                    uuid, addr, bitmap = msg
                    if any(bitmap):
                        sources = None
                        if uuid in self.guides:
                            sources = self.guides[uuid]
                        if sources:
                            sources[addr] = bitmap
                        else:
                            self.guides[uuid] = {addr: bitmap}
                            self.register_addr[uuid] = addr
                    sock.send_pyobj(None)
                elif type_ == GUIDE_REPORT_BAD:
                    uuid, addr = msg
                    if uuid not in self.guides:
                        # A report for an unknown uuid used to raise
                        # KeyError here, which ended the worker and left
                        # the requester without a reply.
                        logger.warning(
                            'uuid %s NOT REGISTERED in guide server', uuid)
                    else:
                        sources = self.guides[uuid]
                        if addr in sources:
                            if addr != self.register_addr[uuid]:
                                del sources[addr]
                            else:
                                logger.warning(
                                    'The addr %s to delete is the register Quit!!!',
                                    addr)
                    sock.send_pyobj(None)
                else:
                    logger.error('Unknown guide message: %s %s', type_, msg)
                    sock.send_pyobj(None)

        return spawn(run)
예제 #12
0
파일: executor.py 프로젝트: posens/dpark
def startWebServer(path):
    """Return a URI under which the directory ``path`` is served over HTTP.

    A probe file named ``test`` containing ``path`` is written into the
    directory (which is created if missing).  If the default web server on
    ``DEFAULT_WEB_PORT`` serves that file back unchanged, its URI is
    returned.  Otherwise a ``TCPServer`` with the ``LocalizedHTTP`` handler
    is started on a free port via ``spawn`` and the URI of that server is
    returned.

    Args:
        path: directory to expose; its base name becomes the URI path.

    Returns:
        ``http://<hostname>:<port>/<basename of path>``.
    """
    # check the default web server
    if not os.path.exists(path):
        os.makedirs(path)
    testpath = os.path.join(path, 'test')
    with open(testpath, 'w') as f:
        f.write(path)
    default_uri = 'http://%s:%d/%s' % (socket.gethostname(), DEFAULT_WEB_PORT,
                                       os.path.basename(path))
    try:
        response = urllib.request.urlopen(default_uri + '/' + 'test')
        try:
            data = response.read()
        finally:
            # Always release the connection; it was previously left open.
            response.close()
        if data == path.encode('utf-8'):
            return default_uri
    except IOError:
        # Best effort: an unreachable default server means we start our own.
        pass

    logger.warning('default webserver at %s not available', DEFAULT_WEB_PORT)
    LocalizedHTTP.basedir = os.path.dirname(path)
    # Port 0 lets the operating system choose a free port.
    ss = socketserver.TCPServer(('0.0.0.0', 0), LocalizedHTTP)
    spawn(ss.serve_forever)
    uri = 'http://%s:%d/%s' % (socket.gethostname(), ss.server_address[1],
                               os.path.basename(path))
    return uri
예제 #13
0
파일: broadcast.py 프로젝트: douban/dpark
    def start_guide(self):
        """Start the guide server: a REP socket answered by a spawned worker.

        The socket is bound to a random TCP port and the resulting address
        is kept in ``self.guide_addr``.  While ``self._started`` is true the
        worker polls for requests (1000 ms timeout) and sends one reply per
        request:

        * ``GUIDE_STOP``: reply ``0`` and leave the loop.
        * ``GUIDE_GET_SOURCES``: reply with ``self.guides[uuid]``, or
          ``None`` plus a warning for an unknown uuid.
        * ``GUIDE_SET_SOURCES``: if the bitmap has any truthy entry, store
          it under ``addr``; the first address for a uuid is also stored in
          ``self.register_addr``.  Reply ``None``.
        * ``GUIDE_REPORT_BAD``: drop ``addr`` from the uuid's sources unless
          it is the registering address.  Reply ``None``.
        * other types: log an error and reply ``None``.

        Returns:
            The value returned by ``spawn`` for the worker.
        """
        sock = self.ctx.socket(zmq.REP)
        port = sock.bind_to_random_port('tcp://0.0.0.0')
        self.guide_addr = 'tcp://%s:%d' % (self.host, port)

        def run():
            logger.debug("guide start at %s", self.guide_addr)

            while self._started:
                if not sock.poll(1000, zmq.POLLIN):
                    continue

                type_, msg = sock.recv_pyobj()
                if type_ == GUIDE_STOP:
                    sock.send_pyobj(0)
                    break
                elif type_ == GUIDE_GET_SOURCES:
                    uuid = msg
                    sources = None
                    if uuid in self.guides:
                        sources = self.guides[uuid]
                    else:
                        logger.warning('uuid %s NOT REGISTERED in guide server', uuid)
                    sock.send_pyobj(sources)
                elif type_ == GUIDE_SET_SOURCES:
                    uuid, addr, bitmap = msg
                    if any(bitmap):
                        sources = None
                        if uuid in self.guides:
                            sources = self.guides[uuid]
                        if sources:
                            sources[addr] = bitmap
                        else:
                            self.guides[uuid] = {addr: bitmap}
                            self.register_addr[uuid] = addr
                    sock.send_pyobj(None)
                elif type_ == GUIDE_REPORT_BAD:
                    uuid, addr = msg
                    if uuid in self.guides:
                        sources = self.guides[uuid]
                        if addr in sources:
                            if addr != self.register_addr[uuid]:
                                del sources[addr]
                            else:
                                logger.warning('The addr %s to delete is the register Quit!!!', addr)
                    else:
                        # Guard against reports for uuids that were never
                        # registered: indexing self.guides directly raised
                        # KeyError, killing the worker before it replied.
                        logger.warning('uuid %s NOT REGISTERED in guide server', uuid)
                    sock.send_pyobj(None)
                else:
                    logger.error('Unknown guide message: %s %s', type_, msg)
                    sock.send_pyobj(None)

        return spawn(run)
예제 #14
0
파일: executor.py 프로젝트: douban/dpark
def startWebServer(path):
    """Make ``path`` reachable over HTTP and return the URI to use.

    The function first checks whether a web server listening on
    ``DEFAULT_WEB_PORT`` already serves the directory: it writes ``path``
    into a file called ``test`` inside the directory and fetches that file
    through the default server.  When the fetched bytes match, the default
    URI is returned.  In every other case a ``TCPServer`` using
    ``LocalizedHTTP`` is bound to a free port, served via ``spawn``, and
    its URI is returned.

    Args:
        path: directory to serve; created when it does not exist.

    Returns:
        A string of the form ``http://<hostname>:<port>/<basename>``.
    """
    # check the default web server
    if not os.path.exists(path):
        os.makedirs(path)
    testpath = os.path.join(path, 'test')
    with open(testpath, 'w') as f:
        f.write(path)
    default_uri = 'http://%s:%d/%s' % (socket.gethostname(), DEFAULT_WEB_PORT,
                                       os.path.basename(path))
    try:
        probe = urllib.request.urlopen(default_uri + '/' + 'test')
        try:
            data = probe.read()
        finally:
            # The response object used to be leaked; close it explicitly.
            probe.close()
        if data == path.encode('utf-8'):
            return default_uri
    except IOError:
        # The default server is optional; fall through to a private one.
        pass

    logger.warning('default webserver at %s not available', DEFAULT_WEB_PORT)
    LocalizedHTTP.basedir = os.path.dirname(path)
    # Binding to port 0 asks the OS for any free port.
    ss = socketserver.TCPServer(('0.0.0.0', 0), LocalizedHTTP)
    spawn(ss.serve_forever)
    uri = 'http://%s:%d/%s' % (socket.gethostname(), ss.server_address[1],
                               os.path.basename(path))
    return uri
예제 #15
0
파일: executor.py 프로젝트: posens/dpark
    def __init__(self, fd, addr, prefix):
        """Point file descriptor ``fd`` at a new pipe and start the forwarder.

        Args:
            fd: number of the descriptor to redirect.
            addr: stored on the instance; not used inside this method.
            prefix: stored on the instance; not used inside this method.
        """
        self.fd = fd
        self.addr = addr
        self.prefix = prefix

        # Keep a duplicate of the original descriptor before ``fd`` is
        # replaced below.
        self.fd_dup = os.dup(self.fd)
        self.origin_wfile = None

        # New pipe: the read end is wrapped as a binary reader, the write
        # end as an unbuffered (buffering=0) binary writer.
        self.pipe_rfd, self.pipe_wfd = os.pipe()
        self.pipe_rfile = os.fdopen(self.pipe_rfd, 'rb')
        self.pipe_wfile = os.fdopen(self.pipe_wfd, 'wb', 0)

        # After dup2, ``fd`` refers to the write end of the pipe.
        os.close(self.fd)
        os.dup2(self.pipe_wfd, self.fd)
        # assert os.dup(self.pipe_wfd) == self.fd, 'redirect io failed'

        self.ctx = zmq.Context()
        self._shutdown = False
        self.thread = None
        self.sock = None

        # ``_forward`` is started via spawn; presumably it drains
        # ``pipe_rfile`` towards ``addr`` -- confirm in _forward.
        self.thread = spawn(self._forward)
예제 #16
0
파일: executor.py 프로젝트: douban/dpark
    def __init__(self, fd, addr, prefix):
        """Redirect descriptor ``fd`` into a pipe owned by this object.

        Args:
            fd: descriptor number that will be replaced by the pipe's
                write end.
            addr: kept as ``self.addr``; not read in this method.
            prefix: kept as ``self.prefix``; not read in this method.
        """
        self.fd = fd
        self.addr = addr
        self.prefix = prefix

        # Duplicate of the descriptor as it was before redirection.
        self.fd_dup = os.dup(self.fd)
        self.origin_wfile = None

        # File objects over both pipe ends; the writer is unbuffered
        # because of the trailing 0 argument.
        self.pipe_rfd, self.pipe_wfd = os.pipe()
        self.pipe_rfile = os.fdopen(self.pipe_rfd, 'rb')
        self.pipe_wfile = os.fdopen(self.pipe_wfd, 'wb', 0)

        # Close the old target, then make ``fd`` an alias of the pipe's
        # write end.
        os.close(self.fd)
        os.dup2(self.pipe_wfd, self.fd)
        # assert os.dup(self.pipe_wfd) == self.fd, 'redirect io failed'

        self.ctx = zmq.Context()
        self._shutdown = False
        self.thread = None
        self.sock = None

        # NOTE(review): _forward is defined elsewhere; it looks like the
        # consumer of pipe_rfile -- verify.
        self.thread = spawn(self._forward)
예제 #17
0
파일: dstream.py 프로젝트: szkb/dpark
 def start(self, start):
     """Arm the timer and spawn the worker that executes ``self.run``.

     ``nextTime`` becomes ``(int(start // period) + 1) * period``, i.e.
     for a positive period the first multiple of the period that lies
     after ``start``.

     Args:
         start: reference time from which the next tick is computed.
     """
     period = self.period
     elapsed_periods = int(start // period)
     self.nextTime = (elapsed_periods + 1) * period
     self.stopped = False
     self.thread = spawn(self.run)
     logger.debug("RecurringTimer started, nextTime is %d", self.nextTime)
예제 #18
0
파일: dstream.py 프로젝트: douban/dpark
 def start(self, start):
     """Compute the first tick after ``start`` and launch the timer worker.

     The next tick is one period past the last whole period contained in
     ``start``.  The ``stopped`` flag is cleared before ``self.run`` is
     handed to ``spawn``.

     Args:
         start: time value the schedule is aligned against.
     """
     whole_periods = int(start // self.period)
     next_tick = (whole_periods + 1) * self.period
     self.nextTime = next_tick
     self.stopped = False
     self.thread = spawn(self.run)
     logger.debug("RecurringTimer started, nextTime is %d", self.nextTime)
예제 #19
0
        def run():
            """Serve broadcast requests on ``sock`` until stopped.

            While ``self._started`` is true the loop polls ``sock`` with a
            1000 ms timeout and sends exactly one reply per request:

            * ``SERVER_STOP`` -- reply ``None`` and leave the loop.
            * ``SERVER_FETCH`` -- reply ``(SERVER_FETCH_OK, (indices,
              blocks))`` with blocks taken from
              ``self.master_broadcast_blocks`` or read from the file
              recorded in ``self.uuid_state_dict``; reply
              ``(SERVER_FETCH_FAIL, None)`` for an unknown uuid or an
              out-of-range index.
            * ``DATA_GET`` -- reply ``DATA_GET_OK`` when the data is
              marked complete, otherwise start (or keep) a download and
              reply ``DATA_DOWNLOADING``; reply ``DATA_GET_FAIL`` when no
              sources can be obtained.
            * ``SERVER_CLEAR_ITEM`` -- clear the uuid and reply ``None``.
            * anything else -- log an error and reply ``None``.

            After the loop the socket is closed and every uuid in
            ``self.uuid_state_dict`` is cleared.
            """
            logger.debug("server started at %s", server_addr)

            def reply_fetch(indices, block_num, client_addr, read_block):
                # Answer one SERVER_FETCH request with a single reply.
                # The previous code sent a FAIL reply for every bad index
                # and afterwards still sent an OK reply, so one request
                # could receive several replies.
                # NOTE(review): sock appears to be a request/reply socket
                # (every branch sends one reply per receive), where an
                # extra send would be an error -- confirm where sock is
                # created.
                blocks = []
                for index in indices:
                    if index >= block_num:
                        logger.warning('input index too big %s for '
                                       'len of blocks  %d from host %s',
                                       str(indices), block_num, client_addr)
                        sock.send_pyobj((SERVER_FETCH_FAIL, None))
                        return
                    blocks.append(read_block(index))
                sock.send_pyobj((SERVER_FETCH_OK, (indices, blocks)))

            while self._started:
                if not sock.poll(1000, zmq.POLLIN):
                    continue
                type_, msg = sock.recv_pyobj()
                logger.debug('server recv: %s %s', type_, msg)
                if type_ == SERVER_STOP:
                    sock.send_pyobj(None)
                    break
                elif type_ == SERVER_FETCH:
                    uuid, indices, client_addr = msg
                    if uuid in self.master_broadcast_blocks:
                        master_blocks = self.master_broadcast_blocks[uuid]
                        reply_fetch(indices, len(master_blocks), client_addr,
                                    master_blocks.__getitem__)
                    elif uuid in self.uuid_state_dict:
                        fd = os.open(self.uuid_state_dict[uuid][0], os.O_RDONLY)
                        try:
                            mmfp = mmap.mmap(fd, 0, access=ACCESS_READ)
                        finally:
                            # The mapping keeps its own handle on the file.
                            os.close(fd)
                        bitmap = self.uuid_map_dict[uuid]

                        def read_block(index, mmfp=mmfp, bitmap=bitmap):
                            # bitmap[index] holds (offset, length).
                            mmfp.seek(bitmap[index][0])
                            return mmfp.read(bitmap[index][1])

                        try:
                            reply_fetch(indices, len(bitmap), client_addr,
                                        read_block)
                        finally:
                            # Unmap even if reading or replying fails.
                            mmfp.close()
                    else:
                        logger.warning('server fetch failed for uuid %s '
                                       'not exists in server %s from host %s',
                                       uuid, socket.gethostname(), client_addr)
                        sock.send_pyobj((SERVER_FETCH_FAIL, None))
                elif type_ == DATA_GET:
                    uuid, compressed_size = msg
                    if uuid not in self.uuid_state_dict or not self.uuid_state_dict[uuid][1]:
                        if uuid not in self.download_threads:
                            sources = self._get_sources(uuid, guide_sock)
                            if not sources:
                                logger.warning('get sources from guide server failed in host %s',
                                               socket.gethostname())
                                sock.send_pyobj(DATA_GET_FAIL)
                                continue
                            self.download_threads[uuid] = spawn(
                                self._download_blocks,
                                sources, uuid, compressed_size)
                        # Either a download was just started or one is
                        # already registered for this uuid.
                        sock.send_pyobj(DATA_DOWNLOADING)
                    else:
                        sock.send_pyobj(DATA_GET_OK)
                elif type_ == SERVER_CLEAR_ITEM:
                    uuid = msg
                    self.clear(uuid)
                    sock.send_pyobj(None)
                else:
                    logger.error('Unknown server message: %s %s', type_, msg)
                    sock.send_pyobj(None)

            sock.close()
            logger.debug("stop Broadcast server %s", server_addr)
            # Iterate over a copy of the keys while clearing each uuid.
            for uuid in list(self.uuid_state_dict.keys()):
                self.clear(uuid)
예제 #20
0
파일: tracker.py 프로젝트: douban/dpark
 def start(self):
     """Spawn the worker running ``self.run`` and wait for ``self.addr``.

     A zmq context is created when ``self.ctx`` is still ``None``.  The
     call then blocks, sleeping 10 ms between checks, until ``self.addr``
     is no longer ``None``.
     """
     if self.ctx is None:
         self.ctx = zmq.Context()
     self.thread = spawn(self.run)

     # Block until the address has been published on the instance.
     while True:
         if self.addr is not None:
             break
         time.sleep(0.01)