Example #1
def _discoro_proc():
    # coroutine
    """Server process receives computations and runs coroutines for it.
    """

    import os
    import shutil
    import traceback
    import sys
    import time

    try:
        import psutil
    except ImportError:
        psutil = None

    import asyncoro.disasyncoro as asyncoro
    from asyncoro import Coro
    from asyncoro.discoro import MinPulseInterval, MaxPulseInterval, \
         DiscoroNodeInfo, DiscoroNodeStatus

    _discoro_coro = asyncoro.AsynCoro.cur_coro()
    _discoro_config = yield _discoro_coro.receive()
    assert _discoro_config['req'] == 'config'
    _discoro_coro.register('discoro_server')
    _discoro_name = asyncoro.AsynCoro.instance().name
    asyncoro.AsynCoro.instance().dest_path = os.path.join('discoro',
                                                          'server%s' % (_discoro_config['id']))
    _discoro_dest_path = asyncoro.AsynCoro.instance().dest_path
    _discoro_pid_path = os.path.join(_discoro_dest_path, '..',
                                     'server%s.pid' % (_discoro_config['id']))
    _discoro_pid_path = os.path.normpath(_discoro_pid_path)
    # TODO: is file locking necessary?
    if os.path.exists(_discoro_pid_path):
        with open(_discoro_pid_path, 'r') as _discoro_req:
            _discoro_var = _discoro_req.read()
        _discoro_var = int(_discoro_var)
        if not _discoro_config['phoenix']:
            print('\n   Another discoronode seems to be running;\n'
                  '   make sure server with PID %d quit and remove "%s"\n' %
                  (_discoro_var, _discoro_pid_path))
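            # phoenix is off: point _discoro_var at this process's own PID so
            # the SIGTERM below terminates this (new) server instead of the old one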
            _discoro_var = os.getpid()

        import signal
        try:
            os.kill(_discoro_var, signal.SIGTERM)
        except:
            pass
        else:
            time.sleep(0.1)
            try:
                if os.waitpid(_discoro_var, os.WNOHANG)[0] != _discoro_var:
                    asyncoro.logger.warning('Killing process %d failed' % _discoro_var)
            except:
                pass
        del signal
    if os.path.isdir(_discoro_dest_path):
        shutil.rmtree(_discoro_dest_path)
    os.makedirs(_discoro_dest_path)
    os.chdir(_discoro_dest_path)
    with open(_discoro_pid_path, 'w') as _discoro_var:
        _discoro_var.write('%s' % os.getpid())
    asyncoro.logger.debug('discoro server "%s" started at %s; '
                          'computation files will be saved in "%s"' %
                          (_discoro_name, _discoro_coro.location, _discoro_dest_path))
    _discoro_req = _discoro_client = _discoro_auth = _discoro_msg = None
    _discoro_timer_coro = _discoro_pulse_coro = _discoro_timer_proc = _discoro_peer_status = None
    _discoro_monitor_coro = _discoro_monitor_proc = _discoro_node_status = None
    _discoro_computation = _discoro_func = _discoro_var = None
    _discoro_job_coros = set()
    _discoro_busy_time = time.time()
    _discoro_globals = {}
    _discoro_locals = {}
    _discoro_globals.update(globals())
    _discoro_locals.update(locals())

    def _discoro_timer_proc(coro=None):
        coro.set_daemon()
        last_pulse = time.time()
        interval = None
        while True:
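            # sleep() returns whatever value resume() was called with; the main
            # loop resumes this timer with a new pulse interval on setup/close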
            reset = yield coro.sleep(interval)
            if reset:
                if not isinstance(_discoro_pulse_coro, Coro):
                    interval = None
                    continue
                interval = reset
                last_pulse = time.time()
                continue
            if not _discoro_pulse_coro:
                continue
            msg = {'ncoros': len(_discoro_job_coros), 'location': coro.location}
            if _discoro_node_status:
                msg['node_status'] = DiscoroNodeStatus(coro.location.addr, psutil.cpu_percent(),
                                                       psutil.virtual_memory().percent,
                                                       psutil.disk_usage(_discoro_dest_path).percent)

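            # send() returns 0 when the pulse message is delivered to the scheduler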
            if _discoro_pulse_coro.send(msg) == 0:
                last_pulse = time.time()
            elif (time.time() - last_pulse) > (5 * interval) and _discoro_computation:
                asyncoro.logger.warning('scheduler is not reachable; closing computation "%s"' %
                                        _discoro_computation._auth)
                _discoro_coro.send({'req': 'close', 'auth': _discoro_computation._auth})

            if (_discoro_computation and not _discoro_job_coros and
                    _discoro_computation.zombie_period and
                    (time.time() - _discoro_busy_time) > _discoro_computation.zombie_period):
                asyncoro.logger.debug('%s: zombie computation "%s"' %
                                      (coro.location, _discoro_computation._auth))
                # TODO: close? For now wait for "too many" timeouts to close

    def _discoro_peer_status(coro=None):
        coro.set_daemon()
        while True:
            status = yield coro.receive()
            if isinstance(status, asyncoro.PeerStatus) and \
               status.status == asyncoro.PeerStatus.Offline and \
               _discoro_pulse_coro and _discoro_pulse_coro.location == status.location:
                asyncoro.logger.debug('scheduler at %s quit; closing computation %s' %
                                      (status.location, _discoro_computation._auth))
                msg = {'req': 'close', 'auth': _discoro_computation._auth}
                _discoro_coro.send(msg)

    def _discoro_monitor_proc(coro=None):
        nonlocal _discoro_busy_time
        coro.set_daemon()
        while True:
            msg = yield coro.receive()
            if isinstance(msg, asyncoro.MonitorException):
                _discoro_busy_time = time.time()
                asyncoro.logger.debug('job %s done' % msg.args[0])
                _discoro_job_coros.discard(msg.args[0])
            else:
                asyncoro.logger.warning('%s: invalid monitor message ignored' % coro.location)

    _discoro_timer_coro = Coro(_discoro_timer_proc)
    _discoro_monitor_coro = Coro(_discoro_monitor_proc)
    asyncoro.AsynCoro.instance().peer_status(Coro(_discoro_peer_status))

    while True:
        _discoro_msg = yield _discoro_coro.receive()
        if not isinstance(_discoro_msg, dict):
            continue
        _discoro_req = _discoro_msg.get('req', None)

        if _discoro_req == 'run':
            _discoro_client = _discoro_msg.get('client', None)
            _discoro_auth = _discoro_msg.get('auth', None)
            _discoro_func = _discoro_msg.get('func', None)
            if not isinstance(_discoro_client, Coro) or not _discoro_computation or \
               _discoro_auth != _discoro_computation._auth:
                asyncoro.logger.warning('invalid run: %s' % (type(_discoro_func)))
                if isinstance(_discoro_client, Coro):
                    _discoro_client.send(None)
                continue
            try:
                _discoro_func = asyncoro.unserialize(_discoro_func)
                if _discoro_func.code:
                    exec(_discoro_func.code, globals())
                job_coro = Coro(globals()[_discoro_func.name],
                                *(_discoro_func.args), **(_discoro_func.kwargs))
            except:
                asyncoro.logger.debug('invalid computation to run')
                # _discoro_func = Scheduler._Function(_discoro_func.name, None,
                #                                     _discoro_func.args, _discoro_func.kwargs)
                job_coro = (sys.exc_info()[0], getattr(_discoro_func, 'name', _discoro_func),
                            traceback.format_exc())
            else:
                asyncoro.logger.debug('job %s created' % job_coro)
                _discoro_job_coros.add(job_coro)
                job_coro.notify(_discoro_monitor_coro)
                _discoro_var = _discoro_msg.get('notify', None)
                if isinstance(_discoro_var, Coro):
                    job_coro.notify(_discoro_var)
            _discoro_busy_time = time.time()
            _discoro_client.send(job_coro)
            del job_coro
        elif _discoro_req == 'setup':
            _discoro_client = _discoro_msg.get('client', None)
            _discoro_pulse_coro = _discoro_msg.get('pulse_coro', None)
            if not isinstance(_discoro_client, Coro) or not isinstance(_discoro_pulse_coro, Coro):
                continue
            if _discoro_computation is not None:
                asyncoro.logger.debug('invalid "setup" - busy')
                _discoro_client.send(-1)
                continue
            os.chdir(_discoro_dest_path)
            try:
                _discoro_computation = _discoro_msg['computation']
                exec('import asyncoro.disasyncoro as asyncoro', globals())
                if __name__ == '__mp_main__':  # Windows multiprocessing process
                    exec('import asyncoro.disasyncoro as asyncoro',
                         sys.modules['__mp_main__'].__dict__)
                if _discoro_computation._code:
                    exec(_discoro_computation._code, globals())
                    if __name__ == '__mp_main__':  # Windows multiprocessing process
                        exec(_discoro_computation._code, sys.modules['__mp_main__'].__dict__)
            except:
                _discoro_computation = None
                asyncoro.logger.warning('invalid computation')
                asyncoro.logger.debug(traceback.format_exc())
                _discoro_client.send(-1)
                continue
            if psutil and _discoro_msg.get('node_status', None):
                _discoro_node_status = True
            if not (isinstance(_discoro_computation.pulse_interval, int) and
                    MinPulseInterval <= _discoro_computation.pulse_interval <= MaxPulseInterval):
                _discoro_computation.pulse_interval = MinPulseInterval
            _discoro_timer_coro.resume(_discoro_computation.pulse_interval)
            _discoro_busy_time = time.time()
            asyncoro.logger.debug('computation "%s" from %s' %
                                  (_discoro_computation._auth, _discoro_msg['client'].location))
            _discoro_client.send(0)
        elif _discoro_req == 'close':
            _discoro_auth = _discoro_msg.get('auth', None)
            if not _discoro_computation or (_discoro_auth != _discoro_computation._auth and
                                            _discoro_auth != _discoro_config['auth']):
                continue
            asyncoro.logger.debug('%s deleting computation "%s"' %
                                  (_discoro_coro.location, _discoro_computation._auth))
            if _discoro_auth != _discoro_computation._auth and _discoro_pulse_coro:
                _discoro_pulse_coro.send({'status': 'ServerClosed',
                                          'location': _discoro_coro.location})
            for _discoro_var in _discoro_job_coros:
                _discoro_var.terminate()
            _discoro_job_coros = set()

            if __name__ == '__mp_main__':  # Windows multiprocessing process
                for _discoro_var in list(globals()):
                    if _discoro_var not in _discoro_globals:
                        globals().pop(_discoro_var, None)
                        sys.modules['__mp_main__'].__dict__.pop(_discoro_var, None)
                globals().update(_discoro_globals)
                sys.modules['__mp_main__'].__dict__.update(_discoro_globals)
            else:
                for _discoro_var in list(globals()):
                    if _discoro_var not in _discoro_globals:
                        globals().pop(_discoro_var, None)
                globals().update(_discoro_globals)

            for _discoro_var in os.listdir(_discoro_dest_path):
                _discoro_var = os.path.join(_discoro_dest_path, _discoro_var)
                if os.path.isdir(_discoro_var) and not os.path.islink(_discoro_var):
                    shutil.rmtree(_discoro_var, ignore_errors=True)
                else:
                    os.remove(_discoro_var)
            if not os.path.isdir(_discoro_dest_path):
                try:
                    os.remove(_discoro_dest_path)
                except:
                    pass
                os.makedirs(_discoro_dest_path)
            if not os.path.isfile(_discoro_pid_path):
                try:
                    if os.path.islink(_discoro_pid_path):
                        os.remove(_discoro_pid_path)
                    elif os.path.isdir(_discoro_pid_path):
                        shutil.rmtree(_discoro_pid_path)
                    with open(_discoro_pid_path, 'w') as _discoro_var:
                        _discoro_var.write('%s' % os.getpid())
                except:
                    asyncoro.logger.warning('PID file "%s" is invalid' % _discoro_pid_path)
            os.chdir(_discoro_dest_path)
            asyncoro.AsynCoro.instance().dest_path = _discoro_dest_path
            _discoro_computation = _discoro_client = _discoro_pulse_coro = None
            _discoro_node_status = None
            if _discoro_config['serve'] > 0:
                _discoro_config['serve'] -= 1
                if _discoro_config['serve'] == 0:
                    break
            _discoro_timer_coro.resume(MinPulseInterval)
        elif _discoro_req == 'node_info':
            if psutil:
                info = DiscoroNodeInfo(
                    _discoro_name, _discoro_coro.location.addr,
                    psutil.cpu_count(), psutil.cpu_percent(),
                    {_discoro_var: getattr(psutil.virtual_memory(), _discoro_var)
                     for _discoro_var in ['total', 'percent']},
                    {_discoro_var: getattr(psutil.disk_usage(_discoro_dest_path), _discoro_var)
                     for _discoro_var in ['total', 'percent']}
                    )
                if _discoro_msg.get('node_status', None):
                    _discoro_node_status = True
            else:
                info = DiscoroNodeInfo(_discoro_name, _discoro_coro.location.addr,
                                       -1, -1, None, None)
            _discoro_client = _discoro_msg.get('client', None)
            if not isinstance(_discoro_client, Coro):
                continue
            _discoro_client.send(info)
        elif _discoro_req == 'status':
            if _discoro_msg.get('auth', None) != _discoro_config['auth']:
                asyncoro.logger.debug('ignoring status request: %s' % (_discoro_msg.get('auth')))
                continue
            if _discoro_pulse_coro:
                print('  Server %s running %d coroutines for computation at %s' %
                      (_discoro_coro.location, len(_discoro_job_coros),
                       _discoro_pulse_coro.location))
            else:
                print('  Server %s not used by any computation' % (_discoro_coro.location))
        elif _discoro_req == 'quit':
            if _discoro_msg.get('auth', None) != _discoro_config['auth']:
                asyncoro.logger.debug('ignoring quit: %s' % (_discoro_msg.get('auth')))
                continue
            if _discoro_pulse_coro:
                _discoro_pulse_coro.send({'status': 'ServerClosed',
                                          'location': _discoro_coro.location})
            break
        elif _discoro_req == 'terminate':
            if _discoro_msg.get('auth', None) != _discoro_config['auth']:
                asyncoro.logger.debug('ignoring terminate: %s' % (_discoro_msg.get('auth')))
                continue
            if _discoro_pulse_coro:
                _discoro_pulse_coro.send({'status': 'ServerTerminated',
                                          'location': _discoro_coro.location})
            if _discoro_computation:
                msg = {'req': 'close', 'auth': _discoro_computation._auth}
                _discoro_config['serve'] = 1
                _discoro_coro.send(msg)
            else:
                break
        else:
            asyncoro.logger.warning('invalid command "%s" ignored' % _discoro_req)
            _discoro_client = _discoro_msg.get('client', None)
            if not isinstance(_discoro_client, Coro):
                continue
            _discoro_client.send(-1)

    # wait until all computations are done; process only 'close'
    while _discoro_job_coros:
        _discoro_msg = yield _discoro_coro.receive()
        if not isinstance(_discoro_msg, dict):
            continue
        _discoro_req = _discoro_msg.get('req', None)

        if _discoro_req == 'close':
            _discoro_auth = _discoro_msg.get('auth', None)
            if not _discoro_computation or _discoro_auth != _discoro_computation._auth:
                continue
            asyncoro.logger.debug('%s deleting computation "%s"' %
                                  (_discoro_coro.location, _discoro_computation._auth))

            if __name__ == '__mp_main__':  # Windows multiprocessing process
                for _discoro_var in list(globals()):
                    if _discoro_var not in _discoro_globals:
                        globals().pop(_discoro_var, None)
                        sys.modules['__mp_main__'].__dict__.pop(_discoro_var, None)
                globals().update(_discoro_globals)
                sys.modules['__mp_main__'].__dict__.update(_discoro_globals)
            else:
                for _discoro_var in list(globals()):
                    if _discoro_var not in _discoro_globals:
                        globals().pop(_discoro_var, None)
                globals().update(_discoro_globals)

            break
        else:
            asyncoro.logger.warning('invalid command "%s" ignored' % _discoro_req)
            _discoro_client = _discoro_msg.get('client', None)
            if not isinstance(_discoro_client, Coro):
                continue
            _discoro_client.send(-1)

    for _discoro_var in os.listdir(_discoro_dest_path):
        _discoro_var = os.path.join(_discoro_dest_path, _discoro_var)
        if os.path.isdir(_discoro_var) and not os.path.islink(_discoro_var):
            shutil.rmtree(_discoro_var, ignore_errors=True)
        else:
            os.remove(_discoro_var)
    if os.path.isfile(_discoro_pid_path):
        os.remove(_discoro_pid_path)
    _discoro_config['mp_queue'].put(_discoro_config['auth'])
    asyncoro.logger.debug('discoro server %s quit' % _discoro_coro.location)
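
The server above is normally launched by discoronode.py and driven by a client through the discoro scheduler. For context, here is a minimal client sketch; it assumes a running discoronode and the Computation API (schedule/run/finish/close) shipped with the same asyncoro release:

import random
import asyncoro.disasyncoro as asyncoro
from asyncoro.discoro import Computation

def compute(n, coro=None):
    # executed by a discoro server process (see _discoro_proc above)
    yield coro.sleep(n)
    raise StopIteration(n)

def client_proc(computation, coro=None):
    # schedule computation with the (possibly shared) scheduler
    if (yield computation.schedule()):
        raise Exception('could not schedule computation')
    # create remote coroutines and collect their results
    rcoros = []
    for i in range(3):
        rcoro = yield computation.run(compute, random.uniform(1, 3))
        if isinstance(rcoro, asyncoro.Coro):
            rcoros.append(rcoro)
    for rcoro in rcoros:
        result = yield rcoro.finish()
        print('result: %s' % result)
    yield computation.close()

computation = Computation([compute])
asyncoro.Coro(client_proc, computation)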
Example #2
class _DispyNode(object):
    """Internal use only.
    """
    def __init__(self, cpus, ip_addr=None, ext_ip_addr=None, node_port=None,
                 scheduler_node=None, scheduler_port=None,
                 dest_path_prefix='', secret='', keyfile=None, certfile=None,
                 max_file_size=None, zombie_interval=60):
        assert 0 < cpus <= multiprocessing.cpu_count()
        self.cpus = cpus
        if ip_addr:
            ip_addr = _node_ipaddr(ip_addr)
            if not ip_addr:
                raise Exception('invalid ip_addr')
        else:
            self.name = socket.gethostname()
            ip_addr = socket.gethostbyname(self.name)
        if ext_ip_addr:
            ext_ip_addr = _node_ipaddr(ext_ip_addr)
            if not ext_ip_addr:
                raise Exception('invalid ext_ip_addr')
        else:
            ext_ip_addr = ip_addr
        try:
            self.name = socket.gethostbyaddr(ext_ip_addr)[0]
        except:
            self.name = socket.gethostname()
        if not node_port:
            node_port = 51348
        if not scheduler_port:
            scheduler_port = 51347

        self.ip_addr = ip_addr
        self.ext_ip_addr = ext_ip_addr
        self.scheduler_port = scheduler_port
        self.pulse_interval = None
        self.keyfile = keyfile
        self.certfile = certfile
        if self.keyfile:
            self.keyfile = os.path.abspath(self.keyfile)
        if self.certfile:
            self.certfile = os.path.abspath(self.certfile)

        self.asyncoro = AsynCoro()

        self.tcp_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.certfile:
            self.tcp_sock = ssl.wrap_socket(self.tcp_sock, keyfile=self.keyfile,
                                            certfile=self.certfile)
        self.tcp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.tcp_sock.bind((self.ip_addr, node_port))
        self.address = self.tcp_sock.getsockname()
        self.tcp_sock.listen(30)

        if dest_path_prefix:
            self.dest_path_prefix = dest_path_prefix.strip().rstrip(os.sep)
        else:
            self.dest_path_prefix = os.path.join(os.sep, 'tmp', 'dispy')
        if not os.path.isdir(self.dest_path_prefix):
            os.makedirs(self.dest_path_prefix)
            os.chmod(self.dest_path_prefix, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        if max_file_size is None:
            max_file_size = MaxFileSize
        self.max_file_size = max_file_size

        self.avail_cpus = self.cpus
        self.computations = {}
        self.scheduler_ip_addr = None
        self.file_uses = {}
        self.job_infos = {}
        self.lock = asyncoro.Lock()
        self.terminate = False
        self.signature = os.urandom(20).encode('hex')
        self.auth_code = hashlib.sha1(self.signature + secret).hexdigest()
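        # zombie_interval is given in minutes; store it in seconds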
        self.zombie_interval = 60 * zombie_interval

        logger.debug('auth_code for %s: %s', ip_addr, self.auth_code)

        self.udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.udp_sock.bind(('', node_port))
        logger.info('serving %s cpus at %s:%s', self.cpus, self.ip_addr, node_port)
        logger.debug('tcp server at %s:%s', self.address[0], self.address[1])
        self.udp_sock = AsynCoroSocket(self.udp_sock, blocking=False)

        scheduler_ip_addr = _node_ipaddr(scheduler_node)

        self.reply_Q = multiprocessing.Queue()
        self.reply_Q_thread = threading.Thread(target=self.__reply_Q)
        self.reply_Q_thread.start()

        self.timer_coro = Coro(self.timer_task)
        # self.tcp_coro = Coro(self.tcp_server)
        self.udp_coro = Coro(self.udp_server, scheduler_ip_addr)

    def send_pong_msg(self, coro=None):
        ping_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        ping_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        ping_sock = AsynCoroSocket(ping_sock, blocking=False)
        pong_msg = {'ip_addr':self.ext_ip_addr, 'name':self.name, 'port':self.address[1],
                    'cpus':self.cpus, 'sign':self.signature, 'version':_dispy_version}
        pong_msg = 'PONG:' + serialize(pong_msg)
        yield ping_sock.sendto(pong_msg, ('<broadcast>', self.scheduler_port))
        ping_sock.close()

    def udp_server(self, scheduler_ip_addr, coro=None):
        assert coro is not None
        coro.set_daemon()
        if self.avail_cpus == self.cpus:
            yield self.send_pong_msg(coro=coro)
        pong_msg = {'ip_addr':self.ext_ip_addr, 'name':self.name, 'port':self.address[1],
                    'cpus':self.cpus, 'sign':self.signature, 'version':_dispy_version}
        pong_msg = 'PONG:' + serialize(pong_msg)

        if scheduler_ip_addr:
            sock = AsynCoroSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            try:
                yield sock.sendto(pong_msg, (scheduler_ip_addr, self.scheduler_port))
            except:
                logger.warning("Couldn't send ping message to %s:%s",
                               scheduler_ip_addr, self.scheduler_port)
            finally:
                sock.close()

        while True:
            msg, addr = yield self.udp_sock.recvfrom(1024)
            # TODO: process each message as separate Coro, so
            # exceptions are contained?
            if msg.startswith('PING:'):
                if self.cpus != self.avail_cpus:
                    logger.debug('Busy (%s/%s); ignoring ping message from %s',
                                 self.cpus, self.avail_cpus, addr[0])
                    continue
                try:
                    info = unserialize(msg[len('PING:'):])
                    socket.inet_aton(info['scheduler_ip_addr'])
                    assert isinstance(info['scheduler_port'], int)
                    assert info['version'] == _dispy_version
                    addr = (info['scheduler_ip_addr'], info['scheduler_port'])
                except:
                    # raise
                    logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                    continue
                yield self.udp_sock.sendto(pong_msg, addr)
            elif msg.startswith('PULSE:'):
                try:
                    info = unserialize(msg[len('PULSE:'):])
                    assert info['ip_addr'] == self.scheduler_ip_addr
                    yield self.lock.acquire()
                    for compute in self.computations.itervalues():
                        compute.last_pulse = time.time()
                    yield self.lock.release()
                except:
                    logger.warning('Ignoring PULSE from %s', addr[0])
            elif msg.startswith('SERVERPORT:'):
                try:
                    req = unserialize(msg[len('SERVERPORT:'):])
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    reply = {'ip_addr':self.address[0], 'port':self.address[1],
                             'sign':self.signature, 'version':_dispy_version}
                    sock = AsynCoroSocket(sock, blocking=False)
                    sock.settimeout(1)
                    yield sock.sendto(serialize(reply), (req['ip_addr'], req['port']))
                    sock.close()
                except:
                    logger.debug(traceback.format_exc())
                    # pass
            else:
                logger.warning('Ignoring unknown message from %s', addr[0])

    def tcp_serve_task(self, conn, addr, coro=None):
        conn = AsynCoroSocket(conn, blocking=False,
                              keyfile=self.keyfile, certfile=self.certfile)
        def job_request_task(msg):
            assert coro is not None
            try:
                _job = unserialize(msg)
            except:
                logger.debug('Ignoring job request from %s', addr[0])
                logger.debug(traceback.format_exc())
                raise StopIteration
            yield self.lock.acquire()
            compute = self.computations.get(_job.compute_id, None)
            if compute is not None:
                if compute.scheduler_ip_addr != self.scheduler_ip_addr:
                    compute = None
            yield self.lock.release()
            if self.avail_cpus == 0:
                logger.warning('All cpus busy')
                try:
                    yield conn.send_msg('NAK (all cpus busy)')
                except:
                    pass
                raise StopIteration
            elif compute is None:
                logger.warning('Invalid computation %s', _job.compute_id)
                try:
                    yield conn.send_msg('NAK (invalid computation %s)' % _job.compute_id)
                except:
                    pass
                raise StopIteration

            reply_addr = (compute.scheduler_ip_addr, compute.job_result_port)
            logger.debug('New job id %s from %s', _job.uid, addr[0])
            files = []
            for f in _job.files:
                tgt = os.path.join(compute.dest_path, os.path.basename(f['name']))
                try:
                    fd = open(tgt, 'wb')
                    fd.write(f['data'])
                    fd.close()
                except:
                    logger.warning('Could not save file "%s"', tgt)
                    continue
                try:
                    os.utime(tgt, (f['stat'].st_atime, f['stat'].st_mtime))
                    os.chmod(tgt, stat.S_IMODE(f['stat'].st_mode))
                except:
                    logger.debug('Could not set modes for "%s"', tgt)
                files.append(tgt)
            _job.files = files

            if compute.type == _Compute.func_type:
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                args = (job_info, self.certfile, self.keyfile,
                        _job.args, _job.kwargs, self.reply_Q,
                        compute.name, compute.code, compute.dest_path, _job.files)
                try:
                    yield conn.send_msg('ACK')
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                job_info.job_reply.status = DispyJob.Running
                job_info.proc = multiprocessing.Process(target=_dispy_job_func, args=args)
                yield self.lock.acquire()
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                self.job_infos[_job.uid] = job_info
                self.lock.release()
                job_info.proc.start()
                raise StopIteration
            elif compute.type == _Compute.prog_type:
                try:
                    yield conn.send_msg('ACK')
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))
                    raise StopIteration
                reply = _JobReply(_job, self.ext_ip_addr)
                job_info = _DispyJobInfo(reply, reply_addr, compute)
                job_info.job_reply.status = DispyJob.Running
                yield self.lock.acquire()
                self.job_infos[_job.uid] = job_info
                self.avail_cpus -= 1
                compute.pending_jobs += 1
                yield self.lock.release()
                prog_thread = threading.Thread(target=self.__job_program, args=(_job, job_info))
                prog_thread.start()
                raise StopIteration
            else:
                try:
                    yield conn.send_msg('NAK (invalid computation type "%s")' % compute.type)
                except:
                    logger.warning('Failed to send response for new job to %s', str(addr))

        def add_computation_task(msg):
            assert coro is not None
            try:
                compute = unserialize(msg)
            except:
                logger.debug('Ignoring computation request from %s', addr[0])
                try:
                    yield conn.send_msg('Invalid computation request')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            yield self.lock.acquire()
            if not ((self.scheduler_ip_addr is None) or
                    (self.scheduler_ip_addr == compute.scheduler_ip_addr and \
                     self.scheduler_port == compute.scheduler_port)):
                logger.debug('Ignoring computation request from %s: %s, %s, %s',
                             compute.scheduler_ip_addr, self.scheduler_ip_addr,
                             self.avail_cpus, self.cpus)
                self.lock.release()
                try:
                    yield conn.send_msg('Busy')
                except:
                    pass
                raise StopIteration

            resp = 'ACK'
            if compute.dest_path and isinstance(compute.dest_path, str):
                compute.dest_path = compute.dest_path.strip(os.sep)
            else:
                for x in xrange(20):
                    compute.dest_path = os.urandom(8).encode('hex')
                    if compute.dest_path.find(os.sep) >= 0:
                        continue
                    if not os.path.isdir(os.path.join(self.dest_path_prefix, compute.dest_path)):
                        break
                else:
                    logger.warning('Failed to create unique dest_path: %s', compute.dest_path)
                    resp = 'NACK'
            compute.dest_path = os.path.join(self.dest_path_prefix, compute.dest_path)
            try:
                os.makedirs(compute.dest_path)
                os.chmod(compute.dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
                logger.debug('dest_path for "%s": %s', compute.name, compute.dest_path)
            except:
                logger.warning('Invalid destination path: "%s"', compute.dest_path)
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                self.lock.release()
                try:
                    yield conn.send_msg('NACK (Invalid dest_path)')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            if compute.id in self.computations:
                logger.warning('Computation "%s" (%s) is being replaced',
                               compute.name, compute.id)
            setattr(compute, 'last_pulse', time.time())
            setattr(compute, 'pending_jobs', 0)
            setattr(compute, 'pending_results', 0)
            setattr(compute, 'zombie', False)
            logger.debug('xfer_files given: %s', ','.join(xf.name for xf in compute.xfer_files))
            if compute.type == _Compute.func_type:
                try:
                    code = compile(compute.code, '<string>', 'exec')
                except:
                    logger.warning('Computation "%s" could not be compiled', compute.name)
                    if os.path.isdir(compute.dest_path):
                        os.rmdir(compute.dest_path)
                    self.lock.release()
                    try:
                        yield conn.send_msg('NACK (Compilation failed)')
                    except:
                        logger.warning('Failed to send reply to %s', str(addr))
                    raise StopIteration
                compute.code = marshal.dumps(code)
            elif compute.type == _Compute.prog_type:
                assert not compute.code
                compute.name = os.path.join(compute.dest_path, os.path.basename(compute.name))

            xfer_files = []
            for xf in compute.xfer_files:
                tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
                try:
                    if _same_file(tgt, xf):
                        logger.debug('Ignoring file "%s" / "%s"', xf.name, tgt)
                        if tgt not in self.file_uses:
                            self.file_uses[tgt] = 0
                        self.file_uses[tgt] += 1
                        continue
                except:
                    pass
                if self.max_file_size and xf.stat_buf.st_size > self.max_file_size:
                    resp = 'NACK (file "%s" too big)' % xf.name
                else:
                    xfer_files.append(xf)
            if resp == 'ACK' and ((self.scheduler_ip_addr is not None) and \
                                  (self.scheduler_ip_addr != compute.scheduler_ip_addr)):
                resp = 'NACK (busy)'
            if resp == 'ACK':
                self.computations[compute.id] = compute
                self.scheduler_ip_addr = compute.scheduler_ip_addr
                self.scheduler_port = compute.scheduler_port
                self.pulse_interval = compute.pulse_interval
                self.lock.release()
                if xfer_files:
                    resp += ':XFER_FILES:' + serialize(xfer_files)
                try:
                    yield conn.send_msg(resp)
                except:
                    assert self.scheduler_ip_addr == compute.scheduler_ip_addr
                    yield self.lock.acquire()
                    del self.computations[compute.id]
                    self.scheduler_ip_addr = None
                    self.scheduler_port = None
                    self.pulse_interval = None
                    self.lock.release()
                else:
                    self.timer_coro.resume(True)
            else:
                self.lock.release()
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                try:
                    yield conn.send_msg(resp)
                except:
                    pass

        def xfer_file_task(msg):
            assert coro is not None
            try:
                xf = unserialize(msg)
            except:
                logger.debug('Ignoring file transfer request from %s', addr[0])
                raise StopIteration
            resp = ''
            if xf.compute_id not in self.computations:
                logger.error('computation "%s" is invalid', xf.compute_id)
                raise StopIteration
            tgt = os.path.join(self.computations[xf.compute_id].dest_path,
                               os.path.basename(xf.name))
            if os.path.isfile(tgt):
                if _same_file(tgt, xf):
                    yield self.lock.acquire()
                    if tgt in self.file_uses:
                        self.file_uses[tgt] += 1
                    else:
                        self.file_uses[tgt] = 1
                    yield self.lock.release()
                    resp = 'ACK'
                else:
                    logger.warning('File "%s" already exists as "%s" but with different state',
                                   xf.name, tgt)
            if not resp:
                logger.debug('Copying file %s to %s (%s)', xf.name, tgt, xf.stat_buf.st_size)
                try:
                    fd = open(tgt, 'wb')
                    n = 0
                    while n < xf.stat_buf.st_size:
                        data = yield conn.recvall(min(xf.stat_buf.st_size-n, 10240000))
                        if not data:
                            break
                        fd.write(data)
                        n += len(data)
                        if self.max_file_size and n > self.max_file_size:
                            logger.warning('File "%s" is too big (%s); it is truncated', tgt, n)
                            break
                    fd.close()
                    if n < xf.stat_buf.st_size:
                        resp = 'NAK (read only %s bytes)' % n
                    else:
                        resp = 'ACK'
                        logger.debug('Copied file %s, %s', tgt, resp)
                        os.utime(tgt, (xf.stat_buf.st_atime, xf.stat_buf.st_mtime))
                        os.chmod(tgt, stat.S_IMODE(xf.stat_buf.st_mode))
                        self.file_uses[tgt] = 1
                except:
                    logger.warning('Copying file "%s" failed with "%s"',
                                   xf.name, traceback.format_exc())
                    resp = 'NACK'
                try:
                    yield conn.send_msg(resp)
                except:
                    logger.debug('Could not send reply for "%s"', xf.name)
            raise StopIteration # xfer_file_task

        def terminate_job_task(msg):
            assert coro is not None
            yield self.lock.acquire()
            try:
                _job = unserialize(msg)
                compute = self.computations[_job.compute_id]
                assert addr[0] == compute.scheduler_ip_addr
                job_info = self.job_infos.pop(_job.uid, None)
            except:
                logger.debug('Ignoring job request from %s', addr[0])
                raise StopIteration
            finally:
                self.lock.release()
            if job_info is None:
                logger.debug('Job %s completed; ignoring cancel request from %s',
                             _job.uid, addr[0])
                raise StopIteration
            logger.debug('Terminating job %s', _job.uid)
            job_info.proc.terminate()
            if isinstance(job_info.proc, multiprocessing.Process):
                for x in xrange(20):
                    if job_info.proc.is_alive():
                        yield coro.sleep(0.1)
                    else:
                        logger.debug('Process "%s" for job %s terminated', compute.name, _job.uid)
                        break
                else:
                    logger.warning('Could not kill process %s', compute.name)
                    raise StopIteration
            else:
                assert isinstance(job_info.proc, subprocess.Popen)
                for x in xrange(20):
                    rc = job_info.proc.poll()
                    logger.debug('Program "%s" for job %s terminated with %s',
                                 compute.name, _job.uid, rc)
                    if rc is not None:
                        break
                    if x == 10:
                        logger.debug('Killing job %s', _job.uid)
                        job_info.proc.kill()
                    yield coro.sleep(0.1)
                else:
                    logger.warning('Could not kill process %s', compute.name)
                    raise StopIteration
            reply_addr = (addr[0], compute.job_result_port)
            reply = _JobReply(_job, self.ext_ip_addr)
            job_info = _DispyJobInfo(reply, reply_addr, compute)
            reply.status = DispyJob.Terminated
            yield self._send_job_reply(job_info, resending=False, coro=coro)

        def retrieve_job_task(msg):
            assert coro is not None
            try:
                req = unserialize(msg)
                assert req['uid'] is not None
                assert req['hash'] is not None
                assert req['compute_id'] is not None
            except:
                resp = serialize('Invalid job')
                try:
                    yield conn.send_msg(resp)
                except:
                    pass
                raise StopIteration

            job_info = self.job_infos.get(req['uid'], None)
            resp = None
            if job_info is not None:
                try:
                    yield conn.send_msg(serialize(job_info.job_reply))
                    ack = yield conn.recv_msg()
                    # no need to check ack
                except:
                    logger.debug('Could not send reply for job %s', req['uid'])
                raise StopIteration

            for d in os.listdir(self.dest_path_prefix):
                info_file = os.path.join(self.dest_path_prefix, d,
                                         '_dispy_job_reply_%s' % req['uid'])
                if os.path.isfile(info_file):
                    try:
                        fd = open(info_file, 'rb')
                        job_reply = pickle.load(fd)
                        fd.close()
                    except:
                        job_reply = None
                    if hasattr(job_reply, 'hash') and job_reply.hash == req['hash']:
                        try:
                            yield conn.send_msg(serialize(job_reply))
                            ack = yield conn.recv_msg()
                            assert ack == 'ACK'
                        except:
                            logger.debug('Could not send reply for job %s', req['uid'])
                            raise StopIteration
                        try:
                            os.remove(info_file)
                            yield self.lock.acquire()
                            compute = self.computations.get(req['compute_id'], None)
                            if compute is not None:
                                compute.pending_results -= 1
                                if compute.pending_results == 0:
                                    compute.zombie = True
                                    self.cleanup_computation(compute)
                            self.lock.release()
                        except:
                            logger.debug('Could not remove "%s"', info_file)
                        raise StopIteration
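            # for ... else: runs only if no stored reply file matched above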
            else:
                resp = serialize('Invalid job: %s' % req['uid'])

            if resp:
                try:
                    yield conn.send_msg(resp)
                except:
                    pass

        # tcp_serve_task starts
        try:
            req = yield conn.recvall(len(self.auth_code))
            assert req == self.auth_code
        except:
            logger.warning('Ignoring request; invalid client authentication?')
            conn.close()
            raise StopIteration
        msg = yield conn.recv_msg()
        if not msg:
            conn.close()
            raise StopIteration
        if msg.startswith('JOB:'):
            msg = msg[len('JOB:'):]
            yield job_request_task(msg)
            conn.close()
        elif msg.startswith('COMPUTE:'):
            msg = msg[len('COMPUTE:'):]
            yield add_computation_task(msg)
            conn.close()
        elif msg.startswith('FILEXFER:'):
            msg = msg[len('FILEXFER:'):]
            yield xfer_file_task(msg)
            conn.close()
        elif msg.startswith('DEL_COMPUTE:'):
            msg = msg[len('DEL_COMPUTE:'):]
            try:
                info = unserialize(msg)
                compute_id = info['ID']
                yield self.lock.acquire()
                compute = self.computations.get(compute_id, None)
                if compute is None:
                    logger.warning('Computation "%s" is not valid', compute_id)
                else:
                    compute.zombie = True
                    self.cleanup_computation(compute)
                self.lock.release()
            except:
                logger.debug('Deleting computation failed with %s', traceback.format_exc())
                # raise
            conn.close()
        elif msg.startswith('TERMINATE_JOB:'):
            msg = msg[len('TERMINATE_JOB:'):]
            yield terminate_job_task(msg)
            conn.close()
        elif msg.startswith('RETRIEVE_JOB:'):
            msg = msg[len('RETRIEVE_JOB:'):]
            yield retrieve_job_task(msg)
            conn.close()
        else:
            logger.warning('Invalid request "%s" from %s',
                           msg[:min(10, len(msg))], addr[0])
            resp = 'NAK (invalid command: %s)' % (msg[:min(10, len(msg))])
            try:
                yield conn.send_msg(resp)
            except:
                logger.warning('Failed to send reply to %s', str(addr))
            conn.close()

    def timer_task(self, coro=None):
        coro.set_daemon()
        reset = True
        last_pulse_time = last_zombie_time = time.time()
        while True:
            if reset:
                if self.pulse_interval and self.zombie_interval:
                    timeout = min(self.pulse_interval, self.zombie_interval)
                    self.zombie_interval = max(5 * self.pulse_interval, self.zombie_interval)
                else:
                    timeout = max(self.pulse_interval, self.zombie_interval)

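            # suspend() returns the value passed to resume(); a true value means
            # pulse/zombie intervals changed and the timeout must be recomputed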
            reset = yield coro.suspend(timeout)

            now = time.time()
            if self.pulse_interval and (now - last_pulse_time) >= self.pulse_interval:
                n = self.cpus - self.avail_cpus
                assert n >= 0
                if n > 0 and self.scheduler_ip_addr:
                    last_pulse_time = now
                    msg = 'PULSE:' + serialize({'ip_addr':self.ext_ip_addr,
                                                'port':self.udp_sock.getsockname()[1], 'cpus':n})
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    sock = AsynCoroSocket(sock, blocking=False)
                    sock.settimeout(1)
                    yield sock.sendto(msg, (self.scheduler_ip_addr, self.scheduler_port))
                    sock.close()
            if self.zombie_interval and (now - last_zombie_time) >= self.zombie_interval:
                last_zombie_time = now
                yield self.lock.acquire()
                for compute in self.computations.itervalues():
                    if (now - compute.last_pulse) > self.zombie_interval:
                        compute.zombie = True
                zombies = [compute for compute in self.computations.itervalues() \
                           if compute.zombie and compute.pending_jobs == 0]
                for compute in zombies:
                    logger.debug('Deleting zombie computation "%s"', compute.name)
                    self.cleanup_computation(compute)
                phoenix = [compute for compute in self.computations.itervalues() \
                           if not compute.zombie and compute.pending_results]
                for compute in phoenix:
                    files = [f for f in os.listdir(compute.dest_path) \
                             if f.startswith('_dispy_job_reply_')]
                    # limit number queued so as not to take up too much time
                    files = files[:min(len(files), 128)]
                    for f in files:
                        result_file = os.path.join(compute.dest_path, f)
                        try:
                            fd = open(result_file, 'rb')
                            job_result = pickle.load(fd)
                            fd.close()
                        except:
                            logger.debug('Could not load "%s"', result_file)
                            logger.debug(traceback.format_exc())
                            continue
                        try:
                            os.remove(result_file)
                        except:
                            logger.debug('Could not remove "%s"', result_file)
                        compute.pending_results -= 1
                        job_info = _DispyJobInfo(job_result, (compute.scheduler_ip_addr,
                                                              compute.job_result_port), compute)
                        Coro(self._send_job_reply, job_info, resending=True)
                self.lock.release()
                for compute in zombies:
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    sock = AsynCoroSocket(sock, blocking=False)
                    sock.settimeout(1)
                    logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
                    data = serialize({'ip_addr':self.address[0], 'port':self.address[1],
                                      'sign':self.signature})
                    yield sock.sendto('TERMINATED:%s' % data, (compute.scheduler_ip_addr,
                                                               compute.scheduler_port))
                    sock.close()
                if self.scheduler_ip_addr is None and self.avail_cpus == self.cpus:
                    self.pulse_interval = None
                    reset = True
                    yield self.send_pong_msg(coro=coro)

    def __job_program(self, _job, job_info):
        compute = self.computations[_job.compute_id]
        program = [compute.name]
        args = unserialize(_job.args)
        program.extend(args)
        logger.debug('Executing "%s"', str(program))
        reply = job_info.job_reply
        try:
            os.chdir(compute.dest_path)
            env = {}
            env.update(os.environ)
            env['PATH'] = compute.dest_path + ':' + env['PATH']
            job_info.proc = subprocess.Popen(program, stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE, env=env)

            assert isinstance(job_info.proc, subprocess.Popen)
            reply.stdout, reply.stderr = job_info.proc.communicate()
            reply.result = job_info.proc.returncode
            reply.status = DispyJob.Finished
        except:
            logger.debug('Executing %s failed with %s', str(program), str(sys.exc_info()))
            reply.exception = traceback.format_exc()
            reply.status = DispyJob.Terminated
        self.reply_Q.put(reply)

    def __reply_Q(self):
        while True:
            job_reply = self.reply_Q.get()
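            # None is the sentinel queued by shutdown() to stop this thread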
            if job_reply is None:
                break
            job_info = self.job_infos.pop(job_reply.uid, None)
            if job_info is not None:
                if job_info.proc is not None:
                    if isinstance(job_info.proc, multiprocessing.Process):
                        job_info.proc.join(2)
                    else:
                        job_info.proc.wait()
                job_info.job_reply = job_reply
                Coro(self._send_job_reply, job_info, resending=False).value()

    def _send_job_reply(self, job_info, resending=False, coro=None):
        """Internal use only.
        """
        assert coro is not None
        job_reply = job_info.job_reply
        logger.debug('Sending result for job %s (%s) to %s',
                     job_reply.uid, job_reply.status, str(job_info.reply_addr))
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock = AsynCoroSocket(sock, blocking=False, certfile=self.certfile, keyfile=self.keyfile)
        sock.settimeout(2)
        try:
            yield sock.connect(job_info.reply_addr)
            yield sock.send_msg(serialize(job_reply))
            ack = yield sock.recv_msg()
            assert ack == 'ACK'
        except:
            logger.error("Couldn't send results for %s to %s",
                         job_reply.uid, str(job_info.reply_addr))
            # store job result even if computation has not enabled
            # fault recovery; user may be able to access node and
            # retrieve result manually
            f = os.path.join(job_info.compute_dest_path, '_dispy_job_reply_%s' % job_reply.uid)
            logger.debug('storing results for job %s', job_reply.uid)
            try:
                fd = open(f, 'wb')
                pickle.dump(job_reply, fd)
                fd.close()
            except:
                logger.debug('Could not save results for job %s', job_reply.uid)
            else:
                yield self.lock.acquire()
                compute = self.computations.get(job_info.compute_id, None)
                if compute is not None:
                    compute.pending_results += 1
                self.lock.release()
        finally:
            sock.close()
            if not resending:
                yield self.lock.acquire()
                self.avail_cpus += 1
                compute = self.computations.get(job_info.compute_id, None)
                if compute is None:
                    logger.warning('Computation for %s / %s is invalid!',
                                   job_reply.uid, job_info.compute_id)
                else:
                    # technically last_pulse should be updated only
                    # when successfully sent reply, but no harm if done
                    # otherwise, too
                    compute.last_pulse = time.time()
                    compute.pending_jobs -= 1
                    if compute.pending_jobs == 0 and compute.zombie:
                        self.cleanup_computation(compute)
                self.lock.release()

    def cleanup_computation(self, compute):
        # called with lock held
        if not compute.zombie:
            return
        if compute.pending_jobs != 0:
            logger.debug('pending jobs for computation "%s"/%s: %s',
                         compute.name, compute.id, compute.pending_jobs)
            if compute.pending_jobs > 0:
                return

        del self.computations[compute.id]
        if compute.scheduler_ip_addr == self.scheduler_ip_addr and \
               all(c.scheduler_ip_addr != self.scheduler_ip_addr \
                   for c in self.computations.itervalues()):
            assert self.avail_cpus == self.cpus
            self.scheduler_ip_addr = None
            self.pulse_interval = None

        if self.scheduler_ip_addr is None and self.avail_cpus == self.cpus:
            self.timer_coro.resume(True)
            Coro(self.send_pong_msg)
        if compute.cleanup is False:
            return
        for xf in compute.xfer_files:
            tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
            if tgt not in self.file_uses:
                logger.debug('File "%s" is unknown', tgt)
                continue
            self.file_uses[tgt] -= 1
            if self.file_uses[tgt] == 0:
                del self.file_uses[tgt]
                if tgt == xf.name:
                    logger.debug('Not removing file "%s"', xf.name)
                else:
                    logger.debug('Removing file "%s"', tgt)
                    try:
                        os.remove(tgt)
                        if os.path.splitext(tgt)[1] == '.py' and os.path.isfile(tgt + 'c'):
                            os.remove(tgt + 'c')
                    except:
                        logger.warning('Could not remove file "%s"', tgt)

        if os.path.isdir(compute.dest_path) and \
               compute.dest_path.startswith(self.dest_path_prefix) and \
               len(compute.dest_path) > len(self.dest_path_prefix) and \
               len(os.listdir(compute.dest_path)) == 0:
            logger.debug('Removing "%s"', compute.dest_path)
            try:
                os.rmdir(compute.dest_path)
            except:
                logger.warning('Could not remove directory "%s"', compute.dest_path)

    def shutdown(self):
        def _shutdown(self, coro=None):
            assert coro is not None
            yield self.lock.acquire()
            job_infos = self.job_infos
            self.job_infos = {}
            computations = self.computations.items()
            self.computations = {}
            if self.reply_Q:
                self.reply_Q.put(None)
            self.lock.release()
            for uid, job_info in job_infos.iteritems():
                job_info.proc.terminate()
                logger.debug('process for %s is killed', uid)
                if isinstance(job_info.proc, multiprocessing.Process):
                    job_info.proc.join(2)
                else:
                    job_info.proc.wait()
            for cid, compute in computations:
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                sock = AsynCoroSocket(sock, blocking=False)
                sock.settimeout(2)
                logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
                data = serialize({'ip_addr':self.address[0], 'port':self.address[1],
                                  'sign':self.signature})
                yield sock.sendto('TERMINATED:' + data, (compute.scheduler_ip_addr,
                                                         compute.scheduler_port))
                sock.close()

        Coro(_shutdown, self).value()
        self.asyncoro.join()
        self.asyncoro.terminate()
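
A node built from this class serves jobs submitted through the dispy client API. For context, here is a minimal client sketch; it assumes the JobCluster/submit interface of the same dispy release and at least one running dispynode:

import dispy

def compute(n):
    # executed on a dispy node
    import time, socket
    time.sleep(n)
    return socket.gethostname(), n

if __name__ == '__main__':
    cluster = dispy.JobCluster(compute)
    jobs = [cluster.submit(i) for i in range(4)]
    for job in jobs:
        host, n = job()  # waits for the job to finish and returns its result
        print('%s executed job with %s' % (host, n))
    cluster.close()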