Example #1
0
    def listen_udp_proc(self, coro=None):
        """Daemon coroutine relaying UDP 'PING' messages.

        On startup, broadcasts any already-known scheduler info to nodes on
        self.node_port.  Then listens on self.listen_port and re-broadcasts
        each valid ping to self.node_port, tagging it with 'relay' when the
        message could loop back to this process.
        """
        coro.set_daemon()
        bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        if self.scheduler_ip_addrs and self.scheduler_port:
            relay_request = {
                "ip_addrs": self.scheduler_ip_addrs,
                "port": self.scheduler_port,
                "version": __version__,
                "sign": None,
            }
            # BUG FIX: non-blocking AsyncSocket I/O must be driven with
            # 'yield' from inside the coroutine; without it the datagram may
            # never be sent before the socket is closed below.
            yield bc_sock.sendto("PING:".encode() + asyncoro.serialize(relay_request), ("<broadcast>", self.node_port))
        bc_sock.close()

        listen_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_sock.bind(("", self.listen_port))

        while 1:
            msg, addr = yield listen_sock.recvfrom(1024)
            if not msg.startswith("PING:".encode()):
                logger.debug('Ignoring message "%s" from %s', msg[: min(len(msg), 5)], addr[0])
                continue
            logger.debug("Ping message from %s (%s)", addr[0], addr[1])
            try:
                info = asyncoro.deserialize(msg[len("PING:".encode()) :])
                if info["version"] != __version__:
                    logger.warning(
                        "Ignoring %s due to version mismatch: %s / %s", info["ip_addrs"], info["version"], __version__
                    )
                    continue
                # remember sender as an additional scheduler address
                self.scheduler_ip_addrs = info["ip_addrs"] + [addr[0]]
                self.scheduler_port = info["port"]
            except:
                logger.debug("Ignoring ping message from %s (%s)", addr[0], addr[1])
                logger.debug(traceback.format_exc())
                continue
            if info.get("relay", None):
                # message was already relayed once; drop to avoid loops
                logger.debug("Ignoring ping back (from %s)", addr[0])
                continue
            logger.debug("relaying ping from %s / %s", info["ip_addrs"], addr[0])
            if self.node_port == self.listen_port:
                info["relay"] = "y"  # 'check if this message loops back to self
            bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield bc_sock.sendto("PING:".encode() + asyncoro.serialize(info), ("<broadcast>", self.node_port))
            bc_sock.close()
Example #2
0
 def sched_udp_proc(self, coro=None):
     """Daemon coroutine answering scheduler-port pings with known scheduler info.

     Listens on self.scheduler_port; for each valid 'PING:' datagram, replies
     to the sender's advertised address with this relay's scheduler info,
     marked 'relay' so the reply is not re-relayed.
     """
     coro.set_daemon()
     sched_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     sched_sock.bind(('', self.scheduler_port))
     while 1:
         msg, addr = yield sched_sock.recvfrom(1024)
         if (not msg.startswith('PING:'.encode()) or
            not self.scheduler_ip_addrs or not self.scheduler_port):
             logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
             continue
         try:
             info = asyncoro.unserialize(msg[len('PING:'.encode()):])
             logger.debug('sched_sock: %s', info)
             assert info['version'] == __version__
             # assert isinstance(info['cpus'], int)
         except:
             logger.debug(traceback.format_exc())
             # BUG FIX: skip this datagram on deserialization/version failure;
             # previously 'info' could be unbound, raising NameError below.
             continue
         msg = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                'version': __version__}
         if info.get('relay', None):
             logger.debug('Ignoring ping back from %s: %s', addr[0], info)
             continue
         msg['relay'] = 'y'
         relay_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
         # NOTE(review): reply goes to info['ip_addr'] / info['port'];
         # presumably set by node pong messages -- confirm against senders.
         yield relay_sock.sendto('PING:'.encode() + asyncoro.serialize(msg),
                                 (info['ip_addr'], info['port']))
         relay_sock.close()
Example #3
0
 def _shutdown(self, coro=None):
     """Coroutine that shuts this node down.

     Atomically takes ownership of all job and computation bookkeeping (under
     self.lock), terminates every running job process, and sends a
     'TERMINATED:' UDP notification to each computation's scheduler.
     """
     assert coro is not None
     yield self.lock.acquire()
     # detach current state under the lock so no new jobs/computations race in
     job_infos = self.job_infos
     self.job_infos = {}
     computations = self.computations.items()
     self.computations = {}
     if self.reply_Q:
         # None sentinel wakes up the reply-queue consumer so it can exit
         self.reply_Q.put(None)
     self.lock.release()
     for uid, job_info in job_infos.iteritems():
         job_info.proc.terminate()
         logger.debug('process for %s is killed', uid)
         if isinstance(job_info.proc, multiprocessing.Process):
             # give the process a short grace period to exit
             job_info.proc.join(2)
         else:
             # presumably a subprocess.Popen-like object -- TODO confirm
             job_info.proc.wait()
     for cid, compute in computations:
         sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
         sock = AsynCoroSocket(sock, blocking=False)
         sock.settimeout(2)
         logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
         data = serialize({'ip_addr':self.address[0], 'port':self.address[1],
                           'sign':self.signature})
         yield sock.sendto('TERMINATED:' + data, (compute.scheduler_ip_addr,
                                                  compute.scheduler_port))
         sock.close()
Example #4
0
def dispy_provisional_result(result):
    """Sends provisional result of computation back to the client.

    In some cases, such as optimizations, computations may send
    current (best) result to the client and continue computation (for
    next iteration) so that the client may decide to terminate
    computations based on the results or alter computations if
    necessary. The computations can use this function in such cases
    with the current result of computation as argument.

    Relies on __dispy_job_info / __dispy_job_keyfile / __dispy_job_certfile
    being injected into the job's global namespace by the dispy node.
    """

    __dispy_job_reply = __dispy_job_info.job_reply
    logger.debug('Sending provisional result for job %s to %s',
                 __dispy_job_reply.uid, __dispy_job_info.reply_addr)
    __dispy_job_reply.status = DispyJob.ProvisionalResult
    __dispy_job_reply.result = result
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock = AsynCoroSocket(sock, blocking=True, keyfile=__dispy_job_keyfile,
                          certfile=__dispy_job_certfile)
    sock.settimeout(2)
    try:
        sock.connect(__dispy_job_info.reply_addr)
        sock.send_msg(serialize(__dispy_job_reply))
        # wait for client acknowledgment; value is not checked (best effort)
        ack = sock.recv_msg()
    except:
        logger.warning("Couldn't send provisional results %s:\n%s",
                       str(result), traceback.format_exc())
    finally:
        # IMPROVED: close the socket unconditionally, even if an unexpected
        # exception propagates past the except clause
        sock.close()
Example #5
0
 def sched_udp_proc(self, coro=None):
     """Daemon coroutine answering scheduler-port pings with known scheduler info.

     Listens on self.scheduler_port; for each valid 'PING:' datagram, replies
     to the sender's advertised address with this relay's scheduler info,
     marked 'relay' so the reply is not re-relayed.
     """
     coro.set_daemon()
     sched_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     sched_sock.bind(("", self.scheduler_port))
     while 1:
         msg, addr = yield sched_sock.recvfrom(1024)
         if not msg.startswith("PING:".encode()) or not self.scheduler_ip_addrs or not self.scheduler_port:
             logger.debug("Ignoring ping message from %s (%s)", addr[0], addr[1])
             continue
         try:
             info = asyncoro.deserialize(msg[len("PING:".encode()) :])
             logger.debug("sched_sock: %s", info)
             assert info["version"] == __version__
             # assert isinstance(info['cpus'], int)
         except:
             logger.debug(traceback.format_exc())
             # BUG FIX: skip this datagram on deserialization/version failure;
             # previously 'info' could be unbound, raising NameError below.
             continue
         msg = {"ip_addrs": self.scheduler_ip_addrs, "port": self.scheduler_port, "version": __version__}
         if info.get("relay", None):
             logger.debug("Ignoring ping back from %s: %s", addr[0], info)
             continue
         msg["relay"] = "y"
         relay_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
         # NOTE(review): reply goes to info['ip_addr'] / info['port'];
         # presumably set by node pong messages -- confirm against senders.
         yield relay_sock.sendto("PING:".encode() + asyncoro.serialize(msg), (info["ip_addr"], info["port"]))
         relay_sock.close()
Example #6
0
    def listen_udp_proc(self, coro=None):
        """Daemon coroutine relaying UDP 'PING' messages.

        On startup, broadcasts any already-known scheduler info to nodes on
        self.node_port.  Then listens on self.listen_port and re-broadcasts
        each valid ping to self.node_port, tagging it with 'relay' when the
        message could loop back to this process.
        """
        coro.set_daemon()
        bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        if self.scheduler_ip_addrs and self.scheduler_port:
            relay_request = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                             'version': __version__, 'sign': None}
            # BUG FIX: non-blocking AsyncSocket I/O must be driven with
            # 'yield' from inside the coroutine; without it the datagram may
            # never be sent before the socket is closed below.
            yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(relay_request),
                                 ('<broadcast>', self.node_port))
        bc_sock.close()

        listen_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_sock.bind(('', self.listen_port))

        while 1:
            msg, addr = yield listen_sock.recvfrom(1024)
            if not msg.startswith('PING:'.encode()):
                logger.debug('Ignoring message "%s" from %s',
                             msg[:min(len(msg), 5)], addr[0])
                continue
            logger.debug('Ping message from %s (%s)', addr[0], addr[1])
            try:
                info = asyncoro.unserialize(msg[len('PING:'.encode()):])
                if info['version'] != __version__:
                    logger.warning('Ignoring %s due to version mismatch: %s / %s',
                                   info['ip_addrs'], info['version'], __version__)
                    continue
                # remember sender as an additional scheduler address
                self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
                self.scheduler_port = info['port']
            except:
                logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                logger.debug(traceback.format_exc())
                continue
            if info.get('relay', None):
                # message was already relayed once; drop to avoid loops
                logger.debug('Ignoring ping back (from %s)', addr[0])
                continue
            logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
            if self.node_port == self.listen_port:
                info['relay'] = 'y'  # 'check if this message loops back to self
            bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                                 ('<broadcast>', self.node_port))
            bc_sock.close()
Example #7
0
 def send_pong_msg(self, coro=None):
     """Broadcast a 'PONG:' datagram advertising this node to schedulers."""
     sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
     sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
     sock = AsynCoroSocket(sock, blocking=False)
     # node description sent to any listening scheduler
     node_info = {'ip_addr':self.ext_ip_addr, 'name':self.name, 'port':self.address[1],
                  'cpus':self.cpus, 'sign':self.signature, 'version':_dispy_version}
     yield sock.sendto('PONG:' + serialize(node_info), ('<broadcast>', self.scheduler_port))
     sock.close()
Example #8
0
 def _send_job_reply(self, job_info, resending=False, coro=None):
     """Internal use only.

     Sends job_info.job_reply to the scheduler over TCP.  On failure the
     reply is pickled into the computation's dest_path so it can be
     retrieved/resent later.  Unless 'resending', also releases the CPU used
     by the job and may clean up a zombie computation.
     """
     assert coro is not None
     job_reply = job_info.job_reply
     logger.debug('Sending result for job %s (%s) to %s',
                  job_reply.uid, job_reply.status, str(job_info.reply_addr))
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     sock = AsynCoroSocket(sock, blocking=False, certfile=self.certfile, keyfile=self.keyfile)
     sock.settimeout(2)
     try:
         yield sock.connect(job_info.reply_addr)
         yield sock.send_msg(serialize(job_reply))
         ack = yield sock.recv_msg()
         assert ack == 'ACK'
     except:
         logger.error("Couldn't send results for %s to %s",
                      job_reply.uid, str(job_info.reply_addr))
         # store job result even if computation has not enabled
         # fault recovery; user may be able to access node and
         # retrieve result manually
         f = os.path.join(job_info.compute_dest_path, '_dispy_job_reply_%s' % job_reply.uid)
         logger.debug('storing results for job %s', job_reply.uid)
         try:
             fd = open(f, 'wb')
             pickle.dump(job_reply, fd)
             fd.close()
         except:
             logger.debug('Could not save results for job %s', job_reply.uid)
         else:
             # result saved: count it so retrieve/resend paths know about it
             yield self.lock.acquire()
             compute = self.computations.get(job_info.compute_id, None)
             if compute is not None:
                 compute.pending_results += 1
             self.lock.release()
     finally:
         sock.close()
         if not resending:
             # first (non-resend) delivery attempt: release the CPU and
             # update the computation's bookkeeping under the lock
             yield self.lock.acquire()
             self.avail_cpus += 1
             compute = self.computations.get(job_info.compute_id, None)
             if compute is None:
                 logger.warning('Computation for %s / %s is invalid!',
                                job_reply.uid, job_info.compute_id)
             else:
                 # technically last_pulse should be updated only
                 # when successfully sent reply, but no harm if done
                 # otherwise, too
                 compute.last_pulse = time.time()
                 compute.pending_jobs -= 1
                 if compute.pending_jobs == 0 and compute.zombie:
                     self.cleanup_computation(compute)
             self.lock.release()
Example #9
0
 def _send_job_reply(self, job_info, resending=False, coro=None):
     """Internal use only.

     Sends job_info.job_reply to the scheduler over TCP.  On failure the
     reply is pickled into the computation's dest_path so it can be
     retrieved/resent later.  Unless 'resending', releases the CPU used by
     the job up front and may clean up a zombie computation at the end.
     """
     assert coro is not None
     job_reply = job_info.job_reply
     logger.debug('Sending result for job %s (%s) to %s',
                  job_reply.uid, job_reply.status, str(job_info.reply_addr))
     if not resending:
         # CPU is released here (before sending), unlike the save/notify
         # bookkeeping which happens after the attempt
         self.avail_cpus += 1
         assert self.avail_cpus <= self.num_cpus
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     sock = AsynCoroSocket(sock, blocking=False, certfile=self.certfile, keyfile=self.keyfile)
     sock.settimeout(5)
     try:
         yield sock.connect(job_info.reply_addr)
         yield sock.send_msg(serialize(job_reply))
         ack = yield sock.recv_msg()
         assert ack == 'ACK'
         compute = self.computations.get(job_info.compute_id, None)
         if compute is not None:
             # successful delivery counts as scheduler activity
             compute.last_pulse = time.time()
     except:
         logger.error("Couldn't send results for %s to %s : %s",
                      job_reply.uid, str(job_info.reply_addr), traceback.format_exc())
         # store job result even if computation has not enabled
         # fault recovery; user may be able to access node and
         # retrieve result manually
         f = os.path.join(job_info.compute_dest_path, '_dispy_job_reply_%s' % job_reply.uid)
         logger.debug('storing results for job %s', job_reply.uid)
         try:
             fd = open(f, 'wb')
             pickle.dump(job_reply, fd)
             fd.close()
         except:
             logger.debug('Could not save results for job %s', job_reply.uid)
         else:
             # result saved: count it so retrieve/resend paths know about it
             compute = self.computations.get(job_info.compute_id, None)
             if compute is not None:
                 compute.pending_results += 1
     finally:
         sock.close()
         if not resending:
             compute = self.computations.get(job_info.compute_id, None)
             if compute is not None:
                 compute.pending_jobs -= 1
                 if compute.pending_jobs == 0 and compute.zombie:
                     self.cleanup_computation(compute)
Example #10
0
 def tcp_task(conn, addr, coro=None):
     """Handle one TCP connection carrying a 'PING:' message.

     Reads the message, records the sender's scheduler addresses on self,
     and re-broadcasts the ping over UDP to self.node_port.  Uses
     'raise StopIteration' to end the coroutine early on any invalid input.
     NOTE(review): relies on 'self' and 'auth_len' from an enclosing scope.
     """
     conn.settimeout(5)
     try:
         # first recvall consumes the authentication preamble; the actual
         # message follows
         msg = yield conn.recvall(auth_len)
         msg = yield conn.recv_msg()
     except:
         logger.debug(traceback.format_exc())
         logger.debug('Ignoring invalid TCP message from %s:%s', addr[0], addr[1])
         raise StopIteration
     finally:
         # connection is no longer needed once the message is read
         conn.close()
     logger.debug('Ping message from %s (%s)', addr[0], addr[1])
     try:
         info = asyncoro.unserialize(msg[len('PING:'.encode()):])
         if info['version'] != __version__:
             logger.warning('Ignoring %s due to version mismatch: %s / %s',
                            info['ip_addrs'], info['version'], __version__)
             raise StopIteration
         # TODO: since dispynetrelay is not aware of computations
         # closing, if more than one client sends ping, nodes will
         # respond to different clients
         self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
         self.scheduler_port = info['port']
     except:
         logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
         logger.debug(traceback.format_exc())
         raise StopIteration
     if info.get('relay', None):
         # already relayed once; drop to avoid loops
         logger.debug('Ignoring ping back (from %s)', addr[0])
         raise StopIteration
     logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
     if self.node_port == self.listen_port:
         info['relay'] = 'y'  # 'check if this message loops back to self
     bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
     bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
     yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                          ('<broadcast>', self.node_port))
     bc_sock.close()
Example #11
0
    def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                    scheduler_node=None, scheduler_port=51347):
        """Blocking select() loop relaying dispy discovery pings.

        Relays node-port 'PING:' messages (from schedulers) by broadcast, and
        forwards scheduler-port messages (from nodes) to the known scheduler.
        'ip_addr' may include a '/bits' CIDR suffix; netmask/netaddr are used
        to ignore pings originating from this relay's own subnet.
        """
        netaddr = None
        if not netmask:
            try:
                # accept 'a.b.c.d/bits' CIDR form; derive netmask/netaddr
                ip_addr, bits = ip_addr.split('/')
                socket.inet_aton(ip_addr)
                netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                netmask = '255.255.255.255'
        if ip_addr:
            # raises if ip_addr is not a valid dotted quad
            socket.inet_aton(ip_addr)
        else:
            ip_addr = socket.gethostbyname(socket.gethostname())
        if not netaddr and netmask:
            try:
                if isinstance(netmask, str):
                    netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
                else:
                    assert isinstance(netmask, int)
                assert netmask > 0
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                logger.warning('Invalid netmask')

        try:
            # sanity-check both values; discard them together if either is bad
            socket.inet_ntoa(struct.pack('>L', netaddr))
            socket.inet_ntoa(struct.pack('>L', netmask))
        except:
            netaddr = netmask = None

        scheduler_version = _dispy_version

        bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        bc_sock.bind(('', 0))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        scheduler_ip_addr = _node_ipaddr(scheduler_node)
        if scheduler_ip_addr and scheduler_port:
            # announce the known scheduler to nodes right away
            relay_request = serialize({'ip_addr':scheduler_ip_addr, 'port':scheduler_port,
                                       'version':_dispy_version, 'sign':None})
            bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))

        node_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        node_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        node_sock.bind(('', node_port))
        sched_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sched_sock.bind(('', scheduler_port))
        logger.info('Listening on %s:%s/%s', ip_addr, node_port, scheduler_port)
        while True:
            ready = select.select([node_sock, sched_sock], [], [])[0]
            for sock in ready:
                if sock == node_sock:
                    # scheduler ping on node port: rebroadcast it
                    msg, addr = node_sock.recvfrom(1024)
                    if not msg.startswith('PING:'):
                        logger.debug('Ignoring message "%s" from %s',
                                     msg[:min(len(msg), 5)], addr[0])
                        continue
                    if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                        # sender is on our own subnet: likely our own broadcast
                        logger.debug('Ignoring own ping (from %s)', addr[0])
                        continue
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        info = unserialize(msg[len('PING:'):])
                        scheduler_ip_addr = info['ip_addr']
                        scheduler_port = info['port']
                        assert info['version'] == _dispy_version
                        # scheduler_sign = info['sign']
                        assert isinstance(scheduler_port, int)
                    except:
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                        logger.debug(traceback.format_exc())
                        continue
                    logger.debug('relaying ping from %s / %s' % (info['ip_addr'], addr[0]))
                    if scheduler_ip_addr is None:
                        # scheduler didn't state an address; use sender's
                        info['ip_addr'] = scheduler_ip_addr = addr[0]
                    relay_request = serialize(info)
                    bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))
                else:
                    # node message on scheduler port: forward to the scheduler
                    assert sock == sched_sock
                    msg, addr = sched_sock.recvfrom(1024)
                    if msg.startswith('PING:') and scheduler_ip_addr and scheduler_port:
                        try:
                            # NOTE(review): slicing with len('PONG:') while the
                            # prefix checked is 'PING:' -- harmless since both
                            # are 5 chars, but the mismatch looks unintended
                            info = unserialize(msg[len('PONG:'):])
                            assert info['version'] == _dispy_version
                            assert isinstance(info['ip_addr'], str)
                            assert isinstance(info['port'], int)
                            # assert isinstance(info['cpus'], int)
                            info['scheduler_ip_addr'] = scheduler_ip_addr
                            relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                            relay_sock.sendto('PING:' + serialize(info),
                                              (scheduler_ip_addr, scheduler_port))
                            relay_sock.close()
                        except:
                            logger.debug(traceback.format_exc())
                            # raise
                            logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
Example #12
0
    def timer_task(self, coro=None):
        """Daemon coroutine driving periodic pulse and zombie housekeeping.

        Wakes up every 'timeout' seconds; sends PULSE datagrams to the
        scheduler when jobs are running, marks/cleans up zombie computations,
        resends saved job results, and notifies schedulers of terminated
        (zombie) computations.
        """
        coro.set_daemon()
        reset = True
        last_pulse_time = last_zombie_time = time.time()
        while True:
            if reset:
                # recompute the wakeup period whenever intervals changed
                if self.pulse_interval and self.zombie_interval:
                    timeout = min(self.pulse_interval, self.zombie_interval)
                    self.zombie_interval = max(5 * self.pulse_interval, self.zombie_interval)
                else:
                    timeout = max(self.pulse_interval, self.zombie_interval)
                    # NOTE(review): self-assignment is a no-op; possibly meant
                    # to normalize zombie_interval here -- confirm intent
                    self.zombie_interval = self.zombie_interval

            # suspend until timeout; a truthy resume value forces a reset
            reset = yield coro.suspend(timeout)

            now = time.time()
            if self.pulse_interval and (now - last_pulse_time) >= self.pulse_interval:
                # number of CPUs currently occupied by jobs
                n = self.cpus - self.avail_cpus
                assert n >= 0
                if n > 0 and self.scheduler_ip_addr:
                    last_pulse_time = now
                    msg = 'PULSE:' + serialize({'ip_addr':self.ext_ip_addr,
                                                'port':self.udp_sock.getsockname()[1], 'cpus':n})
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    sock = AsynCoroSocket(sock, blocking=False)
                    sock.settimeout(1)
                    yield sock.sendto(msg, (self.scheduler_ip_addr, self.scheduler_port))
                    sock.close()
            if self.zombie_interval and (now - last_zombie_time) >= self.zombie_interval:
                last_zombie_time = now
                yield self.lock.acquire()
                # mark computations whose scheduler has gone silent
                for compute in self.computations.itervalues():
                    if (now - compute.last_pulse) > self.zombie_interval:
                        compute.zombie = True
                zombies = [compute for compute in self.computations.itervalues() \
                           if compute.zombie and compute.pending_jobs == 0]
                for compute in zombies:
                    logger.debug('Deleting zombie computation "%s"', compute.name)
                    self.cleanup_computation(compute)
                # 'phoenix': live computations with saved (undelivered) results
                phoenix = [compute for compute in self.computations.itervalues() \
                           if not compute.zombie and compute.pending_results]
                for compute in phoenix:
                    files = [f for f in os.listdir(compute.dest_path) \
                             if f.startswith('_dispy_job_reply_')]
                    # limit number queued so as not to take up too much time
                    files = files[:min(len(files), 128)]
                    for f in files:
                        result_file = os.path.join(compute.dest_path, f)
                        try:
                            fd = open(result_file, 'rb')
                            job_result = pickle.load(fd)
                            fd.close()
                        except:
                            logger.debug('Could not load "%s"', result_file)
                            logger.debug(traceback.format_exc())
                            continue
                        try:
                            os.remove(result_file)
                        except:
                            logger.debug('Could not remove "%s"', result_file)
                        compute.pending_results -= 1
                        job_info = _DispyJobInfo(job_result, (compute.scheduler_ip_addr,
                                                              compute.job_result_port), compute)
                        # resend asynchronously; resending=True skips CPU accounting
                        Coro(self._send_job_reply, job_info, resending=True)
                self.lock.release()
                for compute in zombies:
                    # tell the (unresponsive) scheduler this node is done
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    sock = AsynCoroSocket(sock, blocking=False)
                    sock.settimeout(1)
                    logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
                    data = serialize({'ip_addr':self.address[0], 'port':self.address[1],
                                      'sign':self.signature})
                    yield sock.sendto('TERMINATED:%s' % data, (compute.scheduler_ip_addr,
                                                               compute.scheduler_port))
                    sock.close()
                if self.scheduler_ip_addr is None and self.avail_cpus == self.cpus:
                    # fully idle and no scheduler: advertise availability again
                    self.pulse_interval = None
                    reset = True
                    yield self.send_pong_msg(coro=coro)
Example #13
0
        def retrieve_job_task(msg):
            """Serve a client's request to retrieve a (possibly saved) job reply.

            Looks up the job first among in-memory job_infos, then among
            pickled '_dispy_job_reply_<uid>' files under dest_path_prefix.
            NOTE(review): closure -- relies on 'conn' and 'coro' from the
            enclosing scope; replies are sent over 'conn'.
            """
            assert coro is not None
            try:
                req = unserialize(msg)
                assert req['uid'] is not None
                assert req['hash'] is not None
                assert req['compute_id'] is not None
            except:
                resp = serialize('Invalid job')
                try:
                    yield conn.send_msg(resp)
                except:
                    pass
                raise StopIteration

            # fast path: job still tracked in memory
            job_info = self.job_infos.get(req['uid'], None)
            resp = None
            if job_info is not None:
                try:
                    yield conn.send_msg(serialize(job_info.job_reply))
                    ack = yield conn.recv_msg()
                    # no need to check ack
                except:
                    logger.debug('Could not send reply for job %s', req['uid'])
                raise StopIteration

            # slow path: scan saved reply files of every computation directory
            for d in os.listdir(self.dest_path_prefix):
                info_file = os.path.join(self.dest_path_prefix, d,
                                         '_dispy_job_reply_%s' % req['uid'])
                if os.path.isfile(info_file):
                    try:
                        fd = open(info_file, 'rb')
                        job_reply = pickle.load(fd)
                        fd.close()
                    except:
                        job_reply = None
                    # hash check guards against uid collisions across clients
                    if hasattr(job_reply, 'hash') and job_reply.hash == req['hash']:
                        try:
                            yield conn.send_msg(serialize(job_reply))
                            ack = yield conn.recv_msg()
                            assert ack == 'ACK'
                        except:
                            logger.debug('Could not send reply for job %s', req['uid'])
                            raise StopIteration
                        try:
                            os.remove(info_file)
                            yield self.lock.acquire()
                            compute = self.computations.get(req['compute_id'], None)
                            if compute is not None:
                                compute.pending_results -= 1
                                if compute.pending_results == 0:
                                    compute.zombie = True
                                    self.cleanup_computation(compute)
                            self.lock.release()
                        except:
                            logger.debug('Could not remove "%s"', info_file)
                        raise StopIteration
            else:
                # for/else: loop finished without finding a matching file
                # (early exits above use StopIteration, not break)
                resp = serialize('Invalid job: %s' % req['uid'])

            if resp:
                try:
                    yield conn.send_msg(resp)
                except:
                    pass
Example #14
0
        def add_computation_task(msg):
            assert coro is not None
            try:
                compute = unserialize(msg)
            except:
                logger.debug('Ignoring computation request from %s', addr[0])
                try:
                    yield conn.send_msg('Invalid computation request')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            yield self.lock.acquire()
            if not ((self.scheduler_ip_addr is None) or
                    (self.scheduler_ip_addr == compute.scheduler_ip_addr and \
                     self.scheduler_port == compute.scheduler_port)):
                logger.debug('Ignoring computation request from %s: %s, %s, %s',
                             compute.scheduler_ip_addr, self.scheduler_ip_addr,
                             self.avail_cpus, self.cpus)
                self.lock.release()
                try:
                    yield conn.send_msg('Busy')
                except:
                    pass
                raise StopIteration

            resp = 'ACK'
            if compute.dest_path and isinstance(compute.dest_path, str):
                compute.dest_path = compute.dest_path.strip(os.sep)
            else:
                for x in xrange(20):
                    compute.dest_path = os.urandom(8).encode('hex')
                    if compute.dest_path.find(os.sep) >= 0:
                        continue
                    if not os.path.isdir(os.path.join(self.dest_path_prefix, compute.dest_path)):
                        break
                else:
                    logger.warning('Failed to create unique dest_path: %s', compute.dest_path)
                    resp = 'NACK'
            compute.dest_path = os.path.join(self.dest_path_prefix, compute.dest_path)
            try:
                os.makedirs(compute.dest_path)
                os.chmod(compute.dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
                logger.debug('dest_path for "%s": %s', compute.name, compute.dest_path)
            except:
                logger.warning('Invalid destination path: "%s"', compute.dest_path)
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                self.lock.release()
                try:
                    yield conn.send_msg('NACK (Invalid dest_path)')
                except:
                    logger.warning('Failed to send reply to %s', str(addr))
                raise StopIteration
            if compute.id in self.computations:
                logger.warning('Computation "%s" (%s) is being replaced',
                               compute.name, compute.id)
            setattr(compute, 'last_pulse', time.time())
            setattr(compute, 'pending_jobs', 0)
            setattr(compute, 'pending_results', 0)
            setattr(compute, 'zombie', False)
            logger.debug('xfer_files given: %s', ','.join(xf.name for xf in compute.xfer_files))
            if compute.type == _Compute.func_type:
                try:
                    code = compile(compute.code, '<string>', 'exec')
                except:
                    logger.warning('Computation "%s" could not be compiled', compute.name)
                    if os.path.isdir(compute.dest_path):
                        os.rmdir(compute.dest_path)
                    self.lock.release()
                    try:
                        yield conn.send_msg('NACK (Compilation failed)')
                    except:
                        logger.warning('Failed to send reply to %s', str(addr))
                    raise StopIteration
                compute.code = marshal.dumps(code)
            elif compute.type == _Compute.prog_type:
                assert not compute.code
                compute.name = os.path.join(compute.dest_path, os.path.basename(compute.name))

            xfer_files = []
            for xf in compute.xfer_files:
                tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
                try:
                    if _same_file(tgt, xf):
                        logger.debug('Ignoring file "%s" / "%s"', xf.name, tgt)
                        if tgt not in self.file_uses:
                            self.file_uses[tgt] = 0
                        self.file_uses[tgt] += 1
                        continue
                except:
                    pass
                if self.max_file_size and xf.stat_buf.st_size > self.max_file_size:
                    resp = 'NACK (file "%s" too big)' % xf.name
                else:
                    xfer_files.append(xf)
            if resp == 'ACK' and ((self.scheduler_ip_addr is not None) and \
                                  (self.scheduler_ip_addr != compute.scheduler_ip_addr)):
                resp = 'NACK (busy)'
            if resp == 'ACK':
                self.computations[compute.id] = compute
                self.scheduler_ip_addr = compute.scheduler_ip_addr
                self.scheduler_port = compute.scheduler_port
                self.pulse_interval = compute.pulse_interval
                self.lock.release()
                if xfer_files:
                    resp += ':XFER_FILES:' + serialize(xfer_files)
                try:
                    yield conn.send_msg(resp)
                except:
                    assert self.scheduler_ip_addr == compute.scheduler_ip_addr
                    yield self.lock.acquire()
                    del self.computations[compute.id]
                    self.scheduler_ip_addr = None
                    self.scheduler_port = None
                    self.pulse_interval = None
                    self.lock.release()
                else:
                    self.timer_coro.resume(True)
            else:
                self.lock.release()
                if os.path.isdir(compute.dest_path):
                    os.rmdir(compute.dest_path)
                try:
                    yield conn.send_msg(resp)
                except:
                    pass
Example #15
0
    def udp_server(self, scheduler_ip_addr, coro=None):
        """Daemon coroutine serving this node's UDP control messages.

        On startup, if all CPUs are available, broadcasts availability via
        send_pong_msg; if 'scheduler_ip_addr' is given, also sends a direct
        PONG to that scheduler.  Then loops forever on self.udp_sock handling:

        - 'PING:'       scheduler discovery; replies with a PONG describing
                        this node (skipped while any CPU is busy)
        - 'PULSE:'      scheduler keep-alive; refreshes last_pulse on all
                        current computations
        - 'SERVERPORT:' address query; replies with this node's TCP
                        address/signature/version

        Malformed or unexpected messages are logged and dropped; this loop
        never raises out.
        """
        assert coro is not None
        coro.set_daemon()
        if self.avail_cpus == self.cpus:
            # fully idle: advertise availability to any listening schedulers
            yield self.send_pong_msg(coro=coro)
        pong_msg = {'ip_addr': self.ext_ip_addr, 'name': self.name, 'port': self.address[1],
                    'cpus': self.cpus, 'sign': self.signature, 'version': _dispy_version}
        pong_msg = 'PONG:' + serialize(pong_msg)

        if scheduler_ip_addr:
            sock = AsynCoroSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
            try:
                yield sock.sendto(pong_msg, (scheduler_ip_addr, self.scheduler_port))
            except:
                # best-effort only; the scheduler can still find us via broadcast
                logger.warning("Couldn't send pong message to %s:%s",
                               scheduler_ip_addr, self.scheduler_port)
            finally:
                sock.close()

        while True:
            msg, addr = yield self.udp_sock.recvfrom(1024)
            # TODO: process each message as separate Coro, so
            # exceptions are contained?
            if msg.startswith('PING:'):
                if self.cpus != self.avail_cpus:
                    # some CPUs are busy; don't advertise to other schedulers
                    logger.debug('Busy (%s/%s); ignoring ping message from %s',
                                 self.cpus, self.avail_cpus, addr[0])
                    continue
                try:
                    info = unserialize(msg[len('PING:'):])
                    # validate before replying: address must parse, port must be
                    # an int, and versions must match exactly
                    socket.inet_aton(info['scheduler_ip_addr'])
                    assert isinstance(info['scheduler_port'], int)
                    assert info['version'] == _dispy_version
                    addr = (info['scheduler_ip_addr'], info['scheduler_port'])
                except:
                    logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                    continue
                yield self.udp_sock.sendto(pong_msg, addr)
            elif msg.startswith('PULSE:'):
                try:
                    info = unserialize(msg[len('PULSE:'):])
                    # only accept pulses from the scheduler we are bound to
                    assert info['ip_addr'] == self.scheduler_ip_addr
                    yield self.lock.acquire()
                    for compute in self.computations.itervalues():
                        compute.last_pulse = time.time()
                    # release() is synchronous throughout this file; the previous
                    # 'yield self.lock.release()' was inconsistent with every
                    # other call site and suspended the coroutine needlessly
                    self.lock.release()
                except:
                    logger.warning('Ignoring PULSE from %s', addr[0])
            elif msg.startswith('SERVERPORT:'):
                sock = None
                try:
                    req = unserialize(msg[len('SERVERPORT:'):])
                    reply = {'ip_addr': self.address[0], 'port': self.address[1],
                             'sign': self.signature, 'version': _dispy_version}
                    sock = AsynCoroSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM),
                                          blocking=False)
                    sock.settimeout(1)
                    yield sock.sendto(serialize(reply), (req['ip_addr'], req['port']))
                except:
                    logger.debug(traceback.format_exc())
                finally:
                    # close even when sendto fails (previously leaked the socket)
                    if sock is not None:
                        sock.close()
            else:
                # not one of the known prefixes
                logger.warning('Ignoring message from %s', addr[0])
Example #16
0
    def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                    scheduler_node=None, scheduler_port=51347):
        """Relay dispy PING/PONG messages across network segments.

        Listens for PING broadcasts (from schedulers) on 'node_port' and
        PONG replies (from nodes) on 'scheduler_port', rebroadcasting each
        as a PING carrying the scheduler's address so nodes on this segment
        can find a scheduler on another segment.

        ip_addr may be given as 'a.b.c.d/bits' (CIDR), in which case the
        netmask is derived from it; otherwise 'netmask' may be a dotted-quad
        string or an int.  When a valid netaddr/netmask pair is known, pings
        originating from this relay's own subnet are ignored to avoid loops.
        If 'scheduler_node' resolves, an initial PING is broadcast at startup.

        This method never returns (infinite select loop).
        """
        netaddr = None
        if not netmask:
            try:
                # accept CIDR form 'a.b.c.d/bits'
                ip_addr, bits = ip_addr.split('/')
                socket.inet_aton(ip_addr)
                netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                netmask = '255.255.255.255'
        if ip_addr:
            socket.inet_aton(ip_addr)
        else:
            ip_addr = socket.gethostbyname(socket.gethostname())
        if not netaddr and netmask:
            try:
                if isinstance(netmask, str):
                    netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
                else:
                    assert isinstance(netmask, int)
                assert netmask > 0
                netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
            except:
                logger.warning('Invalid netmask')

        try:
            # sanity check: both must pack as 32-bit addresses or neither is used
            socket.inet_ntoa(struct.pack('>L', netaddr))
            socket.inet_ntoa(struct.pack('>L', netmask))
        except:
            netaddr = netmask = None

        scheduler_version = _dispy_version

        bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        bc_sock.bind(('', 0))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        scheduler_ip_addr = _node_ipaddr(scheduler_node)
        if scheduler_ip_addr and scheduler_port:
            # announce the known scheduler to nodes on this segment at startup
            relay_request = serialize({'scheduler_ip_addr': scheduler_ip_addr,
                                       'scheduler_port': scheduler_port,
                                       'version': scheduler_version})
            bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))

        ping_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        ping_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        ping_sock.bind(('', node_port))
        pong_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        pong_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        pong_sock.bind(('', scheduler_port))
        logger.info('Listening on %s:%s', ip_addr, node_port)
        last_ping = 0
        while True:
            ready = select.select([ping_sock, pong_sock], [], [])[0]
            for sock in ready:
                if sock == ping_sock:
                    msg, addr = ping_sock.recvfrom(1024)
                    if not msg.startswith('PING:'):
                        # fixed: was max(len(msg), 5), which logged the whole
                        # message; min truncates to at most 5 bytes as intended
                        logger.debug('Ignoring message "%s" from %s',
                                     msg[:min(len(msg), 5)], addr[0])
                        continue
                    if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                        # sender is in our own subnet; relaying would loop
                        logger.debug('Ignoring own ping (from %s)', addr[0])
                        continue
                    if (time.time() - last_ping) < 10:
                        # NOTE(review): despite the message, this does not skip
                        # the ping -- it blocks the whole relay loop for 10s and
                        # then relays anyway; confirm whether 'continue' was
                        # intended here
                        logger.warning('Ignoring ping (from %s) for 10 more seconds', addr[0])
                        time.sleep(10)
                    last_ping = time.time()
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        data = unserialize(msg[len('PING:'):])
                        scheduler_ip_addr = data['scheduler_ip_addr']
                        scheduler_port = data['scheduler_port']
                        scheduler_version = data['version']
                        assert isinstance(scheduler_ip_addr, str)
                        assert isinstance(scheduler_port, int)
                    except:
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                        continue
                    # rebroadcast scheduler's address into this segment
                    relay_request = serialize({'scheduler_ip_addr': scheduler_ip_addr,
                                               'scheduler_port': scheduler_port,
                                               'version': scheduler_version})
                    bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))
                else:
                    assert sock == pong_sock
                    msg, addr = pong_sock.recvfrom(1024)
                    if not msg.startswith('PONG:'):
                        # fixed: same max -> min truncation bug as above
                        logger.debug('Ignoring pong message "%s" from %s',
                                     msg[:min(len(msg), 5)], addr[0])
                        continue
                    # if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                    #     logger.debug('Ignoring own pong (from %s)', addr[0])
                    #     continue
                    if not (scheduler_ip_addr and scheduler_port):
                        # no scheduler known yet; nothing to relay to this node
                        logger.debug('Ignoring pong message from %s', str(addr))
                        continue
                    logger.debug('Pong message from %s (%s)', addr[0], addr[1])
                    try:
                        pong = unserialize(msg[len('PONG:'):])
                        assert isinstance(pong['host'], str)
                        assert isinstance(pong['port'], int)
                        assert isinstance(pong['cpus'], int)
                        # tell the responding node directly where the scheduler is
                        relay_request = serialize({'scheduler_ip_addr': scheduler_ip_addr,
                                                   'scheduler_port': scheduler_port,
                                                   'version': scheduler_version})
                        relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                        relay_sock.sendto('PING:%s' % relay_request,
                                          (pong['host'], node_port))
                        relay_sock.close()
                    except:
                        logger.debug('Ignoring pong message from %s (%s)', addr[0], addr[1])