def listen_udp_proc(self, coro=None):
    """Relay scheduler PING datagrams received on the listen port to nodes.

    Runs as a daemon coroutine. If a scheduler address is already known
    (``self.scheduler_ip_addrs`` and ``self.scheduler_port``), one PING is
    broadcast to ``self.node_port`` first. Afterwards every datagram read
    from ``self.listen_port`` that starts with ``PING:`` and carries the
    same ``version`` as this module is recorded as the current scheduler
    and re-broadcast to ``self.node_port``.

    Args:
        coro: coroutine object running this generator; must not be None
            because ``coro.set_daemon()`` is called unconditionally.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()

    bc_sock = asyncoro.AsyncSocket(
        socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    try:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        if self.scheduler_ip_addrs and self.scheduler_port:
            relay_request = {
                'ip_addrs': self.scheduler_ip_addrs,
                'port': self.scheduler_port,
                'version': __version__,
                'sign': None,
            }
            # The send must be yielded (as the relay send below is) so it
            # completes before the socket is closed.
            yield bc_sock.sendto(
                ping_prefix + asyncoro.serialize(relay_request),
                ('<broadcast>', self.node_port))
    finally:
        bc_sock.close()

    listen_sock = asyncoro.AsyncSocket(
        socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_sock.bind(('', self.listen_port))

    while 1:
        msg, addr = yield listen_sock.recvfrom(1024)
        if not msg.startswith(ping_prefix):
            logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
            continue
        logger.debug('Ping message from %s (%s)', addr[0], addr[1])
        try:
            info = asyncoro.unserialize(msg[len(ping_prefix):])
            if info['version'] != __version__:
                logger.warning(
                    'Ignoring %s due to version mismatch: %s / %s',
                    info['ip_addrs'], info['version'], __version__)
                continue
            self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
            self.scheduler_port = info['port']
        except Exception:
            # Malformed datagrams from the network are dropped, not fatal.
            logger.debug('Ignoring ping message from %s (%s)',
                         addr[0], addr[1])
            logger.debug(traceback.format_exc())
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back (from %s)', addr[0])
            continue
        logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
        if self.node_port == self.listen_port:
            # Our own broadcast will arrive on the listening socket; mark it
            # so it is recognised as a ping back and not relayed again.
            info['relay'] = 'y'
        bc_sock = asyncoro.AsyncSocket(
            socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield bc_sock.sendto(ping_prefix + asyncoro.serialize(info),
                                 ('<broadcast>', self.node_port))
        finally:
            bc_sock.close()
def listen_udp_proc(self, coro=None):
    """Relay scheduler PING datagrams received on the listen port to nodes.

    Runs as a daemon coroutine. When a scheduler address is already known,
    a PING is broadcast to ``self.node_port`` once at start-up. After that,
    each datagram on ``self.listen_port`` that begins with ``PING:`` and has
    a matching ``version`` updates ``self.scheduler_ip_addrs`` /
    ``self.scheduler_port`` and is re-broadcast to ``self.node_port``.

    Args:
        coro: coroutine object running this generator; must not be None
            because ``coro.set_daemon()`` is called unconditionally.
    """
    coro.set_daemon()
    ping_prefix = "PING:".encode()

    bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    try:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        if self.scheduler_ip_addrs and self.scheduler_port:
            relay_request = {
                "ip_addrs": self.scheduler_ip_addrs,
                "port": self.scheduler_port,
                "version": __version__,
                "sign": None,
            }
            # Yield the send (like the relay send below) so that it finishes
            # before the socket is closed.
            yield bc_sock.sendto(
                ping_prefix + asyncoro.serialize(relay_request),
                ("<broadcast>", self.node_port),
            )
    finally:
        bc_sock.close()

    listen_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_sock.bind(("", self.listen_port))

    while 1:
        msg, addr = yield listen_sock.recvfrom(1024)
        if not msg.startswith(ping_prefix):
            logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
            continue
        logger.debug("Ping message from %s (%s)", addr[0], addr[1])
        try:
            info = asyncoro.deserialize(msg[len(ping_prefix):])
            if info["version"] != __version__:
                logger.warning(
                    "Ignoring %s due to version mismatch: %s / %s",
                    info["ip_addrs"], info["version"], __version__
                )
                continue
            self.scheduler_ip_addrs = info["ip_addrs"] + [addr[0]]
            self.scheduler_port = info["port"]
        except Exception:
            # Bad payloads are logged and skipped; they must not stop the relay.
            logger.debug("Ignoring ping message from %s (%s)", addr[0], addr[1])
            logger.debug(traceback.format_exc())
            continue
        if info.get("relay", None):
            logger.debug("Ignoring ping back (from %s)", addr[0])
            continue
        logger.debug("relaying ping from %s / %s", info["ip_addrs"], addr[0])
        if self.node_port == self.listen_port:
            # The broadcast below will reach our own listening socket; flag
            # it so it is treated as a ping back instead of being relayed.
            info["relay"] = "y"
        bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield bc_sock.sendto(
                ping_prefix + asyncoro.serialize(info),
                ("<broadcast>", self.node_port),
            )
        finally:
            bc_sock.close()
def sched_udp_proc(self, coro=None):
    """Answer PING datagrams arriving on the scheduler port.

    Runs as a daemon coroutine bound to ``self.scheduler_port``. For each
    valid ``PING:`` datagram (while a scheduler address is known) a PING
    carrying the scheduler's addresses, port and version, flagged with
    ``relay``, is sent to the ``ip_addr`` / ``port`` named in the request.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    coro.set_daemon()
    ping_prefix = "PING:".encode()
    sched_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sched_sock.bind(("", self.scheduler_port))

    while 1:
        msg, addr = yield sched_sock.recvfrom(1024)
        if not msg.startswith(ping_prefix) or not self.scheduler_ip_addrs or not self.scheduler_port:
            logger.debug("Ignoring ping message from %s (%s)", addr[0], addr[1])
            continue
        try:
            info = asyncoro.deserialize(msg[len(ping_prefix):])
            logger.debug("sched_sock: %s", info)
            assert info["version"] == __version__
            # assert isinstance(info['cpus'], int)
        except Exception:
            logger.debug(traceback.format_exc())
            # Without this, `info` below is either unbound (NameError on the
            # first bad datagram) or left over from an earlier datagram.
            continue
        if info.get("relay", None):
            logger.debug("Ignoring ping back from %s: %s", addr[0], info)
            continue
        reply = {
            "ip_addrs": self.scheduler_ip_addrs,
            "port": self.scheduler_port,
            "version": __version__,
            "relay": "y",
        }
        relay_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            yield relay_sock.sendto(
                ping_prefix + asyncoro.serialize(reply),
                (info["ip_addr"], info["port"]),
            )
        finally:
            relay_sock.close()
def dispy_provisional_result(result):
    """Sends provisional result of computation back to the client.

    In some cases, such as optimizations, computations may send
    current (best) result to the client and continue computation (for
    next iteration) so that the client may decide to terminate
    computations based on the results or alter computations if
    necessary. The computations can use this function in such cases
    with the current result of computation as argument.

    The reply is sent over a blocking TCP connection with a 2 second
    timeout. Delivery failures are logged as a warning and not raised.
    """
    __dispy_job_reply = __dispy_job_info.job_reply
    logger.debug('Sending provisional result for job %s to %s',
                 __dispy_job_reply.uid, __dispy_job_info.reply_addr)
    __dispy_job_reply.status = DispyJob.ProvisionalResult
    __dispy_job_reply.result = result
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock = AsynCoroSocket(sock, blocking=True,
                          keyfile=__dispy_job_keyfile, certfile=__dispy_job_certfile)
    sock.settimeout(2)
    try:
        sock.connect(__dispy_job_info.reply_addr)
        sock.send_msg(serialize(__dispy_job_reply))
        # Wait for the peer's acknowledgement; its content is not inspected.
        sock.recv_msg()
    except Exception:
        logger.warning("Couldn't send provisional results %s:\n%s",
                       str(result), traceback.format_exc())
    finally:
        # Close on every path, including interrupts that are not swallowed.
        sock.close()
def sched_udp_proc(self, coro=None):
    """Answer PING datagrams arriving on the scheduler port.

    Daemon coroutine bound to ``self.scheduler_port``. Each valid ``PING:``
    datagram received while a scheduler address is known is answered with a
    PING holding the scheduler's addresses, port and version (flagged with
    ``relay``), sent to the ``ip_addr`` / ``port`` found in the request.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()
    sched_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sched_sock.bind(('', self.scheduler_port))

    while 1:
        msg, addr = yield sched_sock.recvfrom(1024)
        if (not msg.startswith(ping_prefix) or
                not self.scheduler_ip_addrs or not self.scheduler_port):
            logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
            continue
        try:
            info = asyncoro.unserialize(msg[len(ping_prefix):])
            logger.debug('sched_sock: %s', info)
            assert info['version'] == __version__
            # assert isinstance(info['cpus'], int)
        except Exception:
            logger.debug(traceback.format_exc())
            # Skip the datagram: `info` would otherwise be unbound or stale.
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back from %s: %s', addr[0], info)
            continue
        reply = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                 'version': __version__, 'relay': 'y'}
        relay_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            yield relay_sock.sendto(ping_prefix + asyncoro.serialize(reply),
                                    (info['ip_addr'], info['port']))
        finally:
            relay_sock.close()
def _shutdown(self, coro=None):
    """Stop all running jobs and tell each scheduler this node terminated.

    Under ``self.lock`` the job and computation tables are detached and
    replaced with empty ones, and ``None`` is put on ``self.reply_Q`` if
    it is set. Then every job process is terminated and waited for, and a
    ``TERMINATED:`` datagram is sent to the scheduler of each computation.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    assert coro is not None
    yield self.lock.acquire()
    try:
        job_infos = self.job_infos
        self.job_infos = {}
        computations = self.computations.items()
        self.computations = {}
        if self.reply_Q:
            self.reply_Q.put(None)
    finally:
        # Never leave the node lock held if the hand-over above fails.
        self.lock.release()

    for uid, job_info in job_infos.iteritems():
        job_info.proc.terminate()
        logger.debug('process for %s is killed', uid)
        if isinstance(job_info.proc, multiprocessing.Process):
            job_info.proc.join(2)
        else:
            job_info.proc.wait()

    for cid, compute in computations:
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock = AsynCoroSocket(sock, blocking=False)
        try:
            sock.settimeout(2)
            logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
            data = serialize({'ip_addr': self.address[0], 'port': self.address[1],
                              'sign': self.signature})
            yield sock.sendto('TERMINATED:' + data,
                              (compute.scheduler_ip_addr, compute.scheduler_port))
        finally:
            sock.close()
def sched_udp_proc(self, coro=None):
    """Answer PING datagrams arriving on the scheduler port.

    Daemon coroutine listening on ``self.scheduler_port``. A valid
    ``PING:`` datagram, received while a scheduler address is known, is
    answered with a relay-flagged PING containing the scheduler's
    addresses, port and version, sent to the requester's advertised
    ``ip_addr`` / ``port``.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()
    sched_sock = asyncoro.AsyncSocket(
        socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sched_sock.bind(('', self.scheduler_port))

    while 1:
        msg, addr = yield sched_sock.recvfrom(1024)
        if (not msg.startswith(ping_prefix) or
                not self.scheduler_ip_addrs or not self.scheduler_port):
            logger.debug('Ignoring ping message from %s (%s)',
                         addr[0], addr[1])
            continue
        try:
            info = asyncoro.unserialize(msg[len(ping_prefix):])
            logger.debug('sched_sock: %s', info)
            assert info['version'] == __version__
            # assert isinstance(info['cpus'], int)
        except Exception:
            logger.debug(traceback.format_exc())
            # Drop the datagram; falling through would use an unbound or
            # stale `info`.
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back from %s: %s', addr[0], info)
            continue
        reply = {
            'ip_addrs': self.scheduler_ip_addrs,
            'port': self.scheduler_port,
            'version': __version__,
            'relay': 'y',
        }
        relay_sock = asyncoro.AsyncSocket(
            socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            yield relay_sock.sendto(ping_prefix + asyncoro.serialize(reply),
                                    (info['ip_addr'], info['port']))
        finally:
            relay_sock.close()
def listen_udp_proc(self, coro=None):
    """Relay scheduler PING datagrams received on the listen port to nodes.

    Daemon coroutine. A start-up PING is broadcast to ``self.node_port``
    when a scheduler address is already configured. Each later datagram on
    ``self.listen_port`` that starts with ``PING:`` and matches this
    module's ``version`` is stored as the current scheduler and broadcast
    again to ``self.node_port``.

    Args:
        coro: coroutine object running this generator; must not be None
            because ``coro.set_daemon()`` is called unconditionally.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()

    bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    try:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        if self.scheduler_ip_addrs and self.scheduler_port:
            relay_request = {'ip_addrs': self.scheduler_ip_addrs, 'port': self.scheduler_port,
                             'version': __version__, 'sign': None}
            # Yielded, like the relay send below, so the datagram goes out
            # before the socket is closed.
            yield bc_sock.sendto(ping_prefix + asyncoro.serialize(relay_request),
                                 ('<broadcast>', self.node_port))
    finally:
        bc_sock.close()

    listen_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_sock.bind(('', self.listen_port))

    while 1:
        msg, addr = yield listen_sock.recvfrom(1024)
        if not msg.startswith(ping_prefix):
            logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
            continue
        logger.debug('Ping message from %s (%s)', addr[0], addr[1])
        try:
            info = asyncoro.unserialize(msg[len(ping_prefix):])
            if info['version'] != __version__:
                logger.warning('Ignoring %s due to version mismatch: %s / %s',
                               info['ip_addrs'], info['version'], __version__)
                continue
            self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
            self.scheduler_port = info['port']
        except Exception:
            # A malformed payload is logged and skipped.
            logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
            logger.debug(traceback.format_exc())
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back (from %s)', addr[0])
            continue
        logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
        if self.node_port == self.listen_port:
            # Flag the message so our own broadcast is ignored when it
            # arrives back on the listening socket.
            info['relay'] = 'y'
        bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            yield bc_sock.sendto(ping_prefix + asyncoro.serialize(info),
                                 ('<broadcast>', self.node_port))
        finally:
            bc_sock.close()
def send_pong_msg(self, coro=None):
    """Broadcast a ``PONG:`` datagram describing this node.

    The message holds the node's external IP address, name, port, CPU
    count, signature and dispy version, and is broadcast to
    ``self.scheduler_port``.

    Args:
        coro: coroutine object running this generator (not used directly).
    """
    ping_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    ping_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    ping_sock = AsynCoroSocket(ping_sock, blocking=False)
    try:
        pong_msg = {'ip_addr': self.ext_ip_addr, 'name': self.name,
                    'port': self.address[1], 'cpus': self.cpus,
                    'sign': self.signature, 'version': _dispy_version}
        pong_msg = 'PONG:' + serialize(pong_msg)
        yield ping_sock.sendto(pong_msg, ('<broadcast>', self.scheduler_port))
    finally:
        # Release the socket even if serialising or sending fails.
        ping_sock.close()
def _send_job_reply(self, job_info, resending=False, coro=None):
    """Internal use only.

    Sends ``job_info.job_reply`` to ``job_info.reply_addr`` over TCP and
    expects ``'ACK'`` back. If that fails, the reply is pickled to
    ``_dispy_job_reply_<uid>`` under ``job_info.compute_dest_path`` and,
    when saved, the computation's ``pending_results`` is incremented.
    Unless ``resending`` is true, one CPU is then marked available and the
    computation's ``pending_jobs`` is decremented (cleaning up the
    computation if it is a zombie with no pending jobs).

    Args:
        job_info: job bookkeeping object with ``job_reply``, ``reply_addr``,
            ``compute_id`` and ``compute_dest_path``.
        resending: True when re-sending a previously stored result; the
            CPU / pending-job accounting is skipped in that case.
        coro: coroutine object running this generator; must not be None.
    """
    assert coro is not None
    job_reply = job_info.job_reply
    logger.debug('Sending result for job %s (%s) to %s',
                 job_reply.uid, job_reply.status, str(job_info.reply_addr))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock = AsynCoroSocket(sock, blocking=False,
                          certfile=self.certfile, keyfile=self.keyfile)
    sock.settimeout(2)
    try:
        yield sock.connect(job_info.reply_addr)
        yield sock.send_msg(serialize(job_reply))
        ack = yield sock.recv_msg()
        assert ack == 'ACK'
    except Exception:
        # `Exception` (not a bare except) so that closing this generator
        # while it is suspended is not mistaken for a delivery failure.
        logger.error("Couldn't send results for %s to %s",
                     job_reply.uid, str(job_info.reply_addr))
        # store job result even if computation has not enabled
        # fault recovery; user may be able to access node and
        # retrieve result manually
        f = os.path.join(job_info.compute_dest_path,
                         '_dispy_job_reply_%s' % job_reply.uid)
        logger.debug('storing results for job %s', job_reply.uid)
        try:
            # `with` closes the file even when pickling fails part-way.
            with open(f, 'wb') as fd:
                pickle.dump(job_reply, fd)
        except Exception:
            logger.debug('Could not save results for job %s', job_reply.uid)
        else:
            yield self.lock.acquire()
            try:
                compute = self.computations.get(job_info.compute_id, None)
                if compute is not None:
                    compute.pending_results += 1
            finally:
                self.lock.release()
    finally:
        sock.close()

    if not resending:
        yield self.lock.acquire()
        try:
            self.avail_cpus += 1
            compute = self.computations.get(job_info.compute_id, None)
            if compute is None:
                logger.warning('Computation for %s / %s is invalid!',
                               job_reply.uid, job_info.compute_id)
            else:
                # technically last_pulse should be updated only
                # when successfully sent reply, but no harm if done
                # otherwise, too
                compute.last_pulse = time.time()
                compute.pending_jobs -= 1
                if compute.pending_jobs == 0 and compute.zombie:
                    self.cleanup_computation(compute)
        finally:
            # Release even if cleanup raises, so the node does not deadlock.
            self.lock.release()
def sched_udp_proc(self, coro=None):
    """Answer PING datagrams arriving on the scheduler port (IPv4 or IPv6).

    Daemon coroutine bound to ``self.scheduler_port`` using the address
    family in ``self.addrinfo[0]``; for IPv6 the socket joins the group
    given by ``self.mreq``. Each valid ``PING:`` datagram received while a
    scheduler address is known is answered with a relay-flagged PING
    holding the scheduler's addresses, port and version, sent to the
    ``ip_addr`` / ``port`` found in the request.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()

    sched_sock = asyncoro.AsyncSocket(
        socket.socket(self.addrinfo[0], socket.SOCK_DGRAM))
    if self.addrinfo[0] == socket.AF_INET:
        sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        bind_addr = ('', self.scheduler_port)
    else:  # self.addrinfo[0] == socket.AF_INET6
        sched_sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP,
                              self.mreq)
        bind_addr = list(self.addrinfo[4])
        bind_addr[0] = ''
        bind_addr[1] = self.scheduler_port
        bind_addr = tuple(bind_addr)
    sched_sock.bind(bind_addr)

    while 1:
        msg, addr = yield sched_sock.recvfrom(1024)
        if (not msg.startswith(ping_prefix) or
                not self.scheduler_ip_addrs or not self.scheduler_port):
            logger.debug('Ignoring ping message from %s (%s)',
                         addr[0], addr[1])
            continue
        try:
            info = asyncoro.deserialize(msg[len(ping_prefix):])
            assert info['version'] == __version__
            # assert isinstance(info['cpus'], int)
        except Exception:
            logger.debug(traceback.format_exc())
            # Drop the datagram; `info` would be unbound or stale below.
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back from %s: %s', addr[0], info)
            continue
        reply = {
            'ip_addrs': self.scheduler_ip_addrs,
            'port': self.scheduler_port,
            'version': __version__,
            'relay': 'y',
        }
        relay_sock = asyncoro.AsyncSocket(
            socket.socket(self.addrinfo[0], socket.SOCK_DGRAM))
        try:
            if self.addrinfo[0] == socket.AF_INET:
                dest = (info['ip_addr'], info['port'])
            else:  # self.addrinfo[0] == socket.AF_INET6
                dest = list(self.addrinfo[4])
                dest[1] = 0
                # Bind the socket that actually sends the reply; the
                # original referenced `bc_sock`, which is not defined here.
                relay_sock.bind(tuple(dest))
                dest[0] = info['ip_addr']
                dest[1] = info['port']
                dest = tuple(dest)
            yield relay_sock.sendto(ping_prefix + asyncoro.serialize(reply),
                                    dest)
        finally:
            relay_sock.close()
def _send_job_reply(self, job_info, resending=False, coro=None):
    """Internal use only.

    Sends ``job_info.job_reply`` to ``job_info.reply_addr`` over TCP and
    expects ``'ACK'`` back; on success the computation's ``last_pulse`` is
    refreshed. On failure the reply is pickled to
    ``_dispy_job_reply_<uid>`` under ``job_info.compute_dest_path`` and,
    when saved, ``pending_results`` is incremented. Unless ``resending``
    is true, a CPU is marked available up front and ``pending_jobs`` is
    decremented at the end (cleaning up a zombie computation that has no
    pending jobs left).

    Args:
        job_info: job bookkeeping object with ``job_reply``, ``reply_addr``,
            ``compute_id`` and ``compute_dest_path``.
        resending: True when re-sending a stored result; CPU and
            pending-job accounting is skipped.
        coro: coroutine object running this generator; must not be None.
    """
    assert coro is not None
    job_reply = job_info.job_reply
    logger.debug('Sending result for job %s (%s) to %s',
                 job_reply.uid, job_reply.status, str(job_info.reply_addr))
    if not resending:
        self.avail_cpus += 1
        assert self.avail_cpus <= self.num_cpus
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock = AsynCoroSocket(sock, blocking=False,
                          certfile=self.certfile, keyfile=self.keyfile)
    sock.settimeout(5)
    try:
        yield sock.connect(job_info.reply_addr)
        yield sock.send_msg(serialize(job_reply))
        ack = yield sock.recv_msg()
        assert ack == 'ACK'
        compute = self.computations.get(job_info.compute_id, None)
        if compute is not None:
            compute.last_pulse = time.time()
    except:
        logger.error("Couldn't send results for %s to %s : %s",
                     job_reply.uid, str(job_info.reply_addr),
                     traceback.format_exc())
        # store job result even if computation has not enabled
        # fault recovery; user may be able to access node and
        # retrieve result manually
        f = os.path.join(job_info.compute_dest_path,
                         '_dispy_job_reply_%s' % job_reply.uid)
        logger.debug('storing results for job %s', job_reply.uid)
        try:
            # `with` closes the file even when pickling fails part-way.
            with open(f, 'wb') as fd:
                pickle.dump(job_reply, fd)
        except Exception:
            logger.debug('Could not save results for job %s', job_reply.uid)
        else:
            compute = self.computations.get(job_info.compute_id, None)
            if compute is not None:
                compute.pending_results += 1
    finally:
        sock.close()
        # Job accounting is done here (it does not yield) so that it also
        # runs if the failure handler above raises.
        if not resending:
            compute = self.computations.get(job_info.compute_id, None)
            if compute is not None:
                compute.pending_jobs -= 1
                if compute.pending_jobs == 0 and compute.zombie:
                    self.cleanup_computation(compute)
def tcp_task(conn, addr, coro=None):
    """Handle one TCP connection carrying a scheduler PING and relay it.

    Reads ``auth_len`` bytes followed by one message from ``conn`` and
    closes the connection. The message is unserialized after skipping the
    length of the ``PING:`` prefix; if its ``version`` matches, the sender
    is recorded as the current scheduler and the PING is broadcast to
    ``self.node_port``. ``self`` and ``auth_len`` come from the enclosing
    scope.

    Args:
        conn: connected asynchronous socket to read from.
        addr: peer address; ``addr[0]`` is the host, ``addr[1]`` the port.
        coro: coroutine object running this generator (not used directly).
    """
    conn.settimeout(5)
    try:
        # The first `auth_len` bytes are read and then replaced by the
        # message that follows them.
        msg = yield conn.recvall(auth_len)
        msg = yield conn.recv_msg()
    except Exception:
        logger.debug(traceback.format_exc())
        logger.debug('Ignoring invalid TCP message from %s:%s',
                     addr[0], addr[1])
        # `return` instead of `raise StopIteration`: raising StopIteration
        # inside a generator becomes RuntimeError under PEP 479.
        return
    finally:
        conn.close()

    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
    try:
        info = asyncoro.unserialize(msg[len('PING:'.encode()):])
        if info['version'] != __version__:
            logger.warning(
                'Ignoring %s due to version mismatch: %s / %s',
                info['ip_addrs'], info['version'], __version__)
            # Returning here avoids the handler below logging a spurious
            # traceback for an already-reported mismatch.
            return
        # TODO: since dispynetrelay is not aware of computations
        # closing, if more than one client sends ping, nodes will
        # respond to different clients
        self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
        self.scheduler_port = info['port']
    except Exception:
        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
        logger.debug(traceback.format_exc())
        return
    if info.get('relay', None):
        logger.debug('Ignoring ping back (from %s)', addr[0])
        return
    logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
    if self.node_port == self.listen_port:
        # Flag the message so it is ignored if it loops back to this relay.
        info['relay'] = 'y'
    bc_sock = asyncoro.AsyncSocket(
        socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    try:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        yield bc_sock.sendto('PING:'.encode() + asyncoro.serialize(info),
                             ('<broadcast>', self.node_port))
    finally:
        bc_sock.close()
def tcp_task(conn, addr, coro=None):
    """Handle one TCP connection carrying a scheduler PING and relay it.

    Reads ``auth_len`` bytes and then one message from ``conn``, closing
    the connection afterwards. The payload after the ``PING:`` prefix
    length is unserialized; when its ``version`` matches, the sender
    becomes the current scheduler and the PING is broadcast to
    ``self.node_port``. ``self`` and ``auth_len`` come from the enclosing
    scope.

    Args:
        conn: connected asynchronous socket to read from.
        addr: peer address; ``addr[0]`` is the host, ``addr[1]`` the port.
        coro: coroutine object running this generator (not used directly).
    """
    ping_prefix = 'PING:'.encode()
    conn.settimeout(5)
    try:
        # Leading `auth_len` bytes are consumed; only the following message
        # is kept.
        msg = yield conn.recvall(auth_len)
        msg = yield conn.recv_msg()
    except Exception:
        logger.debug(traceback.format_exc())
        logger.debug('Ignoring invalid TCP message from %s:%s', addr[0], addr[1])
        # Plain `return` ends the generator; `raise StopIteration` turns
        # into RuntimeError under PEP 479.
        return
    finally:
        conn.close()

    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
    try:
        info = asyncoro.unserialize(msg[len(ping_prefix):])
        if info['version'] != __version__:
            logger.warning('Ignoring %s due to version mismatch: %s / %s',
                           info['ip_addrs'], info['version'], __version__)
            # Leave directly so the generic handler below does not log a
            # second, misleading traceback.
            return
        # TODO: since dispynetrelay is not aware of computations
        # closing, if more than one client sends ping, nodes will
        # respond to different clients
        self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
        self.scheduler_port = info['port']
    except Exception:
        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
        logger.debug(traceback.format_exc())
        return
    if info.get('relay', None):
        logger.debug('Ignoring ping back (from %s)', addr[0])
        return
    logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
    if self.node_port == self.listen_port:
        # Mark the message so it is dropped if it loops back to this relay.
        info['relay'] = 'y'
    bc_sock = asyncoro.AsyncSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
    try:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        yield bc_sock.sendto(ping_prefix + asyncoro.serialize(info),
                             ('<broadcast>', self.node_port))
    finally:
        bc_sock.close()
def relay_pings(self, ip_addr='', netmask=None, node_port=51348, scheduler_node=None, scheduler_port=51347):
    """Relay PING datagrams between a scheduler and nodes; never returns.

    Listens with blocking UDP sockets on ``node_port`` and
    ``scheduler_port``. PINGs received on ``node_port`` are re-broadcast to
    ``node_port``; qualifying datagrams received on ``scheduler_port`` are
    forwarded to the last known scheduler address.

    Args:
        ip_addr: local IPv4 address, optionally in ``addr/bits`` form; when
            empty, the address of the local host name is used.
        netmask: dotted-quad string or integer mask; when not given it is
            derived from the ``/bits`` suffix of ``ip_addr`` if present.
        node_port: UDP port on which PINGs are received and re-broadcast.
        scheduler_node: scheduler host, resolved with ``_node_ipaddr``.
        scheduler_port: UDP port of the scheduler side.
    """
    netaddr = None
    if not netmask:
        try:
            # Accept "a.b.c.d/bits"; any failure (e.g. no '/') falls back to
            # a host mask.
            ip_addr, bits = ip_addr.split('/')
            socket.inet_aton(ip_addr)
            netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
            netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
        except:
            netmask = '255.255.255.255'
    if ip_addr:
        # Validates the address; raises if it is not a valid IPv4 string.
        socket.inet_aton(ip_addr)
    else:
        ip_addr = socket.gethostbyname(socket.gethostname())
    if not netaddr and netmask:
        try:
            if isinstance(netmask, str):
                netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
            else:
                assert isinstance(netmask, int)
            assert netmask > 0
            netaddr = (struct.unpack('>L', socket.inet_aton(ip_addr))[0]) & netmask
        except:
            logger.warning('Invalid netmask')
    try:
        # Both values must pack as 32-bit addresses; otherwise filtering by
        # network is disabled.
        socket.inet_ntoa(struct.pack('>L', netaddr))
        socket.inet_ntoa(struct.pack('>L', netmask))
    except:
        netaddr = netmask = None
    # NOTE(review): scheduler_version is not read again in this function.
    scheduler_version = _dispy_version
    bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    bc_sock.bind(('', 0))
    bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    scheduler_ip_addr = _node_ipaddr(scheduler_node)
    if scheduler_ip_addr and scheduler_port:
        # Announce the configured scheduler to nodes once at start-up.
        relay_request = serialize({'ip_addr':scheduler_ip_addr, 'port':scheduler_port,
                                   'version':_dispy_version, 'sign':None})
        bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))
    node_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    node_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    node_sock.bind(('', node_port))
    sched_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sched_sock.bind(('', scheduler_port))
    logger.info('Listening on %s:%s/%s', ip_addr, node_port, scheduler_port)
    while True:
        # Block until at least one of the two sockets has a datagram.
        ready = select.select([node_sock, sched_sock], [], [])[0]
        for sock in ready:
            if sock == node_sock:
                msg, addr = node_sock.recvfrom(1024)
                if not msg.startswith('PING:'):
                    logger.debug('Ignoring message "%s" from %s',
                                 msg[:min(len(msg), 5)], addr[0])
                    continue
                # Senders whose address falls inside netaddr/netmask are
                # skipped.
                if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0] & netmask) == netaddr:
                    logger.debug('Ignoring own ping (from %s)', addr[0])
                    continue
                logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                try:
                    info = unserialize(msg[len('PING:'):])
                    # The scheduler address/port are replaced before the
                    # version and type checks below run.
                    scheduler_ip_addr = info['ip_addr']
                    scheduler_port = info['port']
                    assert info['version'] == _dispy_version
                    # scheduler_sign = info['sign']
                    assert isinstance(scheduler_port, int)
                except:
                    logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                    logger.debug(traceback.format_exc())
                    continue
                logger.debug('relaying ping from %s / %s' % (info['ip_addr'], addr[0]))
                if scheduler_ip_addr is None:
                    # No address in the message: use the sender's address.
                    info['ip_addr'] = scheduler_ip_addr = addr[0]
                relay_request = serialize(info)
                bc_sock.sendto('PING:%s' % relay_request, ('<broadcast>', node_port))
            else:
                assert sock == sched_sock
                msg, addr = sched_sock.recvfrom(1024)
                # NOTE(review): the prefix tested is 'PING:' but the slice
                # uses len('PONG:'); both are 5 characters, so the result
                # is the same — confirm which message type is intended.
                if msg.startswith('PING:') and scheduler_ip_addr and scheduler_port:
                    try:
                        info = unserialize(msg[len('PONG:'):])
                        assert info['version'] == _dispy_version
                        assert isinstance(info['ip_addr'], str)
                        assert isinstance(info['port'], int)
                        # assert isinstance(info['cpus'], int)
                        info['scheduler_ip_addr'] = scheduler_ip_addr
                        # NOTE(review): relay_sock is not closed if sendto
                        # raises.
                        relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                        relay_sock.sendto('PING:' + serialize(info),
                                          (scheduler_ip_addr, scheduler_port))
                        relay_sock.close()
                    except:
                        logger.debug(traceback.format_exc())
                        # raise
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
def add_computation_task(msg):
    """Register a computation described by ``msg`` and reply on ``conn``.

    Generator that relies on ``self``, ``conn``, ``addr`` and ``coro`` from
    the enclosing scope. The reply sent on ``conn`` is ``'ACK'`` (optionally
    followed by ``':XFER_FILES:'`` and the serialized list of files still
    to be transferred), ``'Busy'``, ``'Invalid computation request'`` or a
    string starting with ``'NACK'``.

    Args:
        msg: serialized computation object.
    """
    assert coro is not None
    try:
        compute = unserialize(msg)
    except:
        logger.debug('Ignoring computation request from %s', addr[0])
        try:
            yield conn.send_msg('Invalid computation request')
        except:
            logger.warning('Failed to send reply to %s', str(addr))
        raise StopIteration
    yield self.lock.acquire()
    # From here on, every exit path must release self.lock exactly once.
    # Only the scheduler currently being served (or any scheduler, when
    # none is set) may add computations.
    if not ((self.scheduler_ip_addr is None) or
            (self.scheduler_ip_addr == compute.scheduler_ip_addr and \
             self.scheduler_port == compute.scheduler_port)):
        logger.debug('Ignoring computation request from %s: %s, %s, %s',
                     compute.scheduler_ip_addr, self.scheduler_ip_addr,
                     self.avail_cpus, self.cpus)
        self.lock.release()
        try:
            yield conn.send_msg('Busy')
        except:
            pass
        raise StopIteration
    resp = 'ACK'
    if compute.dest_path and isinstance(compute.dest_path, str):
        compute.dest_path = compute.dest_path.strip(os.sep)
    else:
        # No usable path given: try up to 20 random names until one does
        # not exist yet under the prefix.
        for x in xrange(20):
            compute.dest_path = os.urandom(8).encode('hex')
            if compute.dest_path.find(os.sep) >= 0:
                continue
            if not os.path.isdir(os.path.join(self.dest_path_prefix, compute.dest_path)):
                break
        else:
            logger.warning('Failed to create unique dest_path: %s', compute.dest_path)
            resp = 'NACK'
    compute.dest_path = os.path.join(self.dest_path_prefix, compute.dest_path)
    try:
        os.makedirs(compute.dest_path)
        # Restrict the directory to its owner (read, write, execute).
        os.chmod(compute.dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        logger.debug('dest_path for "%s": %s', compute.name, compute.dest_path)
    except:
        logger.warning('Invalid destination path: "%s"', compute.dest_path)
        if os.path.isdir(compute.dest_path):
            os.rmdir(compute.dest_path)
        self.lock.release()
        try:
            yield conn.send_msg('NACK (Invalid dest_path)')
        except:
            logger.warning('Failed to send reply to %s', str(addr))
        raise StopIteration
    if compute.id in self.computations:
        logger.warning('Computation "%s" (%s) is being replaced',
                       compute.name, compute.id)
    # Bookkeeping attributes attached to the computation object.
    setattr(compute, 'last_pulse', time.time())
    setattr(compute, 'pending_jobs', 0)
    setattr(compute, 'pending_results', 0)
    setattr(compute, 'zombie', False)
    logger.debug('xfer_files given: %s',
                 ','.join(xf.name for xf in compute.xfer_files))
    if compute.type == _Compute.func_type:
        try:
            code = compile(compute.code, '<string>', 'exec')
        except:
            logger.warning('Computation "%s" could not be compiled', compute.name)
            if os.path.isdir(compute.dest_path):
                os.rmdir(compute.dest_path)
            self.lock.release()
            try:
                yield conn.send_msg('NACK (Compilation failed)')
            except:
                logger.warning('Failed to send reply to %s', str(addr))
            raise StopIteration
        # The compiled code object is stored in marshalled form.
        compute.code = marshal.dumps(code)
    elif compute.type == _Compute.prog_type:
        assert not compute.code
        compute.name = os.path.join(compute.dest_path, os.path.basename(compute.name))
    # Collect the files that still have to be transferred.
    xfer_files = []
    for xf in compute.xfer_files:
        tgt = os.path.join(compute.dest_path, os.path.basename(xf.name))
        try:
            if _same_file(tgt, xf):
                # Already present: count the additional use and skip it.
                logger.debug('Ignoring file "%s" / "%s"', xf.name, tgt)
                if tgt not in self.file_uses:
                    self.file_uses[tgt] = 0
                self.file_uses[tgt] += 1
                continue
        except:
            pass
        if self.max_file_size and xf.stat_buf.st_size > self.max_file_size:
            resp = 'NACK (file "%s" too big)' % xf.name
        else:
            xfer_files.append(xf)
    if resp == 'ACK' and ((self.scheduler_ip_addr is not None) and \
                          (self.scheduler_ip_addr != compute.scheduler_ip_addr)):
        resp = 'NACK (busy)'
    if resp == 'ACK':
        self.computations[compute.id] = compute
        self.scheduler_ip_addr = compute.scheduler_ip_addr
        self.scheduler_port = compute.scheduler_port
        self.pulse_interval = compute.pulse_interval
        self.lock.release()
        if xfer_files:
            resp += ':XFER_FILES:' + serialize(xfer_files)
        try:
            yield conn.send_msg(resp)
        except:
            # The scheduler could not be told; undo the registration.
            assert self.scheduler_ip_addr == compute.scheduler_ip_addr
            yield self.lock.acquire()
            del self.computations[compute.id]
            self.scheduler_ip_addr = None
            self.scheduler_port = None
            self.pulse_interval = None
            self.lock.release()
        else:
            self.timer_coro.resume(True)
    else:
        self.lock.release()
        if os.path.isdir(compute.dest_path):
            os.rmdir(compute.dest_path)
        try:
            yield conn.send_msg(resp)
        except:
            pass
def udp_server(self, scheduler_ip_addr, coro=None):
    """Serve UDP requests received on ``self.udp_sock``; never returns.

    Runs as a daemon coroutine. A PONG is broadcast first when all CPUs
    are available, and one is sent directly to ``scheduler_ip_addr`` when
    that is given. The loop then handles three message prefixes:
    ``PING:`` (answered with a PONG when the node is idle), ``PULSE:``
    (refreshes ``last_pulse`` of every computation) and ``SERVERPORT:``
    (answered with this node's address, port, signature and version).

    Args:
        scheduler_ip_addr: address of a scheduler to announce this node
            to, or a false value to skip the direct announcement.
        coro: coroutine object running this generator; must not be None.
    """
    assert coro is not None
    coro.set_daemon()
    if self.avail_cpus == self.cpus:
        yield self.send_pong_msg(coro=coro)
    pong_msg = {'ip_addr':self.ext_ip_addr, 'name':self.name, 'port':self.address[1],
                'cpus':self.cpus, 'sign':self.signature, 'version':_dispy_version}
    pong_msg = 'PONG:' + serialize(pong_msg)
    if scheduler_ip_addr:
        sock = AsynCoroSocket(socket.socket(socket.AF_INET, socket.SOCK_DGRAM))
        try:
            yield sock.sendto(pong_msg, (scheduler_ip_addr, self.scheduler_port))
        except:
            logger.warning("Couldn't send ping message to %s:%s",
                           scheduler_ip_addr, self.scheduler_port)
        finally:
            sock.close()
    while True:
        msg, addr = yield self.udp_sock.recvfrom(1024)
        # TODO: process each message as separate Coro, so
        # exceptions are contained?
        if msg.startswith('PING:'):
            # A node with any CPU in use does not answer pings.
            if self.cpus != self.avail_cpus:
                logger.debug('Busy (%s/%s); ignoring ping message from %s',
                             self.cpus, self.avail_cpus, addr[0])
                continue
            try:
                info = unserialize(msg[len('PING:'):])
                # inet_aton raises for an invalid IPv4 address string.
                socket.inet_aton(info['scheduler_ip_addr'])
                assert isinstance(info['scheduler_port'], int)
                assert info['version'] == _dispy_version
                # Reply to the scheduler named in the message, not to the
                # datagram's sender.
                addr = (info['scheduler_ip_addr'], info['scheduler_port'])
            except:
                # raise
                logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                continue
            yield self.udp_sock.sendto(pong_msg, addr)
        elif msg.startswith('PULSE:'):
            try:
                info = unserialize(msg[len('PULSE:'):])
                # Only pulses from the current scheduler are accepted.
                assert info['ip_addr'] == self.scheduler_ip_addr
                yield self.lock.acquire()
                for compute in self.computations.itervalues():
                    compute.last_pulse = time.time()
                # NOTE(review): this release is yielded, unlike the other
                # lock releases in this file — confirm it is intended.
                yield self.lock.release()
            except:
                logger.warning('Ignoring PULSE from %s', addr[0])
        elif msg.startswith('SERVERPORT:'):
            try:
                req = unserialize(msg[len('SERVERPORT:'):])
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                reply = {'ip_addr':self.address[0], 'port':self.address[1],
                         'sign':self.signature, 'version':_dispy_version}
                sock = AsynCoroSocket(sock, blocking=False)
                sock.settimeout(1)
                yield sock.sendto(serialize(reply), (req['ip_addr'], req['port']))
                sock.close()
            except:
                logger.debug(traceback.format_exc())
                # pass
        else:
            logger.warning('Ignoring ping message from %s', addr[0])
def listen_udp_proc(self, coro=None):
    """Relay scheduler PING datagrams to nodes (IPv4 broadcast or IPv6).

    Daemon coroutine. One sending socket is set up for the address family
    in ``self.addrinfo[0]`` and aimed at ``self._broadcast`` /
    ``self.node_port``. If a scheduler address is already known, a PING is
    sent there at start-up. Afterwards each datagram on
    ``self.listen_port`` that starts with ``PING:`` and has a matching
    ``version`` is recorded as the current scheduler and sent on to the
    same destination.

    Args:
        coro: coroutine object running this generator; must not be None.
    """
    coro.set_daemon()
    ping_prefix = 'PING:'.encode()

    bc_sock = asyncoro.AsyncSocket(
        socket.socket(self.addrinfo[0], socket.SOCK_DGRAM))
    if self.addrinfo[0] == socket.AF_INET:
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        bc_addr = (self._broadcast, self.node_port)
    else:  # self.addrinfo[0] == socket.AF_INET6
        bc_sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_MULTICAST_HOPS,
                           struct.pack('@i', 1))
        bc_addr = list(self.addrinfo[4])
        bc_addr[1] = 0
        bc_sock.bind(tuple(bc_addr))
        bc_addr[0] = self._broadcast
        bc_addr[1] = self.node_port
        bc_addr = tuple(bc_addr)

    if self.scheduler_ip_addrs and self.scheduler_port:
        relay_request = {
            'ip_addrs': self.scheduler_ip_addrs,
            'port': self.scheduler_port,
            'version': __version__,
            'sign': None,
        }
        # Yielded, like the relay send below, so this send has completed
        # before the coroutine waits on the listening socket.
        yield bc_sock.sendto(
            ping_prefix + asyncoro.serialize(relay_request), bc_addr)

    listen_sock = asyncoro.AsyncSocket(
        socket.socket(self.addrinfo[0], socket.SOCK_DGRAM))
    if self.addrinfo[0] == socket.AF_INET:
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_addr = ('', self.listen_port)
    else:  # self.addrinfo[0] == socket.AF_INET6
        listen_sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP,
                               self.mreq)
        listen_addr = list(self.addrinfo[4])
        listen_addr[0] = ''
        listen_addr[1] = self.listen_port
        listen_addr = tuple(listen_addr)
    listen_sock.bind(listen_addr)

    while 1:
        msg, addr = yield listen_sock.recvfrom(1024)
        if not msg.startswith(ping_prefix):
            logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
            continue
        logger.debug('Ping message from %s (%s)', addr[0], addr[1])
        try:
            info = asyncoro.deserialize(msg[len(ping_prefix):])
            if info['version'] != __version__:
                logger.warning(
                    'Ignoring %s due to version mismatch: %s / %s',
                    info['ip_addrs'], info['version'], __version__)
                continue
            self.scheduler_ip_addrs = info['ip_addrs'] + [addr[0]]
            self.scheduler_port = info['port']
        except Exception:
            logger.debug('Ignoring ping message from %s (%s)',
                         addr[0], addr[1])
            logger.debug(traceback.format_exc())
            continue
        if info.get('relay', None):
            logger.debug('Ignoring ping back (from %s)', addr[0])
            continue
        logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
        if self.node_port == self.listen_port:
            # Flag the message so it is ignored if it loops back to self.
            info['relay'] = 'y'
        # Send to the broadcast/multicast destination. The original reused
        # the name `addr`, which by now holds the sender's address, so the
        # PING was sent back to its sender.
        yield bc_sock.sendto(ping_prefix + asyncoro.serialize(info), bc_addr)
def retrieve_job_task(msg):
    """Send a job's result to the requester on ``conn``.

    Generator that relies on ``self``, ``conn`` and ``coro`` from the
    enclosing scope. The request must provide ``uid``, ``hash`` and
    ``compute_id``. A job still tracked in ``self.job_infos`` has its
    current reply sent; otherwise the directories under
    ``self.dest_path_prefix`` are searched for a saved
    ``_dispy_job_reply_<uid>`` file whose ``hash`` matches. When nothing
    matches, a serialized ``'Invalid job: <uid>'`` string is sent.

    Args:
        msg: serialized request dictionary.
    """
    assert coro is not None
    try:
        req = unserialize(msg)
        assert req['uid'] is not None
        assert req['hash'] is not None
        assert req['compute_id'] is not None
    except:
        resp = serialize('Invalid job')
        try:
            yield conn.send_msg(resp)
        except:
            pass
        raise StopIteration
    job_info = self.job_infos.get(req['uid'], None)
    resp = None
    if job_info is not None:
        # The job is still known to this node: send its reply and finish.
        try:
            yield conn.send_msg(serialize(job_info.job_reply))
            ack = yield conn.recv_msg()
            # no need to check ack
        except:
            logger.debug('Could not send reply for job %s', req['uid'])
        raise StopIteration
    # Otherwise look for a result saved to disk.
    for d in os.listdir(self.dest_path_prefix):
        info_file = os.path.join(self.dest_path_prefix, d,
                                 '_dispy_job_reply_%s' % req['uid'])
        if os.path.isfile(info_file):
            try:
                fd = open(info_file, 'rb')
                job_reply = pickle.load(fd)
                fd.close()
            except:
                job_reply = None
            # The stored hash must match the one supplied in the request.
            if hasattr(job_reply, 'hash') and job_reply.hash == req['hash']:
                try:
                    yield conn.send_msg(serialize(job_reply))
                    ack = yield conn.recv_msg()
                    assert ack == 'ACK'
                except:
                    logger.debug('Could not send reply for job %s', req['uid'])
                    raise StopIteration
                try:
                    # Delivered and acknowledged: drop the saved copy and
                    # update the computation's pending-result count.
                    os.remove(info_file)
                    yield self.lock.acquire()
                    compute = self.computations.get(req['compute_id'], None)
                    if compute is not None:
                        compute.pending_results -= 1
                        if compute.pending_results == 0:
                            compute.zombie = True
                            self.cleanup_computation(compute)
                    self.lock.release()
                except:
                    logger.debug('Could not remove "%s"', info_file)
                raise StopIteration
    else:
        # The loop contains no `break`, so this runs whenever the search
        # finishes without raising StopIteration.
        resp = serialize('Invalid job: %s' % req['uid'])
    if resp:
        try:
            yield conn.send_msg(resp)
        except:
            pass
def timer_task(self, coro=None):
    """Periodic housekeeping coroutine: send pulses and reap zombie computations.

    Marks ``coro`` as a daemon and loops forever, sleeping with
    ``coro.suspend(timeout)`` between rounds.  The value sent back into the
    coroutine on wake-up becomes the ``reset`` flag; when it is true the
    timeout is recomputed from ``self.pulse_interval`` and
    ``self.zombie_interval``.

    Each round:

    * if ``pulse_interval`` has elapsed, some CPUs are in use and
      ``self.scheduler_ip_addr`` is set, a 'PULSE:' datagram is sent to the
      scheduler;
    * if ``zombie_interval`` has elapsed then, while holding ``self.lock``,
      computations whose ``last_pulse`` is older than ``zombie_interval``
      are flagged as zombies, zombies without pending jobs are cleaned up,
      and for the remaining computations with pending results up to 128
      saved ``_dispy_job_reply_*`` files are loaded, removed and handed to
      a new ``Coro`` running ``self._send_job_reply``.  After the lock is
      released a 'TERMINATED:' datagram is sent for every reaped
      computation and, if no scheduler is set and all CPUs are free,
      ``pulse_interval`` is cleared and ``self.send_pong_msg`` is run.

    The lock and the sockets are released in ``finally`` blocks, so an error
    in a round does not leave them held/open.
    """
    coro.set_daemon()
    reset = True
    last_pulse_time = last_zombie_time = time.time()
    while True:
        if reset:
            if self.pulse_interval and self.zombie_interval:
                timeout = min(self.pulse_interval, self.zombie_interval)
                self.zombie_interval = max(5 * self.pulse_interval, self.zombie_interval)
            else:
                timeout = max(self.pulse_interval, self.zombie_interval)

        reset = yield coro.suspend(timeout)

        now = time.time()
        if self.pulse_interval and (now - last_pulse_time) >= self.pulse_interval:
            n = self.cpus - self.avail_cpus
            assert n >= 0
            if n > 0 and self.scheduler_ip_addr:
                last_pulse_time = now
                msg = 'PULSE:' + serialize({'ip_addr': self.ext_ip_addr,
                                            'port': self.udp_sock.getsockname()[1],
                                            'cpus': n})
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                sock = AsynCoroSocket(sock, blocking=False)
                try:
                    sock.settimeout(1)
                    yield sock.sendto(msg, (self.scheduler_ip_addr, self.scheduler_port))
                finally:
                    sock.close()

        if self.zombie_interval and (now - last_zombie_time) >= self.zombie_interval:
            last_zombie_time = now
            yield self.lock.acquire()
            try:
                for compute in self.computations.values():
                    if (now - compute.last_pulse) > self.zombie_interval:
                        compute.zombie = True
                zombies = [compute for compute in self.computations.values()
                           if compute.zombie and compute.pending_jobs == 0]
                for compute in zombies:
                    logger.debug('Deleting zombie computation "%s"', compute.name)
                    self.cleanup_computation(compute)
                # read self.computations again: the clean-up above may have
                # changed it
                phoenix = [compute for compute in self.computations.values()
                           if not compute.zombie and compute.pending_results]
                for compute in phoenix:
                    files = [f for f in os.listdir(compute.dest_path)
                             if f.startswith('_dispy_job_reply_')]
                    # limit number queued so as not to take up too much time
                    for f in files[:128]:
                        result_file = os.path.join(compute.dest_path, f)
                        try:
                            with open(result_file, 'rb') as fd:
                                job_result = pickle.load(fd)
                        except Exception:
                            logger.debug('Could not load "%s"', result_file)
                            logger.debug(traceback.format_exc())
                            continue
                        try:
                            os.remove(result_file)
                        except Exception:
                            logger.debug('Could not remove "%s"', result_file)
                        compute.pending_results -= 1
                        job_info = _DispyJobInfo(
                            job_result,
                            (compute.scheduler_ip_addr, compute.job_result_port),
                            compute)
                        Coro(self._send_job_reply, job_info, resending=True)
            finally:
                # release even if the scan fails (e.g. dest_path has vanished),
                # otherwise every other user of the lock would block forever
                self.lock.release()

            for compute in zombies:
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                sock = AsynCoroSocket(sock, blocking=False)
                try:
                    sock.settimeout(1)
                    logger.debug('Sending TERMINATE to %s', compute.scheduler_ip_addr)
                    data = serialize({'ip_addr': self.address[0],
                                      'port': self.address[1],
                                      'sign': self.signature})
                    yield sock.sendto('TERMINATED:%s' % data,
                                      (compute.scheduler_ip_addr, compute.scheduler_port))
                finally:
                    sock.close()

            if self.scheduler_ip_addr is None and self.avail_cpus == self.cpus:
                self.pulse_interval = None
                reset = True
                yield self.send_pong_msg(coro=coro)
def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                scheduler_node=None, scheduler_port=51347):
    """Relay 'PING:' datagrams between a scheduler and nodes; never returns.

    Args:
        ip_addr: IPv4 address of this relay.  When *netmask* is not given it
            may have the form ``'a.b.c.d/bits'``; when empty, the address of
            the local host name is used.
        netmask: dotted-quad string or positive integer.  Together with
            *ip_addr* it yields the network address used to ignore pings
            coming from that same network.  An invalid value only logs a
            warning and disables that filter.
        node_port: UDP port pings are received on and broadcast to.
        scheduler_node: passed to ``_node_ipaddr`` to find the initial
            scheduler address, if any.
        scheduler_port: UDP port of the second listening socket and initial
            scheduler port advertised in relayed pings.

    If a scheduler address and port are known at start-up, one ping is
    broadcast immediately.  The function then blocks in a ``select`` loop:
    pings arriving on *node_port* update the remembered scheduler and are
    re-broadcast to *node_port*; pings arriving on *scheduler_port* are
    answered with a ping describing the remembered scheduler, sent to the
    ``ip_addr``/``port`` given in the message.

    Every socket is closed in a ``finally`` block, including the two
    listening sockets if the loop is left by an exception.
    """
    netaddr = None
    if not netmask:
        try:
            ip_addr, bits = ip_addr.split('/')
            socket.inet_aton(ip_addr)
            netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
            netaddr = struct.unpack('>L', socket.inet_aton(ip_addr))[0] & netmask
        except Exception:
            netmask = '255.255.255.255'
    if ip_addr:
        socket.inet_aton(ip_addr)  # raises if ip_addr is not a valid IPv4 address
    else:
        ip_addr = socket.gethostbyname(socket.gethostname())
    if not netaddr and netmask:
        try:
            if isinstance(netmask, str):
                netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
            elif not isinstance(netmask, int):
                raise TypeError('netmask must be a string or an integer')
            if not netmask > 0:
                raise ValueError('netmask must be positive')
            netaddr = struct.unpack('>L', socket.inet_aton(ip_addr))[0] & netmask
        except Exception:
            logger.warning('Invalid netmask')
    try:
        # both values must be packable as 32-bit addresses, else drop the filter
        socket.inet_ntoa(struct.pack('>L', netaddr))
        socket.inet_ntoa(struct.pack('>L', netmask))
    except Exception:
        netaddr = netmask = None

    def in_own_network(ip):
        # True if ``ip`` lies in the network described by netaddr/netmask
        return (struct.unpack('>L', socket.inet_aton(ip))[0] & netmask) == netaddr

    def broadcast_ping(payload):
        # send one ping to the broadcast address on a short-lived socket
        out_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            out_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            out_sock.sendto(b'PING:' + serialize(payload), ('<broadcast>', node_port))
        finally:
            out_sock.close()

    scheduler_ip_addrs = [ip for ip in [_node_ipaddr(scheduler_node)] if ip]
    if scheduler_ip_addrs and scheduler_port:
        broadcast_ping({
            'ip_addrs': scheduler_ip_addrs,
            'port': scheduler_port,
            'version': _dispy_version,
            'sign': None,
        })

    node_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sched_sock = None
    try:
        node_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        node_sock.bind(('', node_port))

        sched_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sched_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sched_sock.bind(('', scheduler_port))

        logger.info('Listening on %s:%s/%s', ip_addr, node_port, scheduler_port)
        while True:
            ready = select.select([node_sock, sched_sock], [], [])[0]
            for sock in ready:
                if sock == node_sock:
                    msg, addr = node_sock.recvfrom(1024)
                    if not msg.startswith(b'PING:'):
                        logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
                        continue
                    if netaddr and in_own_network(addr[0]):
                        logger.debug('Ignoring ping back (from %s)', addr[0])
                        continue
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        info = unserialize(msg[len(b'PING:'):])
                        if info['version'] != _dispy_version:
                            logger.warning('Ignoring %s due to version mismatch: %s / %s',
                                           info['ip_addrs'], info['version'], _dispy_version)
                            continue
                        # evaluate both before assigning so a malformed
                        # message cannot update only one of them
                        scheduler_ip_addrs, scheduler_port = (
                            info['ip_addrs'] + [addr[0]], info['port'])
                    except Exception:
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                        logger.debug(traceback.format_exc())
                        continue
                    logger.debug('relaying ping from %s / %s', info['ip_addrs'], addr[0])
                    broadcast_ping(info)
                else:
                    assert sock == sched_sock
                    msg, addr = sched_sock.recvfrom(1024)
                    if not (msg.startswith(b'PING:') and scheduler_ip_addrs
                            and scheduler_port):
                        continue
                    try:
                        info = unserialize(msg[len(b'PING:'):])
                        if (netaddr and info.get('scheduler_ip_addr', None)
                                and in_own_network(info['scheduler_ip_addr'])):
                            logger.debug('Ignoring ping back (from %s)', addr[0])
                            continue
                        if info['version'] != _dispy_version:
                            raise ValueError('version mismatch')
                        reply = {
                            'ip_addrs': scheduler_ip_addrs,
                            'port': scheduler_port,
                            'version': _dispy_version,
                        }
                        relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                        try:
                            relay_sock.sendto(b'PING:' + serialize(reply),
                                              (info['ip_addr'], info['port']))
                        finally:
                            relay_sock.close()
                    except Exception:
                        logger.debug(traceback.format_exc())
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
    finally:
        node_sock.close()
        if sched_sock is not None:
            sched_sock.close()
def relay_pings(self, ip_addr='', netmask=None, node_port=51348,
                scheduler_node=None, scheduler_port=51347):
    """Relay scheduler pings to nodes and answer node pongs; never returns.

    Args:
        ip_addr: IPv4 address of this relay.  When *netmask* is not given it
            may have the form ``'a.b.c.d/bits'``; when empty, the address of
            the local host name is used.
        netmask: dotted-quad string or positive integer.  Together with
            *ip_addr* it yields the network address used to ignore pings
            coming from that same network.  An invalid value only logs a
            warning and disables that filter.
        node_port: UDP port pings are received on and sent to.
        scheduler_node: passed to ``_node_ipaddr`` to find the initial
            scheduler address, if any.
        scheduler_port: UDP port pongs are received on and initial scheduler
            port advertised in relayed pings.

    If a scheduler address and port are known at start-up, one ping is
    broadcast immediately.  The function then blocks in a ``select`` loop:
    a valid 'PING:' on *node_port* replaces the remembered scheduler
    address, port and version and is re-broadcast to *node_port*; a valid
    'PONG:' on *scheduler_port* is answered with a ping describing the
    remembered scheduler, sent to the pong's ``host`` on *node_port*.

    All three sockets are closed if the loop is left by an exception.
    """
    netaddr = None
    if not netmask:
        try:
            ip_addr, bits = ip_addr.split('/')
            socket.inet_aton(ip_addr)
            netmask = (0xffffffff << (32 - int(bits))) & 0xffffffff
            netaddr = struct.unpack('>L', socket.inet_aton(ip_addr))[0] & netmask
        except Exception:
            netmask = '255.255.255.255'
    if ip_addr:
        socket.inet_aton(ip_addr)  # raises if ip_addr is not a valid IPv4 address
    else:
        ip_addr = socket.gethostbyname(socket.gethostname())
    if not netaddr and netmask:
        try:
            if isinstance(netmask, str):
                netmask = struct.unpack('>L', socket.inet_aton(netmask))[0]
            elif not isinstance(netmask, int):
                raise TypeError('netmask must be a string or an integer')
            if not netmask > 0:
                raise ValueError('netmask must be positive')
            netaddr = struct.unpack('>L', socket.inet_aton(ip_addr))[0] & netmask
        except Exception:
            logger.warning('Invalid netmask')
    try:
        # both values must be packable as 32-bit addresses, else drop the filter
        socket.inet_ntoa(struct.pack('>L', netaddr))
        socket.inet_ntoa(struct.pack('>L', netmask))
    except Exception:
        netaddr = netmask = None

    scheduler_version = _dispy_version

    def ping_packet():
        # datagram advertising the currently remembered scheduler
        return 'PING:%s' % serialize({'scheduler_ip_addr': scheduler_ip_addr,
                                      'scheduler_port': scheduler_port,
                                      'version': scheduler_version})

    bc_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    ping_sock = pong_sock = None
    try:
        bc_sock.bind(('', 0))
        bc_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

        scheduler_ip_addr = _node_ipaddr(scheduler_node)
        if scheduler_ip_addr and scheduler_port:
            bc_sock.sendto(ping_packet(), ('<broadcast>', node_port))

        ping_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        ping_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        ping_sock.bind(('', node_port))

        pong_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        pong_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        pong_sock.bind(('', scheduler_port))

        logger.info('Listening on %s:%s', ip_addr, node_port)
        last_ping = 0
        while True:
            ready = select.select([ping_sock, pong_sock], [], [])[0]
            for sock in ready:
                if sock == ping_sock:
                    msg, addr = ping_sock.recvfrom(1024)
                    if not msg.startswith('PING:'):
                        # log only the first few characters, not the whole datagram
                        logger.debug('Ignoring message "%s" from %s', msg[:5], addr[0])
                        continue
                    if netaddr and (struct.unpack('>L', socket.inet_aton(addr[0]))[0]
                                    & netmask) == netaddr:
                        logger.debug('Ignoring own ping (from %s)', addr[0])
                        continue
                    if (time.time() - last_ping) < 10:
                        # NOTE(review): despite the message, the ping is not
                        # dropped - it is relayed after this 10 second pause.
                        logger.warning('Ignoring ping (from %s) for 10 more seconds',
                                       addr[0])
                        time.sleep(10)
                    last_ping = time.time()
                    logger.debug('Ping message from %s (%s)', addr[0], addr[1])
                    try:
                        data = unserialize(msg[len('PING:'):])
                        new_ip_addr = data['scheduler_ip_addr']
                        new_port = data['scheduler_port']
                        new_version = data['version']
                        if not isinstance(new_ip_addr, str):
                            raise TypeError('scheduler_ip_addr must be a string')
                        if not isinstance(new_port, int):
                            raise TypeError('scheduler_port must be an integer')
                    except Exception:
                        logger.debug('Ignoring ping message from %s (%s)', addr[0], addr[1])
                        continue
                    # commit only after the whole message has been validated,
                    # so a bad ping cannot corrupt the remembered scheduler
                    scheduler_ip_addr = new_ip_addr
                    scheduler_port = new_port
                    scheduler_version = new_version
                    bc_sock.sendto(ping_packet(), ('<broadcast>', node_port))
                else:
                    assert sock == pong_sock
                    msg, addr = pong_sock.recvfrom(1024)
                    if not msg.startswith('PONG:'):
                        logger.debug('Ignoring pong message "%s" from %s', msg[:5], addr[0])
                        continue
                    # NOTE: unlike pings, pongs are not filtered by source
                    # network (that check is disabled in the original code).
                    if not (scheduler_ip_addr and scheduler_port):
                        logger.debug('Ignoring pong message from %s', str(addr))
                        continue
                    logger.debug('Pong message from %s (%s)', addr[0], addr[1])
                    try:
                        pong = unserialize(msg[len('PONG:'):])
                        if not isinstance(pong['host'], str):
                            raise TypeError('host must be a string')
                        if not isinstance(pong['port'], int):
                            raise TypeError('port must be an integer')
                        if not isinstance(pong['cpus'], int):
                            raise TypeError('cpus must be an integer')
                        relay_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                        try:
                            relay_sock.sendto(ping_packet(), (pong['host'], node_port))
                        finally:
                            relay_sock.close()
                    except Exception:
                        logger.debug('Ignoring pong message from %s (%s)', addr[0], addr[1])
    finally:
        bc_sock.close()
        if ping_sock is not None:
            ping_sock.close()
        if pong_sock is not None:
            pong_sock.close()