import json
import Queue
import select
import socket
import struct
import time

from kazoo.client import KazooClient

# NOTE: QApplication, AppService, CONF, LOG, logging (with setup()) and the
# protocol constants HEADER_BYTES, HEARTBEAT, NORMALMSG, ACK_HEART, ACK_NORMAL
# are project-internal; their import paths are not shown in this excerpt.


class TaskProxy(QApplication):

    name = 'task proxy'
    version = 'v2.0'

    def init_app(self):
        super(TaskProxy, self).init_app()
        logging.setup("task dispatch proxy.")
        LOG.info("task dispatch proxy run.")
        self._init_zk()
        self._build_listen()
        self.app_service = AppService()

    def run(self):
        # part 1: watch the zookeeper root node and elect a master.
        self.run_status = False

        @self.zk.ChildrenWatch(self.root)
        def children_changes(children):
            LOG.warn("....watch root(%s)'s children changed: %r"
                     % (self.root, children))
            master = min(children)
            LOG.info("....current master is: %s" % master)
            path = "%s/%s" % (self.root, master)
            info = self.zk.get(path)
            master_val = info[0]
            LOG.info("....get master(%s) node value is: %s"
                     % (master, master_val))
            native_ip = self._get_local_ip()
            if master_val == native_ip:
                LOG.info("....node(%s) is master, so running." % master)
                self.run_status = True
            else:
                LOG.warn("....node(%s) is not master, so wait." % native_ip)
                self.run_status = False

        # part 2: serve spider nodes with select().
        rlist = [self.fd]   # wait until ready for reading
        wlist = []          # wait until ready for writing
        timeout = 10        # select timeout
        cs_pool = {}        # manage each client socket's outgoing data
        cs_mapping = {}     # mapping cs => (ip, port)

        while True:
            if not self.run_status:
                time.sleep(1)
                continue

            rs, ws, es = select.select(rlist, wlist, rlist, timeout)
            if not (rs or ws or es):
                continue    # no clients connected to the server

            # readable - handle readable sockets.
            for s in rs:
                if s is self.fd:
                    # current socket is the listen socket, so accept.
                    cs, (rhost, rport) = s.accept()
                    LOG.info("....adding node(%s, %s) to the ring."
                             % (rhost, rport))
                    cs.setblocking(False)
                    rlist.append(cs)
                    cs_mapping[cs] = (rhost, rport)
                    cs_pool[cs] = Queue.Queue()  # create a data pool for this cs
                else:
                    # current socket is a client socket, so receive.
                    try:
                        peer_info = s.getpeername()
                    except Exception:
                        self._handle_failed(s, rlist, wlist, cs_pool)
                        if s in cs_mapping:
                            rhost, rport = cs_mapping.pop(s)
                            LOG.info("....removing node(%s, %s) from the ring."
                                     % (rhost, rport))
                        continue

                    head_data = self._recvmsg(s, HEADER_BYTES)
                    if not head_data:
                        self._handle_failed(s, rlist, wlist, cs_pool)
                        if s in cs_mapping:
                            rhost, rport = cs_mapping.pop(s)
                            LOG.info("....removing node(%s, %s) from the ring."
                                     % (rhost, rport))
                        continue

                    head_msg = struct.unpack("!II", head_data)
                    command_id = head_msg[0]
                    if command_id == HEARTBEAT:
                        # print "recv heartbeat from node(%s)" % str(peer_info)
                        ack_msg = struct.pack("!II", ACK_HEART, 0)
                        cs_pool[s].put(ack_msg)
                    elif command_id == NORMALMSG:
                        # print "recv task info from node(%s)" % str(peer_info)
                        live_instances = [cs_mapping.get(cs)
                                          for cs, queue in cs_pool.items()]
                        LOG.info("....ring has spider instances: ( %s )."
                                 % live_instances)
                        task_sock_mapping = self._get_task_sock_mapping(cs_pool)
                        self._fill_task_data_to_cspool(task_sock_mapping,
                                                       cs_pool, s)
                    # add the cs to the writable set so that queued responses
                    # (heartbeat acks and task data) get flushed this iteration
                    if s not in ws:
                        ws.append(s)

            # writable - handle writable sockets.
            for s in ws:
                try:
                    s.getpeername()
                except Exception:
                    self._handle_failed(s, rlist, wlist, cs_pool)
                    if s in cs_mapping:
                        rhost, rport = cs_mapping.pop(s)
                        LOG.info("....removing node(%s, %s) from the ring."
                                 % (rhost, rport))
                    continue

                try:
                    msg = cs_pool[s].get_nowait()
                except Queue.Empty:
                    if s in wlist:
                        wlist.remove(s)
                    continue

                ret = self._sendmsg(s, msg)
                if not ret:
                    self._handle_failed(s, rlist, wlist, cs_pool)
                    if s in cs_mapping:
                        rhost, rport = cs_mapping.pop(s)
                        LOG.info("....removing node(%s, %s) from the ring."
                                 % (rhost, rport))
                    continue

            # exceptional - handle sockets in an error state.
            for s in es:
                self._handle_failed(s, rlist, wlist, cs_pool)
                if s in cs_mapping:
                    rhost, rport = cs_mapping.pop(s)
                    LOG.info("....removing node(%s, %s) from the ring."
                             % (rhost, rport))

    def _handle_failed(self, s, rlist, wlist, cs_pool):
        self._close(s)
        if s in rlist:
            rlist.remove(s)
        if s in wlist:
            wlist.remove(s)
        if s in cs_pool:
            del cs_pool[s]

    def _close(self, cs):
        try:
            cs.close()
        except Exception:
            pass

    def _get_task_data(self):
        app_infos = self.app_service.fetch_apps()
        for info in app_infos:
            app_id = info.get("app_id")
            app_name = info.get("app_name")
            urls = self.app_service.fetch_urls(app_id)
            computes = self.app_service.fetch_computes(app_id)
            task_info = {}
            task_info["group"] = app_name
            task_info["urls"] = urls
            task_info["computes"] = computes
            yield task_info

    def _get_task_sock_mapping(self, cs_pool):
        """Associate the current tasks with the connected client sockets."""
        # distribute the tasks
        task_list = list(self._get_task_data())
        if not task_list:
            LOG.warn("pulling the task list failed......")
            return

        task_length = len(task_list)
        worker_nums = len(cs_pool)
        threshold = task_length / worker_nums
        # the last worker takes the remainder of the task list
        task_infos = [task_list[i * threshold:task_length]
                      if (i + 1) == worker_nums
                      else task_list[i * threshold:(i + 1) * threshold]
                      for i in xrange(worker_nums)]

        # associate each slice of tasks with a client socket
        task_sock_mapping = {}
        task_index = 0
        for sock, ret in cs_pool.items():
            task_sock_mapping[sock] = task_infos[task_index]
            task_index += 1
        return task_sock_mapping

    def _fill_task_data_to_cspool(self, task_sock_mapping, cs_pool, cs):
        """Queue the task data that belongs to the given client socket."""
        if not task_sock_mapping:
            # no tasks were pulled, so there is nothing to queue
            return
        task_data = task_sock_mapping.get(cs)
        task_msg = json.dumps(task_data)
        msglen = len(task_msg)
        package_len = HEADER_BYTES + msglen
        head_msg = struct.pack("!II", ACK_NORMAL, package_len)
        body_fmt = "!%ds" % msglen
        body_msg = struct.pack(body_fmt, task_msg)
        ack_msg = head_msg + body_msg
        cs_pool[cs].put(ack_msg)

    def _build_listen(self):
        proxy_host = self._get_local_ip()
        proxy_port = CONF.PROXY.proxy_port
        self.fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.fd.setblocking(False)
        self.fd.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.fd.bind((proxy_host, proxy_port))
        self.fd.listen(10)
        LOG.info("listen socket running on (%s, %s)."
                 % (proxy_host, proxy_port))

    def _get_local_ip(self):
        host_name = socket.gethostname()
        fqdn = socket.getfqdn(host_name)
        ip = socket.gethostbyname(fqdn)
        return ip

    def _sendmsg(self, cs, msg):
        def wrap(cs):
            totalsent = 0
            msglen = len(msg)
            if 0 == msglen:
                return False
            while totalsent < msglen:
                sent = cs.send(msg[totalsent:])
                if 0 == sent:
                    return False
                totalsent += sent
            return True
        return self._retry(wrap, cs, 'send')

    def _recvmsg(self, cs, msglen):
        def wrap(cs):
            chunks = []
            bytes_recd = 0
            while bytes_recd < msglen:
                chunk = cs.recv(min(msglen - bytes_recd, 2048))
                if '' == chunk:
                    return chunk
                bytes_recd += len(chunk)
                chunks.append(chunk)
            return ''.join(chunks)
        return self._retry(wrap, cs, 'recv')

    def _retry(self, func, cs, flag):
        """No need to retry, because the client has disconnected."""
        result = None
        try:
            result = func(cs)
        except Exception as _ex:
            LOG.error("carrying out (%s) failed: %s" % (flag, str(_ex)))
        return result

    def _init_zk(self):
        self.root = CONF.ZK.zk_root
        try:
            self.zk = KazooClient(hosts=CONF.ZK.zk_hosts)
            self.zk.start(timeout=15)
        except Exception as _ex:
            LOG.error("connecting to zk timed out, err: %s" % str(_ex))
            return
        sub_node = '%s/dispatcher' % self.root
        self.zk.create(sub_node,
                       self._get_local_ip(),
                       ephemeral=True,
                       sequence=True,
                       makepath=True)
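

# ---------------------------------------------------------------------------
# Illustrative sketch only: how a spider node might speak the wire format the
# proxy expects above -- an 8-byte "!II" header (command id, total package
# length), followed by a JSON body for task replies. It reuses the project's
# shared constants (NORMALMSG, ACK_NORMAL, HEADER_BYTES); the function names
# and the exact-read helper are assumptions for illustration, not part of the
# proxy itself.
def _example_request_tasks(proxy_host, proxy_port):
    sock = socket.create_connection((proxy_host, proxy_port))
    try:
        # ask the proxy for a task assignment; the proxy only reads the
        # header of incoming messages, so the length field is sent as 0.
        sock.sendall(struct.pack("!II", NORMALMSG, 0))

        # read the fixed-size reply header: (command id, total package length)
        head = _example_recv_exact(sock, HEADER_BYTES)
        if len(head) != HEADER_BYTES:
            return None
        command_id, package_len = struct.unpack("!II", head)

        # the body is the JSON-encoded task list assigned to this node
        body = _example_recv_exact(sock, package_len - HEADER_BYTES)
        if command_id == ACK_NORMAL:
            return json.loads(body)
        return None
    finally:
        sock.close()


def _example_recv_exact(sock, nbytes):
    # helper for the sketch: read exactly nbytes from the socket
    chunks = []
    remaining = nbytes
    while remaining > 0:
        chunk = sock.recv(min(remaining, 2048))
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return ''.join(chunks)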