Exemple #1
0
    def init_app(self):
        """Initialise the task dispatch proxy.

        Runs the parent class initialisation, sets up logging, then calls
        ``_init_zk`` and ``_build_listen`` and finally stores a new
        ``AppService`` instance on ``self.app_service``.
        """
        super(TaskProxy, self).init_app()
        logging.setup("task dispatch proxy.")
        LOG.info("task dispatch proxy run.")

        self._init_zk()
        self._build_listen()
        self.app_service = AppService()
Exemple #2
0
class TaskProxy(QApplication):
    """Task dispatch proxy application.

    Registers itself in ZooKeeper, listens on a TCP socket and, while it is
    the master node, answers client heartbeats and task requests (see
    ``run``).
    """
    # Application name and version identifiers.
    name = 'task proxy'
    version = 'v2.0'

    def init_app(self):
        """Initialise the task dispatch proxy.

        Runs the parent class initialisation, sets up logging, then calls
        ``_init_zk`` (ZooKeeper connection and registration) and
        ``_build_listen`` (listen socket) and finally stores a new
        ``AppService`` instance on ``self.app_service``.
        """
        super(TaskProxy, self).init_app()
        logging.setup("task dispatch proxy.")
        LOG.info("task dispatch proxy run.")

        self._init_zk()
        self._build_listen()
        self.app_service = AppService()

    def run(self):
        """Serve connected clients for as long as this node is the master.

        Part 1 installs a ZooKeeper children watch on ``self.root``.  The
        child with the smallest name is treated as the master; the proxy is
        active (``self.run_status`` is True) only while the value stored in
        that child equals the local IP.

        Part 2 is an endless ``select`` loop over the listen socket
        ``self.fd`` and the accepted client sockets.  While the node is not
        master the loop only sleeps.  For every readable client one header
        (two network-order unsigned ints) is read: a HEARTBEAT is answered
        with ACK_HEART, a NORMALMSG with the task data assigned to that
        client.  Replies are queued per client and written in the same loop
        iteration.

        This method never returns.
        """
        # part 1: follow the master election through the root's children.
        self.run_status = False

        @self.zk.ChildrenWatch(self.root)
        def children_changes(children):
            LOG.warn("....watch root(%s)'s childen changed: %r"
                     % (self.root, children))
            if not children:
                # min() raises ValueError on an empty list; without any
                # registered node there is no master, so stay idle.
                LOG.warn("....root(%s) has no child node, so wait."
                         % self.root)
                self.run_status = False
                return

            master = min(children)
            LOG.info("....current master is : %s" % master)
            path = "%s/%s" % (self.root, master)
            master_val = self.zk.get(path)[0]
            LOG.info("....get master(%s) node value is : %s"
                     % (master, master_val))
            native_ip = self._get_local_ip()
            if master_val == native_ip:
                LOG.info("....node(%s) is master, so runing."
                         % (master))
                self.run_status = True
            else:
                LOG.warn("....node(%s) is not master, so wait."
                         % (native_ip))
                self.run_status = False

        # part 2: select loop.
        rlist = [self.fd]  # wait until ready for reading
        wlist = []         # wait until ready for writing
        timeout = 10       # select timeout

        cs_pool = {}       # client socket => queue of pending replies
        cs_mapping = {}    # client socket => (ip, port)

        while True:
            if not self.run_status:
                time.sleep(1)
                continue

            rs, ws, es = select.select(rlist, wlist, rlist, timeout)
            if not (rs or ws or es):
                continue   # timeout: nothing to do

            # readable - handle the listen socket and client requests.
            for s in rs:
                if s is self.fd:
                    # listen socket: accept the new client.
                    cs, (rhost, rport) = s.accept()
                    LOG.info("....adding node(%s, %s) to the ring."
                             % (rhost, rport))
                    cs.setblocking(False)
                    rlist.append(cs)
                    cs_mapping[cs] = (rhost, rport)
                    cs_pool[cs] = Queue.Queue()  # reply queue of this client
                    continue

                # client socket: make sure it is still connected, then read.
                if not self._is_connected(s):
                    self._drop_client(s, rlist, wlist, cs_pool, cs_mapping)
                    continue

                head_data = self._recvmsg(s, HEADER_BYTES)
                if not head_data:
                    self._drop_client(s, rlist, wlist, cs_pool, cs_mapping)
                    continue

                command_id = struct.unpack("!II", head_data)[0]
                if command_id == HEARTBEAT:
                    cs_pool[s].put(struct.pack("!II", ACK_HEART, 0))

                elif command_id == NORMALMSG:
                    live_instancs = [cs_mapping.get(cs) for cs in cs_pool]
                    LOG.info("....ring have spider instances:( %s )."
                             % live_instancs)

                    task_sock_mapping = self._get_task_sock_mapping(cs_pool)
                    # The mapping can be None when no task could be pulled;
                    # hand over an empty mapping instead of crashing.
                    self._fill_task_data_to_cspool(
                        task_sock_mapping or {}, cs_pool, s
                    )

                # schedule the client for the write phase of this iteration.
                if s not in ws:
                    ws.append(s)

            # writeable - send one queued reply per client.
            for s in ws:
                if not self._is_connected(s):
                    self._drop_client(s, rlist, wlist, cs_pool, cs_mapping)
                    continue

                try:
                    msg = cs_pool[s].get_nowait()
                except Queue.Empty:
                    if s in wlist:
                        wlist.remove(s)
                    continue

                if not self._sendmsg(s, msg):
                    self._drop_client(s, rlist, wlist, cs_pool, cs_mapping)

            # exceptional - drop sockets reported in an error state.
            for s in es:
                if s not in rlist:
                    # Already dropped during the read/write phase above.
                    continue
                self._drop_client(s, rlist, wlist, cs_pool, cs_mapping)

    def _is_connected(self, s):
        """Return True when ``getpeername`` succeeds on ``s``, else False."""
        try:
            s.getpeername()
        except Exception:
            return False
        return True

    def _drop_client(self, s, rlist, wlist, cs_pool, cs_mapping):
        """Close ``s``, remove it from all bookkeeping and log the removal."""
        self._handle_failed(s, rlist, wlist, cs_pool)
        # pop() with a default: the socket may have no mapping entry (for
        # example the listen socket), which must not raise while unpacking.
        rhost, rport = cs_mapping.pop(s, (None, None))
        LOG.info("....removing node(%s, %s) from the ring."
                 % (rhost, rport))

    def _handle_failed(self, s, rlist, wlist, cs_pool):
        self._close(s)
        if s in rlist:
            rlist.remove(s)
        if s in wlist:
            wlist.remove(s)
        if s in cs_pool:
            del cs_pool[s]

    def _close(self, cs):
        try:
            cs.close()
        except:
            pass

    def _get_task_data(self):
        app_infos = self.app_service.fetch_apps()
        for info in app_infos:
            app_id = info.get("app_id")
            app_name = info.get("app_name")
            urls = self.app_service.fetch_urls(app_id)
            computes = self.app_service.fetch_computes(app_id)
            task_info = {}
            task_info["group"] = app_name
            task_info["urls"] = urls
            task_info["computes"] = computes
            yield task_info

    def _get_task_sock_mapping(self, cs_pool):
        """ 将现有任务与链接的client socket进行关联
        """
        # 分配任务
        task_list = list(self._get_task_data())
        if not task_list:
            LOG.warn("pull task list is failed......")
            return

        task_length = len(task_list)
        worker_nums = len(cs_pool)
        threshold = task_length / worker_nums
        task_infos = [(lambda i: task_list[i*threshold:task_length]
                      if (i+1) == worker_nums
                      else task_list[i*threshold: (i+1)*threshold])(i)
                      for i in xrange(worker_nums)]

        # 任务关联:
        task_sock_mapping = {}
        task_index = 0
        for sock, ret in cs_pool.items():
            task_sock_mapping[sock] = task_infos[task_index]
            task_index += 1
        return task_sock_mapping

    def _fill_task_data_to_cspool(self, task_sock_mapping, cs_pool, cs):
        """Queue the task data assigned to ``cs`` as an ACK_NORMAL reply.

        The tasks for ``cs`` are looked up in ``task_sock_mapping``,
        serialised as JSON and prefixed with a header made of ACK_NORMAL and
        the total package length (header plus body).  The resulting message
        is put on the reply queue ``cs_pool[cs]``.

        ``task_sock_mapping`` may be None or empty; the body is then the
        JSON value ``null``.
        """
        task_data = task_sock_mapping.get(cs) if task_sock_mapping else None
        task_msg = json.dumps(task_data)
        if not isinstance(task_msg, bytes):
            # json.dumps may return text; the wire format needs bytes.
            task_msg = task_msg.encode("utf-8")
        package_len = HEADER_BYTES + len(task_msg)
        head_msg = struct.pack("!II", ACK_NORMAL, package_len)
        # An "Ns" struct field holding exactly N bytes is the payload
        # itself, so the body can be appended to the header directly.
        cs_pool[cs].put(head_msg + task_msg)

    def _build_listen(self):
        """Create the non-blocking TCP listen socket stored in ``self.fd``.

        The socket is bound to the local IP and ``CONF.PROXY.proxy_port``
        with SO_REUSEADDR set, and listens with a backlog of 10.
        """
        address = (self._get_local_ip(), CONF.PROXY.proxy_port)
        listener = self.fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listener.setblocking(False)
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listener.bind(address)
        listener.listen(10)
        LOG.info("listen socket running on (%s, %s)." % address)

    def _get_local_ip(self):
        """Return the address that this host's fully qualified name resolves to."""
        return socket.gethostbyname(socket.getfqdn(socket.gethostname()))

    def _sendmsg(self, cs, msg):
        def wrap(cs):
            totalsent = 0
            msglen = len(msg)
            if 0 == msglen: return False
            while totalsent < msglen:
                sent = cs.send(msg[totalsent:])
                if 0 == sent:
                    return False
                totalsent += sent
            return True
        return self._retry(wrap, cs, 'send')

    def _recvmsg(self, cs, msglen):
        def wrap(cs):
            chunks = []
            bytes_recd = 0
            while bytes_recd < msglen:
                chunk = cs.recv(min(msglen - bytes_recd, 2048))
                if '' == chunk:
                    return chunk
                bytes_recd += len(chunk)
                chunks.append(chunk)
            return ''.join(chunks)
        return self._retry(wrap, cs, 'recv')

    def _retry(self, func, cs, flag):
        """No need to retry, because client is disconnect.
        """
        result = None
        try:
            result = func(cs)
        except Exception as _ex:
            LOG.error("carray out (%s) is failed: %s" % (flag, str(_ex)))
        return result

    def _init_zk(self):
        """Connect to ZooKeeper and register this node below the root path.

        Stores ``CONF.ZK.zk_root`` in ``self.root`` and the started client
        in ``self.zk``.  On success an ephemeral, sequential node
        ``<root>/dispatcher`` holding the local IP is created.  When
        creating or starting the client raises, the error is logged and no
        node is created.
        """
        self.root = CONF.ZK.zk_root
        try:
            self.zk = KazooClient(hosts=CONF.ZK.zk_hosts)
            self.zk.start(timeout=15)
        except Exception as _ex:
            LOG.error("connect to zk is timeout, err: %s" % str(_ex))
        else:
            node_path = '%s/dispatcher' % self.root
            node_flags = dict(ephemeral=True, sequence=True, makepath=True)
            self.zk.create(node_path, self._get_local_ip(), **node_flags)