Ejemplo n.º 1
0
def start_self_upgrade(cmd=False):
    """Replace the local ``yzy_upgrade`` executable with the packaged one.

    With ``cmd`` false, the function removes a stale self-upgrade flag file
    (if any), launches ``<BASE_DIR>/yzy_upgrade self_upgrade`` as a child
    process and returns immediately.  With ``cmd`` true it does the
    replacement itself: stop the ``yzy-upgrade`` service, swap the
    executable, start the service again and write the flag file.

    :param cmd: whether the self-upgrade program was started from the
        command line.
    :return: the result of ``get_error_result``: the default result on
        success, otherwise ``StopServiceError``, ``UpgradeSlavesError``
        (file replacement failed) or ``StartServiceError``.
    """
    logger.info("start self upgrade, cmd:%s", cmd)
    upgrade_path = os.path.join(constants.BASE_DIR, 'yzy_upgrade')
    if not cmd:
        # Drop the flag left by a previous run before spawning the worker.
        if os.path.exists(const.SELF_UPGRADE_FILE):
            os.remove(const.SELF_UPGRADE_FILE)
        # NOTE(review): presumably the child process ends up calling this
        # function with cmd=True -- confirm against the CLI entry point.
        subprocess.Popen([upgrade_path, "self_upgrade"])
        return get_error_result()

    logger.info("begin stop upgrade")
    # Stop the upgrade service; any stderr output is treated as failure.
    _stdout, stderr = execute("systemctl", "stop", "yzy-upgrade")
    if stderr:
        return get_error_result("StopServiceError", service="yzy-upgrade")

    logger.info("start replace file")
    try:
        os.remove(upgrade_path)
        source = os.path.join(const.UPGRADE_KVM_PATH, 'yzy_upgrade')
        logger.info("copy %s to %s", source, upgrade_path)
        shutil.copy2(source, upgrade_path)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed here.
        logger.exception("copy file failed", exc_info=True)
        return get_error_result("UpgradeSlavesError")

    # Start the service again.
    _stdout, stderr = execute("systemctl", "start", "yzy-upgrade")
    if stderr:
        return get_error_result("StartServiceError", service="yzy-upgrade")

    # Write the (empty) self-upgrade flag file.
    with open(const.SELF_UPGRADE_FILE, 'w') as fd:
        fd.write("")
    return get_error_result()
Ejemplo n.º 2
0
    def rollback_services(self, master=False):
        """Restart the node's services and verify they came back up.

        ``yzy-compute`` and ``yzy-monitor`` are always restarted; when
        ``master`` is true the controller-side services are restarted too.

        :param master: whether to include the controller-side services.
        :return: the default ``get_error_result`` on success;
            ``StartServiceError`` when a restart wrote to stderr or the
            status check reports failures; ``OtherError`` on an exception.
        """
        try:
            targets = ["yzy-compute", "yzy-monitor"]
            if master:
                targets += [
                    "yzy-server", "yzy-scheduler", "yzy-terminal",
                    "yzy-terminal-agent", "nginx", "yzy-web"
                ]

            for name in targets:
                logger.info("restart service %s", name)
                _out, err_text = execute("systemctl", "restart", name)
                if err_text:
                    return get_error_result("StartServiceError", service=name)
        except Exception as exc:
            logger.exception("rollback_services Exception: %s" % str(exc),
                             exc_info=True)
            return get_error_result("OtherError")

        # Check whether the old-version services started successfully.
        not_running = self._check_services_status(master)
        if not_running:
            return get_error_result("StartServiceError",
                                    service=", ".join(not_running))

        # Rollback finished: clear the upgrade-package and temp directories.
        self._clean_pkg_dirs()
        return get_error_result()
Ejemplo n.º 3
0
    def rollback_publish(self, package_id, package_path):
        """Ask every node to delete a previously published upgrade package.

        One ``_sync_delete_package`` call is submitted per node returned by
        ``db_api.get_node_with_all`` and the results are collected as they
        complete.

        :param package_id: identifier of the package to delete.
        :param package_path: path of the package to delete.
        :return: ``Success`` when every node reported code 0, otherwise
            ``UploadPackageSyncError`` carrying the list of failed nodes.
        """
        logger.info("rollback publish upgrade package on compute nodes")
        nodes = db_api.get_node_with_all({})
        failed_nodes = []

        with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as pool:
            pending = [
                pool.submit(self._sync_delete_package, node.ip, package_id,
                            package_path)
                for node in nodes
            ]
            for finished in as_completed(pending):
                outcome = finished.result()
                if outcome.get("code") == 0:
                    continue
                ipaddr = outcome.get("ipaddr", "")
                msg = outcome.get("msg", "")
                logger.error(
                    "node: %s rollback publish upgrade package failed: %s",
                    ipaddr, msg)
                failed_nodes.append({"ipaddr": ipaddr, "msg": msg})

        if not failed_nodes:
            return get_error_result("Success")
        return get_error_result("UploadPackageSyncError",
                                {"failed_nodes": failed_nodes})
Ejemplo n.º 4
0
 def check_vip(self, vip):
     """Run ``_check_vip(5, vip)`` and translate the outcome into a result.

     :param vip: value forwarded to ``_check_vip``.
     :return: the default ``get_error_result`` when no exception is raised,
         otherwise ``SwitchHaMasterError`` (the exception is logged).
     """
     try:
         self._check_vip(5, vip)
         return get_error_result()
     except Exception as exc:
         logging.exception(str(exc), exc_info=True)
         return get_error_result("SwitchHaMasterError")
Ejemplo n.º 5
0
    def request_bt_server(self, service_name, request_data):
        """Send one request to the BT service and return the decoded reply.

        The request is JSON-encoded, framed with ``YzyProtocol.create_paket``
        and sent over ``self.socket`` (initialised on demand through
        ``socket_init``).  The reply header is read first, then the body
        whose length is computed from the header fields.

        :param service_name: key into ``name_service_code``.
        :param request_data: JSON-serialisable request payload.
        :return: the parsed reply body (``data_json()``) on success;
            ``BtResponseMsgError`` when the reply is not a response packet;
            ``OtherError`` on any exception (short header, empty body,
            socket error, ...).  On every failure path the socket is closed
            and dropped so that the next call reconnects.
        """
        try:
            if not self.socket:
                self.socket_init()

            ret_str = json.dumps(request_data)
            service_code = name_service_code[service_name]
            _size, msg = YzyProtocol().create_paket(
                service_code,
                ret_str.encode("utf-8"),
                b'',
                sequence_code=6666,
                req_or_res=YzyProtocolType.REQ,
                client_type=ClientType.SERVER)
            logger.info("Send request msg size: {}, msg: {}".format(
                _size, msg))
            self.socket.send(msg)

            head_msg = self.socket.recv(YzyProtocol.header_length)
            # Validate what was *received*; the original checked the
            # outgoing ``msg`` and then carried on with a short header.
            if not head_msg or len(head_msg) < YzyProtocol.header_length:
                get_head_len = 0 if not head_msg else len(head_msg)
                logger.error("Get head error, length {}".format(get_head_len))
                raise Exception("BT API GET HEAD ERROR !")

            paket_struct = YzyProtocol().parse_paket_header(head_msg)
            logger.info("Receive head msg: {}".format(head_msg))
            logger.debug("Parse head: {}".format(paket_struct))
            body_length = (paket_struct.data_size +
                           paket_struct.token_length +
                           paket_struct.supplementary)
            if paket_struct.req_or_res == 2:  # 1-request, 2-response
                logger.debug(
                    "Get response: service_code[{}-{}], sequence_no[{}] ".
                    format(
                        paket_struct.service_code,
                        service_code_name[paket_struct.service_code],
                        paket_struct.sequence_code,
                    ))

                body = self.socket.recv(body_length)
                paket_struct.set_data(body)
                logger.debug("Get body: {}".format(body))
                if not body:
                    logger.error("bt api GET BODY ERROR !")
                    raise Exception("BT API GET BODY ERROR !")
                ret_data = paket_struct.data_json()
                logger.debug("Parsed body: {}".format(ret_data))
            else:
                logger.error("message head req_or_res type error")
                self.socket.close()
                # Forget the closed socket; otherwise the next call would
                # skip socket_init() and send on a closed descriptor.
                self.socket = None
                return get_error_result("BtResponseMsgError")
            return ret_data
        except Exception as err:
            logger.error("tcp socket error: %s" % err)
            logger.error(''.join(traceback.format_exc()))
            if self.socket:
                self.socket.close()
                self.socket = None
            return get_error_result("OtherError")
Ejemplo n.º 6
0
    def sync(self, data):
        """Dispatch a package-sync request to the agent.

        ``data`` must contain a ``command`` key; it is removed from ``data``
        before the rest of the payload is handed to the agent.

        :param data: request payload (a mutable mapping).
        :return: the agent's result for ``download`` / ``delete``;
            ``UpgradeRequestParamError`` when ``data`` is empty, has no
            ``command`` key, or names an unknown command.
        """
        if not data:
            return get_error_result("UpgradeRequestParamError")

        # A missing key is a bad request like any other unknown command,
        # so default to None instead of letting pop() raise KeyError.
        command = data.pop('command', None)
        if command == 'download':
            return self.agent.get_package_from_controller(data)
        if command == 'delete':
            return self.agent.delete_dirty_package(data)
        return get_error_result("UpgradeRequestParamError")
Ejemplo n.º 7
0
 def client_biz_processor(self, client, is_req, seq_id, handler_name,
                          message):
     """Validate a terminal message and route it to a handler or the server.

     Processing order:

     1. Upper-case ``message["mac"]`` when present.
     2. Reject with ``TerminalTokenError`` when the message has no token
        (unless the target method is ``client_terminal_login``) or when the
        token differs from the one stored for ``client.mac`` in
        ``self.clients``.
     3. If this object has a method named ``client_<handler_name>``, call it
        with ``(client, message)`` and return its result.
     4. Otherwise, for a terminal present in ``self.clients``, forward the
        message to the server via ``voi_terminal_post``; for an unknown
        terminal return ``TerminalNotLogin`` and close the client's socket.

     :param client: connection object; ``mac``, ``client_ip`` and
         ``socket_client`` are read from it.
     :param is_req: selects the forwarded ``cmd``: ``handler_name`` when
         true, ``"command_response"`` otherwise.
     :param seq_id: only used in the debug log.
     :param handler_name: name of the requested operation.
     :param message: mutable dict payload; it is modified in place.
     :return: the handler's or server's result, or an error result.
     """
     # Log at most the first 1000 characters of the formatted line.
     logger.debug(
         "client: {}, is_req: {}, seq_id: {}, handler_name: {} message: {}".
         format(client, is_req, seq_id, handler_name, message)[:1000])
     # client, is_req, seq_id, handler_name, message))
     if message.get("mac", None):
         message["mac"] = message["mac"].upper()
     terminal_mac = client.mac
     method_name = "client_%s" % handler_name
     # Token check: missing token (login excepted), or a token that does not
     # match the one recorded for this terminal.  The token is decoded as
     # UTF-8 before the comparison.
     if (not message.get('token', None) and method_name != "client_terminal_login") or \
             (message.get('token', None) and
              self.clients.get(terminal_mac, None) and
              (message['token'].decode('utf-8') != self.clients[terminal_mac].token)):
         ret = get_error_result("TerminalTokenError", msg="en")
         logger.error("voi terminal token error: %s" % client)
         return ret
     # Locally implemented handlers take precedence over forwarding.
     if hasattr(self, method_name):
         func = getattr(self, method_name)
         ret = func(client, message)
         logger.debug(
             "Client request method_name(no flask request): {}, ret: {}".
             format(method_name, ret))
         return ret
     logger.info("terminal_mac: {}, client: {}, method_name: {}".format(
         terminal_mac, client, method_name))
     if terminal_mac in self.clients.keys():
         # NOTE(review): syscall 186 is presumably gettid on x86-64 Linux;
         # the value is only used for the log line below -- confirm.
         thread_id = ctypes.CDLL('libc.so.6').syscall(186)
         thread_ident = threading.currentThread().ident
         logger.info(
             "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s"
             % (self.clients, os.getpid(), os.getppid(), thread_id,
                thread_ident))
         # Both keys are removed before forwarding; pop() without a default
         # raises KeyError if either one is absent.
         message.pop('supplementary')
         message.pop('token')
         message["service_name"] = handler_name
         message["terminal_mac"] = terminal_mac
         message["terminal_ip"] = client.client_ip
         # Notify the server.
         _data = {
             # "cmd": handler_name if is_req else (handler_name + "_response"),
             "cmd": handler_name if is_req else "command_response",
             "data": message
         }
         logger.info("voi terminal request server : %s" % _data)
         ret = voi_terminal_post("/api/v1/voi/terminal/task/", _data)
         logger.info("voi terminal server return: %s" % ret)
     else:
         # Terminal is not registered: report it and drop the connection.
         ret = get_error_result("TerminalNotLogin", msg="en")
         logger.error("voi terminal not login: %s" % client)
         client.socket_client.socket.close()
     return ret
Ejemplo n.º 8
0
def upgrade_cluster():
    """Run the self-upgrade on every slave node, then on this node.

    Steps: check node status, post the self-upgrade request to every slave
    concurrently, poll each slave's self-upgrade status once per second
    until all report code 0, and finally call ``start_self_upgrade()``
    locally.

    :return: the node-status error if the status check fails,
        ``UpgradeSlavesError`` (with the failed node list) if any slave
        rejects the request, otherwise the default ``get_error_result``.
    """
    # Make sure every node is online.
    node_status = UpgradeManager().check_node_status()
    if node_status.get('code', 0) != 0:
        return node_status
    logger.info("start self upgrade, check node status success")
    slave_ips = node_status['slaves']

    failed_nodes = []
    url = "api/v1/index/self_upgrade"
    with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as executor:
        tasks = [
            executor.submit(upgrade_post, node_ip, url, data={})
            for node_ip in slave_ips
        ]
        for future in as_completed(tasks):
            res = future.result()
            if res.get("code") == 0:
                continue
            logger.error("url:%s failed: %s, msg: %s" %
                         (url, res.get("ipaddr", ""), res.get("msg", "")))
            failed_nodes.append(res.get("ipaddr", ""))
    if failed_nodes:
        return get_error_result("UpgradeSlavesError",
                                {"failed_nodes": failed_nodes})

    url = "api/v1/index/get_self_upgrade_status"
    while True:
        finished = []
        for node_ip in slave_ips:
            try:
                reply = upgrade_post(node_ip, url, data={})
                node_done = reply.get('code') == 0
            except Exception:
                logger.exception("get self upgrade state in node %s failed",
                                 node_ip)
                node_done = False
            finished.append(node_done)

        # All nodes have finished upgrading.
        if all(finished):
            logger.info("Other host self upgrade successful")
            break

        # Check once per second.
        time.sleep(1)

    start_self_upgrade()
    return get_error_result()
Ejemplo n.º 9
0
 def client_terminal_logout(self, client, message=None):
     """Remove a terminal from ``self.clients`` and report it to the server.

     :param client: connection object; ``mac``, ``client_ip`` and
         ``client_port`` are read from it.
     :param message: unused.
     :return: the server's reply when the terminal was registered,
         otherwise a ``Success`` result.
     """
     logger.info("client_logout: %s" % client)
     terminal_mac = client.mac
     if terminal_mac not in self.clients.keys():
         logger.error("voi terminal logout error: %s is not exist" %
                      terminal_mac)
         logger.info("client %s,  end......" % client)
         return get_error_result("Success", "en")

     self.clients.pop(terminal_mac)
     tid = ctypes.CDLL('libc.so.6').syscall(186)
     logger.info(
         "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s"
         % (self.clients, os.getpid(), os.getppid(), tid,
            threading.currentThread().ident))

     # Notify the server.
     payload = {
         "cmd": "terminal_logout",
         "data": {
             "mac": terminal_mac,
             "ip": client.client_ip,
             "port": client.client_port
         }
     }
     ret = voi_terminal_post("/api/v1/voi/terminal/task/", payload)
     logger.info("voi terminal server return: %s" % ret)
     if ret.get("code") != 0:
         logger.error("voi terminal client_logout error: %s" % ret)
     logger.info("client %s,  end......" % client)
     return ret
Ejemplo n.º 10
0
 def switch_ha_master(self, new_vip_host_ip, vip):
     """Hand the VIP over to ``new_vip_host_ip``.

     Restarts keepalived on the VIP host (this node), after which the VIP
     switches to the new host automatically, then asks the new host to
     check the VIP.

     :param new_vip_host_ip: IP of the node that should take over the VIP.
     :param vip: the virtual IP being moved.
     :return: the default ``get_error_result``.
     """
     restart_cmd = "systemctl restart keepalived"
     self._run_cmd(restart_cmd, SwitchHaMasterException)
     self._notify_new_vip_host_check(new_vip_host_ip, vip)
     success_msg = ("switch master to new_vip_host_ip %s success" %
                    new_vip_host_ip)
     logging.info(success_msg)
     return get_error_result()
Ejemplo n.º 11
0
 def client_except_exit(self, client, message=None):
     """Record an abnormal terminal disconnect and report it to the server.

     For a terminal present in ``self.clients`` the stored entry is replaced
     by ``client`` with ``heartbeat`` set to False, the ip:port -> mac
     mapping is refreshed, and the server is notified.

     :param client: connection object; ``mac``, ``client_ip`` and
         ``client_port`` are read from it.
     :param message: unused.
     :return: the server's reply when the terminal was registered (on both
         success and failure), otherwise a ``Success`` result.
     """
     terminal_mac = client.mac
     terminal_ip = client.client_ip
     terminal_port = client.client_port
     logger.info("client_except_exit: %s" % terminal_mac)
     if terminal_mac in self.clients.keys():
         self.clients[terminal_mac] = client
         self.clients[terminal_mac].heartbeat = False
         # self.clients[terminal_mac].alive_timestamp = dt.datetime.now()
         ip_port = self.ip_port_str(terminal_ip, terminal_port)
         self.ip_port_mac[ip_port] = terminal_mac
         thread_id = ctypes.CDLL('libc.so.6').syscall(186)
         logger.info(
             "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s"
             % (self.clients, os.getpid(), os.getppid(), thread_id,
                threading.currentThread().ident))
         # Notify the server.
         _data = {
             "cmd": "terminal_except_exit",
             "data": {
                 "mac": terminal_mac,
                 "ip": client.client_ip,
             }
         }
         ret = voi_terminal_post("/api/v1/voi/terminal/task/", _data)
         logger.info("voi terminal server return: %s" % ret)
         if ret.get("code") != 0:
             logger.error("voi terminal client_except_exit error: %s" % ret)
         # Return the reply on success as well; previously the success path
         # fell off the end of the function and returned None.
         return ret
     else:
         logger.debug("voi terminal client_except_exit: %s is not exist" %
                      terminal_mac)
         logger.info("client %s,  end......" % client)
         return get_error_result("Success", "en")
Ejemplo n.º 12
0
    def start_backup(self):
        """Make the HA helper scripts executable and enable keepalived.

        :return: the default ``get_error_result`` on success, or
            ``StartBackupHAError`` if any step raises (the exception is
            logged).
        """
        try:
            # Step 9: start keepalived.  The backup controller's instance
            # must be started after the master's so that the VIP binds to
            # the master.
            # (A "start slave" mysql command is intentionally not run here.)
            commands = [
                "chmod +x %s" % script
                for script in (self.check_brain_file, self.notify_sh_file)
            ]
            commands.append("systemctl enable --now keepalived")
            for command in commands:
                self._run_cmd(command, EnableHaException)
            return get_error_result()
        except Exception as exc:
            logging.exception(str(exc), exc_info=True)
            return get_error_result("StartBackupHAError")
Ejemplo n.º 13
0
 def disable_ha(self, vip_host_ip, peer_host_ip, paths,
                voi_template_list=None, voi_xlms=None, post_data=None):
     """Disable HA on the peer host first, then on the local VIP host.

     :param vip_host_ip: forwarded to ``_disable_master``.
     :param peer_host_ip: forwarded to ``_disable_backup``.
     :param paths: forwarded to ``_disable_backup``.
     :param voi_template_list: forwarded to ``_disable_backup``.
     :param voi_xlms: forwarded to ``_disable_backup``.
     :param post_data: forwarded to ``_disable_master``.
     :return: the default ``get_error_result`` on success, or
         ``DisableHAError`` if either step raises (the exception is logged).
     """
     try:
         # Stop peer_host first, then the local vip_host.
         self._disable_backup(peer_host_ip, paths, voi_template_list,
                              voi_xlms)
         self._disable_master(vip_host_ip, post_data)
         return get_error_result()
     except Exception as exc:
         logging.exception(str(exc), exc_info=True)
         return get_error_result("DisableHAError")
Ejemplo n.º 14
0
    def _stop_services(self, master_ip, slave_ips):
        """Stop services on the master node, then on every compute node.

        :param master_ip: IP of the master node, used for logging and for
            the ``failed_nodes`` payload.
        :param slave_ips: IPs of the compute nodes to notify.
        :return: the manager's error result (with ``data`` set to the master
            IP) if the master fails, ``StopSlavesServiceError`` if any
            compute node fails, otherwise the default ``get_error_result``.
        """
        # The master node stops its own services first.
        ret = self.manger.stop_services(master=True)
        if ret.get('code') != 0:
            logger.error("stop service failed: %s" % master_ip)
            ret['data'] = {'failed_nodes': [master_ip]}
            return ret

        # Notify each compute node to stop its services.
        failed_nodes = self.manger.notify_slaves(
            slave_ips, url="api/v1/index/stop_slave_services")
        if failed_nodes:
            # The format string needs a placeholder for the argument;
            # without one, logging reports a formatting error and the node
            # list never reaches the log.
            logger.error("stop service failed: %s", failed_nodes)
            return get_error_result("StopSlavesServiceError",
                                    data={'failed_nodes': failed_nodes})

        return get_error_result()
Ejemplo n.º 15
0
    def check_backup_ha_status(self, quorum_ip, sensitivity, paths):
        """Collect four HA health indicators for this node.

        Indicators, in the order they appear in the returned ``data`` list:
        keepalived status, quorum-IP reachability, MySQL slave status and
        file-sync status (every entry of ``paths`` exists).

        :param quorum_ip: IP to ping; when falsy the quorum status is
            ``HA_STATUS_UNKNOWN``.
        :param sensitivity: passed to ``icmp_ping`` as ``count``.
        :param paths: filesystem paths that must all exist.
        :return: ``Success`` with the four-element status list, or
            ``OtherError`` if anything raises (the exception is logged).
        """
        try:
            code, out = cmdutils.run_cmd("systemctl status keepalived",
                                         ignore_log=True)
            if code != 0 or "active (running)" not in out:
                keepalived_status = constants.HA_STATUS_FAULT
                logging.error("keepalived not running")
            else:
                keepalived_status = constants.HA_STATUS_NORMAL

            if quorum_ip:
                if not icmp_ping(quorum_ip, timeout=1, count=sensitivity):
                    quorum_ip_status = constants.HA_STATUS_FAULT
                    logging.error("ping quorum_ip[%s] failed" % quorum_ip)
                else:
                    quorum_ip_status = constants.HA_STATUS_NORMAL
            else:
                quorum_ip_status = constants.HA_STATUS_UNKNOWN

            # "\\G" keeps the backslash literal; a bare "\G" is an invalid
            # escape sequence that newer Python versions warn about.  The
            # command text sent to the shell is unchanged.
            # NOTE(review): the DB password is placed on the command line.
            code, out = cmdutils.run_cmd(
                "mysql -u{user} -p{pwd} -e \"SHOW SLAVE STATUS\\G;\" |grep \"Error \""
                .format(user=self.db_user, pwd=self.db_pwd),
                ignore_log=True)
            # Any line matched by grep counts as a replication error.
            if out:
                mysql_slave_status = constants.HA_STATUS_FAULT
                logging.error("mysql slave status error: %s", out)
            else:
                mysql_slave_status = constants.HA_STATUS_NORMAL

            if all(os.path.exists(path) for path in paths):
                file_sync_status = constants.HA_STATUS_NORMAL
            else:
                file_sync_status = constants.HA_STATUS_FAULT

            ret = get_error_result("Success",
                                   data=[
                                       keepalived_status, quorum_ip_status,
                                       mysql_slave_status, file_sync_status
                                   ])
        except Exception as e:
            logging.exception(str(e), exc_info=True)
            ret = get_error_result("OtherError")
        return ret
Ejemplo n.º 16
0
    def upload(self, file_obj):
        """Save an uploaded upgrade package, then unpack and validate it.

        The package is written to ``<UPGRADE_FILE_PATH>/<uuid>.tar.gz`` while
        its MD5 digest is computed chunk by chunk.

        :param file_obj: uploaded file object, iterated through ``chunks``.
        :return: ``Success`` with ``package_id``, ``package_path`` and
            ``md5_value``; ``UpgradePackageFormatError`` if decompression
            fails; ``PackageNotMatchSystem`` if the package check fails;
            ``OtherError`` on an exception.  Every error result carries
            ``package_path``.
        """
        logger.info("go to upload func")
        package_id = create_uuid()
        base_path = constants.UPGRADE_FILE_PATH

        if not os.path.exists(base_path):
            os.makedirs(base_path)

        package_path = os.path.join(base_path, "".join([package_id,
                                                        ".tar.gz"]))
        logger.info("begin save upgrade compress file to %s", package_path)
        try:
            digest = hashlib.md5()
            # The with-block closes the file; no explicit close() is needed.
            with open(package_path, "wb") as f:
                for chunk in chunks(file_obj):
                    digest.update(chunk)
                    f.write(chunk)
            md5_value = digest.hexdigest()

            # Decompress the upgrade package.
            if not decompress_package(package_path):
                return get_error_result("UpgradePackageFormatError",
                                        data={"package_path": package_path})

            # Validate the upgrade package.
            if not self._check_package():
                return get_error_result("PackageNotMatchSystem",
                                        data={"package_path": package_path})

        except Exception:
            logger.exception("save upgrade package error", exc_info=True)
            return get_error_result("OtherError",
                                    data={"package_path": package_path})

        return get_error_result("Success",
                                data={
                                    "package_id": package_id,
                                    "package_path": package_path,
                                    "md5_value": md5_value
                                })
Ejemplo n.º 17
0
    def _start_upgrade(self, master_ip, slave_ips):
        """Back up, replace, run the upgrade script and start services.

        Compute nodes are upgraded first, then the master node upgrades
        itself.

        :param master_ip: IP of the master node, used for logging and for
            the ``failed_nodes`` payload.
        :param slave_ips: IPs of the compute nodes to notify.
        :return: ``UpgradeSlavesError`` if any compute node fails, the
            manager's error result (with ``data`` set to the master IP) if
            the master fails, otherwise the default ``get_error_result``.
        """
        # Notify each compute node to upgrade.
        failed_nodes = self.manger.notify_slaves(
            slave_ips, url="api/v1/index/upgrade_slave")
        if failed_nodes:
            # The format string needs a placeholder for the argument;
            # without one, logging reports a formatting error and the node
            # list never reaches the log.
            logger.error("upgrade process failed: %s", failed_nodes)
            return get_error_result("UpgradeSlavesError",
                                    data={'failed_nodes': failed_nodes})

        # The master node upgrades itself.
        ret = self.manger.upgrade_process(master=True)
        if ret.get('code') != 0:
            logger.error("upgrade process failed: %s" % master_ip)
            ret['data'] = {'failed_nodes': [master_ip]}
            return ret

        return get_error_result()
Ejemplo n.º 18
0
    def delete_dirty_package(self, data):
        """Delete the leftover upgrade package on this compute node.

        :param data: mapping with ``package_id`` and ``package_path``.
        :return: ``UpgradeRequestParamError`` when either key is missing or
            empty, ``Success`` after cleanup, ``OtherError`` if removal
            raises (the exception is logged).
        """
        package_id = data.get("package_id")
        package_path = data.get("package_path")
        if not (package_id and package_path):
            return get_error_result("UpgradeRequestParamError")

        tmp_dir = constants.UPGRADE_TMP_PATH
        try:
            # Remove the upgrade package itself.
            if os.path.exists(package_path):
                os.remove(package_path)
            # Remove the temporary directory used for decompression.
            if os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir)
        except Exception:
            logger.exception("delete the package failed: package_path: %s" %
                             package_path,
                             exc_info=True)
            return get_error_result("OtherError")
        return get_error_result("Success")
Ejemplo n.º 19
0
    def rollback_process(self, master=False):
        """Roll the node back to the old version.

        If a backup exists it is restored (rollback script, clear project
        directory, copy the backup back); otherwise the services are simply
        started again.

        :param master: forwarded to ``_start_services`` and
            ``_check_services_status``.
        :return: ``RunRollbackScriptFailed``, ``MoveFileFailed``, the error
            from ``_start_services``, ``StartServiceError``, or the default
            ``get_error_result`` on success.
        """
        backup_dir = constants.UPGRADE_BACKUP_PATH
        if not os.path.exists(backup_dir):
            logger.info('backup don`t exists, start services to rollback')
        else:
            logger.info('backup exists, use backup to rollback')
            # Run the rollback script.
            script = os.path.join(constants.UPGRADE_KVM_PATH,
                                  constants.ROLLBACK_SCRIPT_RELATIVE_PATH)
            if not self._run_script(script):
                return get_error_result("RunRollbackScriptFailed")

            # Empty the project directory.
            self._clear_server_dir(all=True)

            # Copy the backed-up code into the project directory.
            restored = self._rollback_yzy_server(
                constants.UPGRADE_BACKUP_PATH, const.BASE_DIR)
            if not restored:
                return get_error_result("MoveFileFailed")

        # Start the old-version services.
        started = self._start_services(master)
        if started.get('code') != 0:
            return started

        # Check whether the old-version services started successfully.
        not_running = self._check_services_status(master)
        if not_running:
            return get_error_result("StartServiceError",
                                    service=", ".join(not_running))

        # Rollback finished: clear the package, temp and backup directories.
        self._clean_pkg_dirs()

        logger.info("rollback upgrade process success")
        return get_error_result()
Ejemplo n.º 20
0
    def publish(self, package_id, package_path, md5_value=None):
        """Distribute the upgrade package to every non-master node.

        Each non-master node is asked, concurrently, to download the package
        from ``http://<controller_image.ip>:<port>``; the port is the last
        ``:``-separated field of the ``upgrade_bind`` address, or
        ``UPGRADE_DEFAULT_PORT`` when that setting is empty.

        :param package_id: identifier of the package.
        :param package_path: path of the package.
        :param md5_value: optional checksum forwarded to the nodes.
        :return: ``Success`` when every node reported code 0, otherwise
            ``UploadPackageSyncError`` carrying the list of failed nodes.
        """
        logger.info("sync the upgrade package to compute nodes")
        controller_image = db_api.get_controller_image()
        nodes = db_api.get_node_with_all({})
        pending = []
        failed_nodes = []
        bind = SERVER_CONF.addresses.get_by_default('upgrade_bind', '')
        port = bind.split(':')[-1] if bind else constants.UPGRADE_DEFAULT_PORT

        with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as pool:
            for node in nodes:
                is_master = node.type in [
                    constants.ROLE_MASTER_AND_COMPUTE,
                    constants.ROLE_MASTER
                ]
                if is_master:
                    continue
                pending.append(
                    pool.submit(
                        self._sync_download_package,
                        "http://%s:%s" % (controller_image.ip, port),
                        node.ip, package_id, package_path, md5_value))
            for finished in as_completed(pending):
                outcome = finished.result()
                if outcome.get("code") == 0:
                    continue
                ipaddr = outcome.get("ipaddr", "")
                msg = outcome.get("msg", "")
                logger.error("node :%s sync upgrade package failed: %s",
                             ipaddr, msg)
                failed_nodes.append({"ipaddr": ipaddr, "msg": msg})

        if not failed_nodes:
            return get_error_result("Success")
        return get_error_result("UploadPackageSyncError",
                                {"failed_nodes": failed_nodes})
Ejemplo n.º 21
0
    def _start_services(self, master=False):
        """Start the node's services with ``systemctl start``.

        ``yzy-compute`` and ``yzy-monitor`` are always started; when
        ``master`` is true the controller-side services are started too.

        :param master: whether to include the controller-side services.
        :return: the default ``get_error_result`` on success,
            ``StartServiceError`` for the first service whose start wrote to
            stderr, ``OtherError`` on an exception.
        """
        try:
            targets = ["yzy-compute", "yzy-monitor"]
            if master:
                targets += [
                    "yzy-server", "yzy-scheduler", "yzy-terminal",
                    "yzy-terminal-agent", "nginx", "yzy-web"
                ]

            for name in targets:
                logger.info("start service %s", name)
                _out, err_text = execute("systemctl", "start", name)
                if err_text:
                    return get_error_result("StartServiceError", service=name)
        except Exception as exc:
            logger.exception("start services exception: %s" % str(exc),
                             exc_info=True)
            return get_error_result("OtherError")

        return get_error_result()
Ejemplo n.º 22
0
    def check(self):
        """Refuse the upgrade while image or template tasks are running.

        :return: ``ImageTaskRunning`` if any resource pool reports a
            non-zero base-image status or any template / VOI template is in
            a saving, creating, copying, rollback or updating state;
            otherwise ``Success``.
        """
        # Check whether a base image is currently being synchronised.
        for pool in db_api.get_resource_pool_list():
            status = self.manger.get_base_image_status(pool)
            if status != 0:
                logger.info("pool: %s, status: %d" % (pool.name, status))
                return get_error_result("ImageTaskRunning")

        # Check whether any template (then VOI template) diff disk is being
        # synchronised.
        for fetch_templates in (db_api.get_template_with_all,
                                db_api.get_voi_template_with_all):
            for template in fetch_templates({}):
                if template.status in [
                        constants.STATUS_SAVING, constants.STATUS_CREATING,
                        constants.STATUS_COPING, constants.STATUS_ROLLBACK,
                        constants.STATUS_UPDATING
                ]:
                    logger.info("template: %s, status: %d" %
                                (template.name, template.status))
                    return get_error_result("ImageTaskRunning")
                # status = self.manger.get_storages_status(template)
                # if status != 0:
                #     logger.info("template: %s, status: %d" % (template.name, status))
                #     return get_error_result("ImageTaskRunning")

        # TODO: check that no terminal upgrade package is being distributed.
        return get_error_result("Success")
Ejemplo n.º 23
0
    def upload_and_publish(self, file_obj):
        """Upload an upgrade package and distribute it to the compute nodes.

        :param file_obj: uploaded file; its ``filename`` must end with
            ``.tar.gz``.
        :return: ``NoPackageToUpload`` / ``PackageTypeError`` for bad input,
            the upload or publish error result after rolling back, or
            ``Success`` with ``self_upgrade`` telling whether the
            ``SELF_UPGRADE_FLAG`` path exists.
        """
        if not file_obj:
            logger.error("no file_obj to upload")
            return get_error_result("NoPackageToUpload")

        if not file_obj.filename.endswith('.tar.gz'):
            return get_error_result("PackageTypeError")

        # Upload, unpack and validate the package; on failure roll back
        # (the master deletes the package and clears the temp directory).
        upload_ret = self.manger.upload(file_obj)
        if upload_ret.get("code") != 0:
            logger.error("upgrade package upload fail, start rollback_upload")
            self.manger.rollback_upload()
            return upload_ret

        logger.info("upgrade package upload success")

        # Distribute the package to the compute nodes; on failure roll back
        # (compute nodes and master all delete the package and clear the
        # temp directory).
        info = upload_ret["data"]
        package_id = info["package_id"]
        package_path = info["package_path"]
        publish_ret = self.manger.publish(package_id, package_path,
                                          info.get("md5_value"))
        if publish_ret.get("code") != 0:
            logger.error(
                "upgrade package publish fail, start rollback_publish")
            self.manger.rollback_publish(package_id, package_path)
            logger.error("upgrade package publish fail, start rollback_upload")
            self.manger.rollback_upload()
            return publish_ret

        logger.info("upgrade package publish success")
        need_self = os.path.exists(constants.SELF_UPGRADE_FLAG)
        return get_error_result("Success", {"self_upgrade": need_self})
0
    def upgrade_process(self, master=False):
        """Upgrade this node: back up, replace code, run script, restart.

        :param master: forwarded to ``_start_services`` and
            ``_check_services_status``.
        :return: ``UpgradeBackupFailed``, ``CopyFileFailed``,
            ``RunUpgradeScriptFailed``, the error from ``_start_services``,
            ``StartServiceError``, or the default ``get_error_result`` on
            success.
        """
        # Back up the node's old code (the upgrade service is not backed up).
        if not self._backup_yzy_server():
            return get_error_result("UpgradeBackupFailed")

        # Clean the project directory, keeping the old upgrade service,
        # static, templates and config.
        self._clear_server_dir()

        # Copy the new code from the temp directory into the project
        # directory; the upgrade service is not copied and only new files
        # are copied into config.
        copied = self._copy_dir(constants.UPGRADE_KVM_PATH, const.BASE_DIR)
        if not copied:
            return get_error_result("CopyFileFailed")

        # Run the upgrade script.
        script = os.path.join(constants.UPGRADE_KVM_PATH,
                              constants.UPGRADE_SCRIPT_RELATIVE_PATH)
        if not self._run_script(script):
            return get_error_result("RunUpgradeScriptFailed")

        # Start the new-version services.
        started = self._start_services(master)
        if started.get('code') != 0:
            return started
        time.sleep(2)

        # Check whether the new-version services started successfully.
        not_running = self._check_services_status(master)
        if not_running:
            return get_error_result("StartServiceError",
                                    service=", ".join(not_running))

        # Upgrade finished: clear the package, temp and backup directories.
        self._clean_pkg_dirs()

        logger.info('upgrade process success')
        return get_error_result()
Ejemplo n.º 25
0
    def execute_disable_backup(self,
                               paths,
                               voi_template_list=None,
                               voi_xlms=None):
        """Tear down the HA backup role on this node.

        Removes the HA parameters from the MySQL config, disables keepalived,
        resets MySQL replication, disables mariadb, deletes HA-related and
        VOI template files, and clears the MySQL data directory.

        :param paths: list of extra file paths to remove.
        :param voi_template_list: optional list of dicts with ``disk_path``,
            ``image_path_list`` and ``torrent_path_list`` entries.
        :param voi_xlms: optional list of VOI XML file paths.
        :return: the default ``get_error_result``.
        """
        logging.info("start execute_disable_backup")
        # 1. Remove the 7 parameters from /etc/my.cnf.d/mariadb-server.cnf.
        self._update_conf_del(self.mysql_cnf,
                              self.master_content + self.backup_content)

        # 2. Disable the keepalived service.
        # 3. Stop the slave thread, drop all replication connection
        #    parameters, reset the bin log, drop the replication account.
        # 4. Disable the mariadb service.
        mysql_template = "mysql -u{user} -p{pwd} -e \"{sql}\""
        statements = (
            "STOP SLAVE;",
            "RESET SLAVE ALL;",
            "RESET MASTER;",
            "DROP USER IF EXISTS 'replicater'@'%';",
        )
        commands = ["systemctl disable --now keepalived"]
        for sql in statements:
            commands.append(
                mysql_template.format(user=self.db_user,
                                      pwd=self.db_pwd,
                                      sql=sql))
        commands.append("systemctl disable --now mariadb")
        for command in commands:
            self._run_cmd(command)

        # Collect the VOI template base disks, diff disks, torrent files
        # and XML files to delete.
        voi_files = []
        for entry in voi_template_list or []:
            voi_files.append(entry["disk_path"])
            voi_files.extend(entry["image_path_list"])
            voi_files.extend(entry["torrent_path_list"])
        if voi_xlms:
            voi_files.extend(voi_xlms)
        logging.debug("voi_files: %s", voi_files)

        # 5. Delete the keepalived config, related sh files, license files,
        #    ISO library, database dump and so on.
        ha_files = [
            self.keep_cnf, self.check_brain_file, self.notify_sh_file,
            self.db_dump_file, self.flag_file
        ]
        for target in ha_files + self.license_files + paths + voi_files:
            self._remove_file(target)

        # Empty the MySQL data directory.
        self.del_file(self.mysql_data_path)
        logging.info("finish execute_disable_backup success")
        return get_error_result()
Ejemplo n.º 26
0
 def check_node_status(self):
     """Check that every node is reachable and sort node IPs by role.

     Returns:
         ``get_error_result("NodeIPConnetFail")`` as soon as one node does
         not answer the ping or is marked as shut down; otherwise a dict
         ``{"master": <ip or None>, "slaves": [<ip>, ...]}``.
     """
     master_ip = None
     slave_ips = []
     all_nodes = db_api.get_node_with_all({})
     master_roles = [
         constants.ROLE_MASTER_AND_COMPUTE, constants.ROLE_MASTER
     ]
     for node in all_nodes:
         # Ping first; the stored status is only consulted when the ping
         # succeeded.
         reachable = icmp_ping(node.ip)
         if not reachable or node.status == constants.STATUS_SHUTDOWN:
             return get_error_result("NodeIPConnetFail")
         if node.type in master_roles:
             master_ip = node.ip
             continue
         slave_ips.append(node.ip)
     return {"master": master_ip, "slaves": slave_ips}
Ejemplo n.º 27
0
    def start_upgrade(self):
        """Run the upgrade on all nodes, rolling back when a step fails.

        Steps: check the node status, stop the old services, then run the
        upgrade itself. When stopping or upgrading fails, the matching
        rollback is attempted.

        Returns:
            The failing step's result dict when its rollback succeeded, a
            "RollbackServiceError" / "RollbackUpgradeError" result listing
            the nodes whose rollback failed, or ``get_error_result()`` when
            everything succeeded.
        """
        # Make sure every node is online.
        node_info = self.manger.check_node_status()
        if node_info.get('code', 0) != 0:
            return node_info
        logger.info("start upgrade, check node status success")
        master_ip, slave_ips = node_info['master'], node_info['slaves']

        # Stop the old services; on failure roll back (restart the old
        # services).
        stop_result = self._stop_services(master_ip, slave_ips)
        if stop_result.get('code', 0) != 0:
            failed_nodes = self._rollback_services(master_ip, slave_ips)
            if not failed_nodes:
                logger.info('rollback services success')
                return stop_result
            return get_error_result(
                "RollbackServiceError",
                data={"rollback_failed_nodes": failed_nodes})

        time.sleep(2)
        logger.info("stop services in all nodes success")

        # Run the upgrade; on failure roll back (restore from the backup
        # and restart the old services).
        upgrade_result = self._start_upgrade(master_ip, slave_ips)
        if upgrade_result.get('code', 0) != 0:
            failed_nodes = self._rollback_upgrade(master_ip, slave_ips)
            if not failed_nodes:
                logger.info('rollback upgrade in all nodes success')
                return upgrade_result
            return get_error_result(
                "RollbackUpgradeError",
                data={"rollback_failed_nodes": failed_nodes})

        logger.info("upgrade service in all nodes success")
        return get_error_result()
Ejemplo n.º 28
0
 def client_heartbeat(self, client, message=None):
     """Record a heartbeat from a terminal and answer with the current time.

     Args:
         client: object exposing ``mac`` and ``status``.
         message: accepted but not used by this method.

     Returns:
         The "Success" result with ``data["datetime"]`` set to the current
         local time formatted as ``%Y-%m-%d %H:%M:%S``.
     """
     logger.debug("terminal clients: %s " % self.clients.keys())
     mac = client.mac
     resp = get_error_result("Success", msg="en")
     now = dt.datetime.now()
     resp["data"] = {"datetime": now.strftime('%Y-%m-%d %H:%M:%S')}

     if mac not in self.clients:
         logger.error("terminal : %s is not exist" % client)
         return resp

     # Shift the previous heartbeat into last_status before storing the
     # newly reported status.
     terminal = self.clients[mac]
     terminal.last_status = terminal.heartbeat
     terminal.heartbeat = client.status
     terminal.alive_timestamp = now
     return resp
Ejemplo n.º 29
0
 def get_self_upgrade_status(self):
     """Report whether the self-upgrade flag file exists.

     Returns:
         ``get_error_result()`` when ``constants.SELF_UPGRADE_FILE`` exists,
         otherwise ``get_error_result("OtherError")``.
     """
     if not os.path.exists(constants.SELF_UPGRADE_FILE):
         return get_error_result("OtherError")
     return get_error_result()
Ejemplo n.º 30
0
    def get_package_from_controller(self, data):
        """Download the upgrade package from the controller node and unpack it.

        The package is streamed to ``package_path`` while its MD5 digest is
        computed. The file is flushed, fsynced and closed *before* it is
        verified and decompressed, so the decompressor never reads a file
        whose buffered tail has not been written yet. A package whose MD5
        does not match is not decompressed.

        Args:
            data: dict with the keys "package_id", "package_path" and
                "controller_image_ip" (all required) and an optional
                "md5_value" holding the expected hex digest.

        Returns:
            A result dict built by ``get_error_result``:
            "UpgradeRequestParamError" when a required key is missing or
            empty, "UpgradePackageMd5Failed" on a digest mismatch,
            "UpgradePackageFormatError" when decompression fails,
            "OtherError" when saving or unpacking raises, "Success"
            otherwise.

        Raises:
            Whatever ``self._download``, ``os.makedirs`` or ``open`` raise;
            those calls are made outside the guarded section, as before.
        """
        logger.info("get_package_from_controller: data: %s" % data)
        package_id = data.get("package_id")
        package_path = data.get("package_path")
        controller_image_ip = data.get("controller_image_ip")
        md5_value = data.get("md5_value")
        if not package_id or not package_path or not controller_image_ip:
            return get_error_result("UpgradeRequestParamError")

        url = constants.UPGRADE_FILE_DOWNLOAD_URL
        request_info = {
            "package_id": package_id,
            "package_path": package_path,
        }
        logger.info("get_package_from_controller: url: %s, data: %s" %
                    (url, request_info))
        package_chunks = self._download(controller_image_ip, url, package_id,
                                        package_path)
        logger.info("start to save the package on path: %s" % package_path)

        # A bare file name has an empty directory part; os.makedirs("")
        # would raise, so only create the directory when there is one.
        base_path = os.path.dirname(package_path)
        if base_path and not os.path.exists(base_path):
            os.makedirs(base_path)

        md5_sum = hashlib.md5()
        package_file = open(package_path, 'wb')
        try:
            with package_file:
                for chunk in package_chunks:
                    md5_sum.update(chunk)
                    package_file.write(chunk)
                # Ensure that the data is pushed all the way down to
                # persistent storage before anything reads the file back.
                package_file.flush()
                self._safe_fsync(package_file)

            # The file is closed and complete on disk from here on.
            file_md5 = md5_sum.hexdigest()
            if md5_value:
                logger.info("check md5, md5_value:%s, file_md5_sum:%s",
                            md5_value, file_md5)
                if file_md5 != md5_value:
                    logger.error(
                        "the package_id: %s, md5_value:%s, the receive file_md5_sum:%s"
                        % (package_id, md5_value, file_md5))
                    # Do not unpack a package that failed verification.
                    return get_error_result("UpgradePackageMd5Failed")

            # Decompress the upgrade package.
            if not decompress_package(package_path):
                return get_error_result("UpgradePackageFormatError")
        except Exception:
            logger.exception("get upgrade package from controller error",
                             exc_info=True)
            return get_error_result("OtherError")

        return get_error_result("Success")