def start_self_upgrade(cmd=False):
    """Upgrade the ``yzy_upgrade`` program itself.

    Args:
        cmd: Whether this call runs inside the self-upgrade program that was
            started from the command line. When false, the self-upgrade flag
            file is removed and ``yzy_upgrade self_upgrade`` is spawned as a
            child process (presumably that child ends up calling this
            function with ``cmd=True`` -- confirm against the entry point).

    Returns:
        ``get_error_result()`` on success (or as soon as the child process
        was spawned), otherwise the result built for ``StopServiceError``,
        ``UpgradeSlavesError`` or ``StartServiceError``.
    """
    logger.info("start self upgrade, cmd:%s", cmd)
    upgrade_path = os.path.join(constants.BASE_DIR, 'yzy_upgrade')
    if not cmd:
        if os.path.exists(const.SELF_UPGRADE_FILE):
            os.remove(const.SELF_UPGRADE_FILE)
        subprocess.Popen([upgrade_path, "self_upgrade"])
        return get_error_result()

    logger.info("begin stop upgrade")
    # Stop the upgrade service.
    _stdout, stderr = execute("systemctl", "stop", "yzy-upgrade")
    if stderr:
        return get_error_result("StopServiceError", service="yzy-upgrade")

    logger.info("start replace file")
    # Copy next to the target first and swap atomically, so a failed copy
    # never leaves the node without any yzy_upgrade binary.
    staged_path = upgrade_path + ".new"
    try:
        source = os.path.join(const.UPGRADE_KVM_PATH, 'yzy_upgrade')
        logger.info("copy %s to %s", source, upgrade_path)
        shutil.copy2(source, staged_path)
        os.replace(staged_path, upgrade_path)
    except Exception:
        logger.exception("copy file failed", exc_info=True)
        try:
            # Best-effort cleanup of the half-written staging file.
            os.remove(staged_path)
        except OSError:
            pass
        return get_error_result("UpgradeSlavesError")

    # Restart the service.
    _stdout, stderr = execute("systemctl", "start", "yzy-upgrade")
    if stderr:
        return get_error_result("StartServiceError", service="yzy-upgrade")

    # Drop the marker file that signals a finished self-upgrade.
    with open(const.SELF_UPGRADE_FILE, 'w') as fd:
        fd.write("")
    return get_error_result()
def rollback_services(self, master=False):
    """Restart the existing services and confirm they are running again.

    Args:
        master: When true, the controller-only services are restarted in
            addition to ``yzy-compute`` and ``yzy-monitor``.

    Returns:
        ``get_error_result()`` on success; a ``StartServiceError`` result
        naming the failing service(s), or ``OtherError`` if restarting raised.
    """
    try:
        names = ["yzy-compute", "yzy-monitor"]
        if master:
            names += [
                "yzy-server", "yzy-scheduler", "yzy-terminal",
                "yzy-terminal-agent", "nginx", "yzy-web"
            ]
        for name in names:
            logger.info("restart service %s", name)
            _out, err = execute("systemctl", "restart", name)
            if err:
                return get_error_result("StartServiceError", service=name)
    except Exception as exc:
        logger.exception("rollback_services Exception: %s" % str(exc),
                         exc_info=True)
        return get_error_result("OtherError")

    # Check that the old-version services actually came up.
    not_running = self._check_services_status(master)
    if not_running:
        return get_error_result("StartServiceError",
                                service=", ".join(not_running))

    # Rollback finished: empty the package directory and the temp directory.
    self._clean_pkg_dirs()
    return get_error_result()
def rollback_publish(self, package_id, package_path):
    """Ask every node to delete a previously published upgrade package.

    One ``_sync_delete_package`` call per node is run on a thread pool.

    Returns:
        The ``Success`` result, or an ``UploadPackageSyncError`` result
        carrying ``failed_nodes`` (``ipaddr`` / ``msg`` per failed node).
    """
    logger.info("rollback publish upgrade package on compute nodes")
    nodes = db_api.get_node_with_all({})
    failed_nodes = []
    with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as pool:
        pending = [
            pool.submit(self._sync_delete_package, node.ip, package_id,
                        package_path)
            for node in nodes
        ]
        for finished in as_completed(pending):
            outcome = finished.result()
            if outcome.get("code") == 0:
                continue
            logger.error(
                "node: %s rollback publish upgrade package failed: %s",
                outcome.get("ipaddr", ""), outcome.get("msg", ""))
            failed_nodes.append({
                "ipaddr": outcome.get("ipaddr", ""),
                "msg": outcome.get("msg", "")
            })
    if not failed_nodes:
        return get_error_result("Success")
    return get_error_result("UploadPackageSyncError",
                            {"failed_nodes": failed_nodes})
def check_vip(self, vip):
    """Run ``_check_vip(5, vip)`` and translate the outcome into a result.

    Returns:
        ``get_error_result()`` when the check does not raise, otherwise the
        ``SwitchHaMasterError`` result (the exception is logged).
    """
    try:
        self._check_vip(5, vip)
    except Exception as exc:
        logging.exception(str(exc), exc_info=True)
        return get_error_result("SwitchHaMasterError")
    return get_error_result()
def request_bt_server(self, service_name, request_data):
    """Send one request to the BT service over ``self.socket`` and return the reply.

    The request is JSON-encoded, framed with ``YzyProtocol`` and sent; then a
    header of ``YzyProtocol.header_length`` bytes and the body it announces
    are read back.

    Args:
        service_name: Key into ``name_service_code`` selecting the service.
        request_data: JSON-serialisable request payload.

    Returns:
        The parsed response body (``paket_struct.data_json()``) on success,
        the ``BtResponseMsgError`` result when the reply is not a response
        packet, or the ``OtherError`` result on any exception. In both error
        cases the socket is closed and reset so the next call reconnects.
    """

    def _recv_exact(size):
        """Read until ``size`` bytes arrived or the peer stopped sending."""
        pieces = []
        remaining = size
        while remaining > 0:
            piece = self.socket.recv(remaining)
            if not piece:
                break
            pieces.append(piece)
            remaining -= len(piece)
        return b''.join(pieces)

    try:
        if not self.socket:
            self.socket_init()
        ret_str = json.dumps(request_data)
        service_code = name_service_code[service_name]
        _size, msg = YzyProtocol().create_paket(
            service_code,
            ret_str.encode("utf-8"),
            b'',
            sequence_code=6666,
            req_or_res=YzyProtocolType.REQ,
            client_type=ClientType.SERVER)
        logger.info("Send request msg size: {}, msg: {}".format(_size, msg))
        self.socket.send(msg)

        # A single recv() may return fewer bytes than requested, so loop.
        head_msg = _recv_exact(YzyProtocol.header_length)
        # Validate the *received* header (the original tested the sent msg).
        if len(head_msg) < YzyProtocol.header_length:
            logger.error("Get head error, length {}".format(len(head_msg)))
            raise Exception("BT API GET HEAD ERROR !")
        paket_struct = YzyProtocol().parse_paket_header(head_msg)
        logger.info("Receive head msg: {}".format(head_msg))
        logger.debug("Parse head: {}".format(paket_struct))
        body_length = (paket_struct.data_size + paket_struct.token_length +
                       paket_struct.supplementary)

        if paket_struct.req_or_res != 2:  # 1-request, 2-response
            logger.error("message head req_or_res type error")
            self.socket.close()
            # Forget the closed socket so the next request reconnects.
            self.socket = None
            return get_error_result("BtResponseMsgError")

        logger.debug(
            "Get response: service_code[{}-{}], sequence_no[{}] ".format(
                paket_struct.service_code,
                service_code_name[paket_struct.service_code],
                paket_struct.sequence_code,
            ))
        body = _recv_exact(body_length)
        paket_struct.set_data(body)
        logger.debug("Get body: {}".format(body))
        if not body:
            logger.error("bt api GET BODY ERROR !")
            raise Exception("BT API GET BODY ERROR !")
        ret_data = paket_struct.data_json()
        logger.debug("Parsed body: {}".format(ret_data))
        return ret_data
    except Exception as err:
        logger.error("tcp socket error: %s" % err)
        logger.error(''.join(traceback.format_exc()))
        if self.socket:
            self.socket.close()
        self.socket = None
        return get_error_result("OtherError")
def sync(self, data):
    """Dispatch a package sync request to the agent.

    Args:
        data: Request dict. Its ``command`` key is removed and selects the
            action; the remaining dict is passed on to the agent.

    Returns:
        The agent's result for ``download`` / ``delete``, or the
        ``UpgradeRequestParamError`` result when ``data`` is empty or the
        command is missing or unknown.
    """
    if not data:
        return get_error_result("UpgradeRequestParamError")
    # A payload without 'command' is a bad request, not a crash (KeyError).
    command = data.pop('command', None)
    if command == 'download':
        return self.agent.get_package_from_controller(data)
    if command == 'delete':
        return self.agent.delete_dirty_package(data)
    return get_error_result("UpgradeRequestParamError")
def client_biz_processor(self, client, is_req, seq_id, handler_name, message):
    """Handle one message received from a VOI terminal.

    Steps, in order:
      1. Validate the token: a token is required unless the handler is
         ``terminal_login``; when the terminal is already registered in
         ``self.clients`` the token must match the stored one.
      2. If this object defines ``client_<handler_name>``, call it with
         ``(client, message)`` and return its result.
      3. Otherwise, for a registered terminal, forward the message to the
         server through ``voi_terminal_post``; for an unknown terminal,
         return ``TerminalNotLogin`` and close the client's socket.

    Args:
        client: Connection object; ``mac``, ``client_ip`` and
            ``socket_client`` are read from it.
        is_req: True for a request; otherwise the forwarded ``cmd`` is
            ``"command_response"``.
        seq_id: Sequence id, only logged here.
        handler_name: Name of the service being invoked.
        message: Message dict; it is modified in place.

    Returns:
        The handler's / server's result dict or an error result.
    """
    logger.debug(
        "client: {}, is_req: {}, seq_id: {}, handler_name: {} message: {}".
        format(client, is_req, seq_id, handler_name, message)[:1000])
    if message.get("mac", None):
        message["mac"] = message["mac"].upper()
    terminal_mac = client.mac
    method_name = "client_%s" % handler_name

    token = message.get('token', None)
    if not token:
        # Only the login request may arrive without a token.
        token_ok = method_name == "client_terminal_login"
    else:
        registered = self.clients.get(terminal_mac, None)
        token_ok = (not registered
                    or token.decode('utf-8') == registered.token)
    if not token_ok:
        ret = get_error_result("TerminalTokenError", msg="en")
        logger.error("voi terminal token error: %s" % client)
        return ret

    if hasattr(self, method_name):
        func = getattr(self, method_name)
        ret = func(client, message)
        logger.debug(
            "Client request method_name(no flask request): {}, ret: {}".
            format(method_name, ret))
        return ret

    logger.info("terminal_mac: {}, client: {}, method_name: {}".format(
        terminal_mac, client, method_name))
    if terminal_mac in self.clients:
        # NOTE(review): syscall 186 is presumably gettid on x86-64 Linux.
        thread_id = ctypes.CDLL('libc.so.6').syscall(186)
        thread_ident = threading.current_thread().ident
        logger.info(
            "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s" %
            (self.clients, os.getpid(), os.getppid(), thread_id,
             thread_ident))
        # Transport-level fields are not forwarded; tolerate their absence.
        message.pop('supplementary', None)
        message.pop('token', None)
        message["service_name"] = handler_name
        message["terminal_mac"] = terminal_mac
        message["terminal_ip"] = client.client_ip
        # Notify the server.
        _data = {
            "cmd": handler_name if is_req else "command_response",
            "data": message
        }
        logger.info("voi terminal request server : %s" % _data)
        ret = voi_terminal_post("/api/v1/voi/terminal/task/", _data)
        logger.info("voi terminal server return: %s" % ret)
    else:
        ret = get_error_result("TerminalNotLogin", msg="en")
        logger.error("voi terminal not login: %s" % client)
        client.socket_client.socket.close()
    return ret
def upgrade_cluster():
    """Run the self-upgrade on every slave node, then on this node.

    After ``check_node_status`` succeeds, ``api/v1/index/self_upgrade`` is
    posted to all slaves in parallel. Their status endpoint is then polled
    once per second until every slave reports code 0 (there is no timeout),
    after which ``start_self_upgrade()`` is called locally.

    Returns:
        The node-status error result, an ``UpgradeSlavesError`` result with
        ``failed_nodes``, or ``get_error_result()`` on success.
    """
    # Make sure every node is online.
    node_status = UpgradeManager().check_node_status()
    if node_status.get('code', 0) != 0:
        return node_status
    logger.info("start self upgrade, check node status success")
    slave_ips = node_status['slaves']

    failed_nodes = []
    url = "api/v1/index/self_upgrade"
    with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as pool:
        pending = [
            pool.submit(upgrade_post, node_ip, url, data={})
            for node_ip in slave_ips
        ]
        for finished in as_completed(pending):
            reply = finished.result()
            if reply.get("code") == 0:
                continue
            logger.error("url:%s failed: %s, msg: %s" %
                         (url, reply.get("ipaddr", ""), reply.get("msg", "")))
            failed_nodes.append(reply.get("ipaddr", ""))
    if failed_nodes:
        return get_error_result("UpgradeSlavesError",
                                {"failed_nodes": failed_nodes})

    url = "api/v1/index/get_self_upgrade_status"

    def _finished(node_ip):
        """Return True when the node reports a completed self-upgrade."""
        try:
            return upgrade_post(node_ip, url, data={}).get('code') == 0
        except Exception:
            logger.exception("get self upgrade state in node %s failed",
                             node_ip)
            return False

    # Every node is queried on each round (list, not a short-circuiting
    # generator); sleep one second between rounds.
    while not all([_finished(node_ip) for node_ip in slave_ips]):
        time.sleep(1)
    logger.info("Other host self upgrade successful")

    start_self_upgrade()
    return get_error_result()
def client_terminal_logout(self, client, message=None):
    """Unregister a terminal and tell the server it logged out.

    Args:
        client: Connection object; ``mac``, ``client_ip`` and
            ``client_port`` are read from it.
        message: Unused.

    Returns:
        The server's reply when it reports a non-zero code, otherwise the
        ``Success`` result (also when the terminal was not registered).
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation was lost; confirm the success path returns "Success".
    logger.info("client_logout: %s" % client)
    terminal_mac = client.mac
    if terminal_mac in self.clients:
        self.clients.pop(terminal_mac)
        # NOTE(review): syscall 186 is presumably gettid on x86-64 Linux.
        thread_id = ctypes.CDLL('libc.so.6').syscall(186)
        logger.info(
            "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s" %
            (self.clients, os.getpid(), os.getppid(), thread_id,
             threading.current_thread().ident))
        # Notify the server.
        _data = {
            "cmd": "terminal_logout",
            "data": {
                "mac": terminal_mac,
                "ip": client.client_ip,
                "port": client.client_port
            }
        }
        ret = voi_terminal_post("/api/v1/voi/terminal/task/", _data)
        logger.info("voi terminal server return: %s" % ret)
        if ret.get("code") != 0:
            logger.error("voi terminal client_logout error: %s" % ret)
            logger.info("client %s, end......" % client)
            return ret
    else:
        logger.error("voi terminal logout error: %s is not exist" %
                     terminal_mac)
    logger.info("client %s, end......" % client)
    return get_error_result("Success", "en")
def switch_ha_master(self, new_vip_host_ip, vip):
    """Hand the VIP over to ``new_vip_host_ip``.

    Restarts keepalived on this node (the current VIP host) so the VIP moves
    to the new host, then asks the new host to check the VIP. Exceptions
    raised by the helpers are not caught here.

    Returns:
        ``get_error_result()``.
    """
    restart_keepalived = "systemctl restart keepalived"
    self._run_cmd(restart_keepalived, SwitchHaMasterException)
    self._notify_new_vip_host_check(new_vip_host_ip, vip)
    logging.info("switch master to new_vip_host_ip %s success" %
                 new_vip_host_ip)
    return get_error_result()
def client_except_exit(self, client, message=None):
    """Record an abnormal terminal disconnect and report it to the server.

    For a registered terminal the stored client is replaced by ``client``,
    its ``heartbeat`` is set to False and the ``ip:port -> mac`` mapping is
    refreshed before the server is notified.

    Args:
        client: Connection object; ``mac``, ``client_ip`` and
            ``client_port`` are read from it.
        message: Unused.

    Returns:
        The server's reply when it reports a non-zero code, otherwise the
        ``Success`` result (also when the terminal was not registered).
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation was lost; confirm the success path returns "Success".
    terminal_mac = client.mac
    terminal_ip = client.client_ip
    terminal_port = client.client_port
    logger.info("client_except_exit: %s" % terminal_mac)
    if terminal_mac in self.clients:
        self.clients[terminal_mac] = client
        self.clients[terminal_mac].heartbeat = False
        ip_port = self.ip_port_str(terminal_ip, terminal_port)
        self.ip_port_mac[ip_port] = terminal_mac
        # NOTE(review): syscall 186 is presumably gettid on x86-64 Linux.
        thread_id = ctypes.CDLL('libc.so.6').syscall(186)
        logger.info(
            "terminal clients: %s pid: %s, ppid: %s, tid: %s, t_ident: %s" %
            (self.clients, os.getpid(), os.getppid(), thread_id,
             threading.current_thread().ident))
        # Notify the server.
        _data = {
            "cmd": "terminal_except_exit",
            "data": {
                "mac": terminal_mac,
                "ip": client.client_ip,
            }
        }
        ret = voi_terminal_post("/api/v1/voi/terminal/task/", _data)
        logger.info("voi terminal server return: %s" % ret)
        if ret.get("code") != 0:
            logger.error("voi terminal client_except_exit error: %s" % ret)
            return ret
    else:
        logger.debug("voi terminal client_except_exit: %s is not exist" %
                     terminal_mac)
    logger.info("client %s, end......" % client)
    return get_error_result("Success", "en")
def start_backup(self):
    """Make the HA helper scripts executable and enable keepalived.

    Returns:
        ``get_error_result()`` on success, or the ``StartBackupHAError``
        result when any command raises (the exception is logged).
    """
    try:
        # Step 9: start keepalived. The backup controller must be started
        # after the master so that the VIP binds to the master.
        commands = [
            "chmod +x %s" % script
            for script in (self.check_brain_file, self.notify_sh_file)
        ]
        # "mysql -u{user} -p{pwd} -e \"start slave;\"".format(user=self.db_user, pwd=self.db_pwd),
        commands.append("systemctl enable --now keepalived")
        for command in commands:
            self._run_cmd(command, EnableHaException)
    except Exception as exc:
        logging.exception(str(exc), exc_info=True)
        return get_error_result("StartBackupHAError")
    return get_error_result()
def disable_ha(self, vip_host_ip, peer_host_ip, paths, voi_template_list=None, voi_xlms=None, post_data=None):
    """Disable HA on both hosts: the peer host first, then the local VIP host.

    Returns:
        ``get_error_result()`` on success, or the ``DisableHAError`` result
        when either step raises (the exception is logged).
    """
    try:
        # Stop peer_host first, the local vip_host afterwards.
        self._disable_backup(peer_host_ip, paths, voi_template_list,
                             voi_xlms)
        self._disable_master(vip_host_ip, post_data)
    except Exception as exc:
        logging.exception(str(exc), exc_info=True)
        return get_error_result("DisableHAError")
    return get_error_result()
def _stop_services(self, master_ip, slave_ips):
    """Stop the services on the controller, then on every compute node.

    Args:
        master_ip: Controller address, used in the log and in
            ``failed_nodes`` when the local stop fails.
        slave_ips: Addresses of the compute nodes to notify.

    Returns:
        ``get_error_result()`` on success; otherwise a result whose ``data``
        holds ``failed_nodes``.
    """
    # The controller stops its own services first.
    ret = self.manger.stop_services(master=True)
    if ret.get('code') != 0:
        logger.error("stop service failed: %s" % master_ip)
        ret['data'] = {'failed_nodes': [master_ip]}
        return ret

    # Tell each compute node to stop its services.
    failed_nodes = self.manger.notify_slaves(
        slave_ips, url="api/v1/index/stop_slave_services")
    if failed_nodes:
        # The original passed failed_nodes without a %s placeholder, which
        # makes the logging module fail to format the record.
        logger.error("stop service failed: %s", failed_nodes)
        return get_error_result("StopSlavesServiceError",
                                data={'failed_nodes': failed_nodes})
    return get_error_result()
def check_backup_ha_status(self, quorum_ip, sensitivity, paths):
    """Collect four HA health indicators on the backup controller.

    Args:
        quorum_ip: Address to ping; when falsy the quorum status is reported
            as ``HA_STATUS_UNKNOWN``.
        sensitivity: Passed to ``icmp_ping`` as ``count``.
        paths: Files that must exist for the file-sync status to be normal.

    Returns:
        The ``Success`` result whose ``data`` is the list
        ``[keepalived, quorum_ip, mysql_slave, file_sync]`` statuses, or the
        ``OtherError`` result if anything raises.
    """
    try:
        code, out = cmdutils.run_cmd("systemctl status keepalived",
                                     ignore_log=True)
        if code != 0 or "active (running)" not in out:
            keepalived_status = constants.HA_STATUS_FAULT
            logging.error("keepalived not running")
        else:
            keepalived_status = constants.HA_STATUS_NORMAL

        if quorum_ip:
            if not icmp_ping(quorum_ip, timeout=1, count=sensitivity):
                quorum_ip_status = constants.HA_STATUS_FAULT
                logging.error("ping quorum_ip[%s] failed" % quorum_ip)
            else:
                quorum_ip_status = constants.HA_STATUS_NORMAL
        else:
            quorum_ip_status = constants.HA_STATUS_UNKNOWN

        # "\\G" keeps the literal backslash-G mysql expects without relying
        # on Python's deprecated handling of the invalid escape "\G".
        # NOTE(review): the password is visible on the command line.
        code, out = cmdutils.run_cmd(
            "mysql -u{user} -p{pwd} -e \"SHOW SLAVE STATUS\\G;\" |grep \"Error \""
            .format(user=self.db_user, pwd=self.db_pwd),
            ignore_log=True)
        if out:
            # Any line matched by grep marks the slave status as faulty.
            mysql_slave_status = constants.HA_STATUS_FAULT
            logging.error("mysql slave status error: %s", out)
        else:
            mysql_slave_status = constants.HA_STATUS_NORMAL

        file_sync_status = constants.HA_STATUS_NORMAL
        for path in paths:
            if not os.path.exists(path):
                file_sync_status = constants.HA_STATUS_FAULT
                break

        ret = get_error_result("Success",
                               data=[
                                   keepalived_status, quorum_ip_status,
                                   mysql_slave_status, file_sync_status
                               ])
    except Exception as e:
        logging.exception(str(e), exc_info=True)
        ret = get_error_result("OtherError")
    return ret
def upload(self, file_obj):
    """Save an uploaded upgrade package, unpack it and validate it.

    The file is stored as ``<UPGRADE_FILE_PATH>/<new uuid>.tar.gz`` while its
    MD5 digest is computed on the fly.

    Args:
        file_obj: Uploaded file; it is read through ``chunks(file_obj)``.

    Returns:
        The ``Success`` result with ``package_id``, ``package_path`` and
        ``md5_value``; or an ``UpgradePackageFormatError`` /
        ``PackageNotMatchSystem`` / ``OtherError`` result carrying
        ``package_path``.
    """
    logger.info("go to upload func")
    package_id = create_uuid()
    base_path = constants.UPGRADE_FILE_PATH
    package_path = os.path.join(base_path, "".join([package_id, ".tar.gz"]))
    try:
        # Creating the directory inside the try block turns a failure into
        # an error result instead of an uncaught exception.
        os.makedirs(base_path, exist_ok=True)
        logger.info("begin save upgrade compress file to %s", package_path)
        digest = hashlib.md5()
        with open(package_path, "wb") as f:
            for chunk in chunks(file_obj):
                digest.update(chunk)
                f.write(chunk)
        md5_value = digest.hexdigest()

        # Unpack the upgrade package.
        if not decompress_package(package_path):
            return get_error_result("UpgradePackageFormatError",
                                    data={"package_path": package_path})
        # Validate the upgrade package.
        if not self._check_package():
            return get_error_result("PackageNotMatchSystem",
                                    data={"package_path": package_path})
    except Exception:
        logger.exception("save upgrade package error", exc_info=True)
        return get_error_result("OtherError",
                                data={"package_path": package_path})
    return get_error_result("Success",
                            data={
                                "package_id": package_id,
                                "package_path": package_path,
                                "md5_value": md5_value
                            })
def _start_upgrade(self, master_ip, slave_ips):
    """Upgrade the compute nodes first, then the controller itself.

    (Original summary: backup, replace, run the upgrade script, start the
    services.)

    Args:
        master_ip: Controller address, used in the log and in
            ``failed_nodes`` when the local upgrade fails.
        slave_ips: Addresses of the compute nodes to notify.

    Returns:
        ``get_error_result()`` on success; otherwise a result whose ``data``
        holds ``failed_nodes``.
    """
    # Tell each compute node to upgrade.
    failed_nodes = self.manger.notify_slaves(
        slave_ips, url="api/v1/index/upgrade_slave")
    if failed_nodes:
        # The original passed failed_nodes without a %s placeholder, which
        # makes the logging module fail to format the record.
        logger.error("upgrade process failed: %s", failed_nodes)
        return get_error_result("UpgradeSlavesError",
                                data={'failed_nodes': failed_nodes})

    # The controller upgrades itself.
    ret = self.manger.upgrade_process(master=True)
    if ret.get('code') != 0:
        logger.error("upgrade process failed: %s" % master_ip)
        ret['data'] = {'failed_nodes': [master_ip]}
        return ret
    return get_error_result()
def delete_dirty_package(self, data):
    """Delete a leftover upgrade package on this compute node.

    Args:
        data: Dict that must contain ``package_id`` and ``package_path``.

    Returns:
        ``UpgradeRequestParamError`` when a key is missing or empty,
        ``OtherError`` when deletion raises, otherwise ``Success``.
    """
    package_id = data.get("package_id")
    package_path = data.get("package_path")
    if not (package_id and package_path):
        return get_error_result("UpgradeRequestParamError")

    try:
        # Remove the package file.
        if os.path.exists(package_path):
            os.remove(package_path)
        # Remove the temporary directory used for unpacking.
        tmp_dir = constants.UPGRADE_TMP_PATH
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
    except Exception:
        logger.exception("delete the package failed: package_path: %s" %
                         package_path,
                         exc_info=True)
        return get_error_result("OtherError")
    return get_error_result("Success")
def rollback_process(self, master=False):
    """Roll this node back to the pre-upgrade version.

    If a backup directory exists, the rollback script is run, the project
    directory is cleared and the backup is copied back. In both cases the
    old services are then started and checked, and the package directories
    are cleaned.

    Args:
        master: Forwarded to ``_start_services`` / ``_check_services_status``.

    Returns:
        ``get_error_result()`` on success, or the error result of the first
        failing step.
    """
    backup_dir = constants.UPGRADE_BACKUP_PATH
    if not os.path.exists(backup_dir):
        logger.info('backup don`t exists, start services to rollback')
    else:
        logger.info('backup exists, use backup to rollback')
        # Run the rollback script.
        script = os.path.join(constants.UPGRADE_KVM_PATH,
                              constants.ROLLBACK_SCRIPT_RELATIVE_PATH)
        if not self._run_script(script):
            return get_error_result("RunRollbackScriptFailed")
        # Empty the project directory.
        self._clear_server_dir(all=True)
        # Copy the backed-up code into the project directory.
        if not self._rollback_yzy_server(constants.UPGRADE_BACKUP_PATH,
                                         const.BASE_DIR):
            return get_error_result("MoveFileFailed")

    # Start the old-version services.
    started = self._start_services(master)
    if started.get('code') != 0:
        return started

    # Check that the old-version services are running.
    not_running = self._check_services_status(master)
    if not_running:
        return get_error_result("StartServiceError",
                                service=", ".join(not_running))

    # Rollback finished: empty the package, temp and backup directories.
    self._clean_pkg_dirs()
    logger.info("rollback upgrade process success")
    return get_error_result()
def publish(self, package_id, package_path, md5_value=None):
    """Have every non-controller node download the upgrade package.

    One ``_sync_download_package`` call per node is run on a thread pool;
    nodes whose type is a master role are skipped. The download base URL is
    built from the controller image IP and the port of the ``upgrade_bind``
    address (or ``UPGRADE_DEFAULT_PORT`` when that setting is empty).

    Returns:
        The ``Success`` result, or an ``UploadPackageSyncError`` result
        carrying ``failed_nodes`` (``ipaddr`` / ``msg`` per failed node).
    """
    logger.info("sync the upgrade package to compute nodes")
    controller_image = db_api.get_controller_image()
    nodes = db_api.get_node_with_all({})
    failed_nodes = []

    bind = SERVER_CONF.addresses.get_by_default('upgrade_bind', '')
    port = bind.split(':')[-1] if bind else constants.UPGRADE_DEFAULT_PORT

    with ThreadPoolExecutor(max_workers=constants.MAX_THREADS) as pool:
        pending = []
        for node in nodes:
            is_master = node.type in [
                constants.ROLE_MASTER_AND_COMPUTE, constants.ROLE_MASTER
            ]
            if not is_master:
                pending.append(
                    pool.submit(self._sync_download_package,
                                "http://%s:%s" % (controller_image.ip, port),
                                node.ip, package_id, package_path, md5_value))
        for finished in as_completed(pending):
            outcome = finished.result()
            if outcome.get("code") == 0:
                continue
            logger.error("node :%s sync upgrade package failed: %s",
                         outcome.get("ipaddr", ""), outcome.get("msg", ""))
            failed_nodes.append({
                "ipaddr": outcome.get("ipaddr", ""),
                "msg": outcome.get("msg", "")
            })

    if not failed_nodes:
        return get_error_result("Success")
    return get_error_result("UploadPackageSyncError",
                            {"failed_nodes": failed_nodes})
def _start_services(self, master=False):
    """Start the node services through ``systemctl start``.

    Args:
        master: When true, the controller-only services are started in
            addition to ``yzy-compute`` and ``yzy-monitor``.

    Returns:
        ``get_error_result()`` on success, a ``StartServiceError`` result for
        the first service that wrote to stderr, or ``OtherError`` if starting
        raised.
    """
    try:
        names = ["yzy-compute", "yzy-monitor"]
        if master:
            names += [
                "yzy-server", "yzy-scheduler", "yzy-terminal",
                "yzy-terminal-agent", "nginx", "yzy-web"
            ]
        for name in names:
            logger.info("start service %s", name)
            _out, err = execute("systemctl", "start", name)
            if err:
                return get_error_result("StartServiceError", service=name)
    except Exception as exc:
        logger.exception("start services exception: %s" % str(exc),
                         exc_info=True)
        return get_error_result("OtherError")
    return get_error_result()
def check(self):
    """Refuse to upgrade while an image or template task is running.

    Returns:
        The ``ImageTaskRunning`` result when any resource pool reports a
        non-zero base-image status or any template / VOI template is in a
        saving, creating, copying, rollback or updating state; otherwise the
        ``Success`` result.
    """

    def _first_busy(templates):
        """Return the first template whose status is a busy one, else None."""
        for candidate in templates:
            if candidate.status in [
                    constants.STATUS_SAVING, constants.STATUS_CREATING,
                    constants.STATUS_COPING, constants.STATUS_ROLLBACK,
                    constants.STATUS_UPDATING
            ]:
                return candidate
        return None

    # Is a base image currently being synchronised?
    for pool in db_api.get_resource_pool_list():
        status = self.manger.get_base_image_status(pool)
        if status != 0:
            logger.info("pool: %s, status: %d" % (pool.name, status))
            return get_error_result("ImageTaskRunning")

    # Is a template difference disk currently being synchronised?
    busy = _first_busy(db_api.get_template_with_all({}))
    if busy is not None:
        logger.info("template: %s, status: %d" % (busy.name, busy.status))
        return get_error_result("ImageTaskRunning")

    busy = _first_busy(db_api.get_voi_template_with_all({}))
    if busy is not None:
        logger.info("template: %s, status: %d" % (busy.name, busy.status))
        return get_error_result("ImageTaskRunning")

    # TODO: check that no terminal upgrade package is being distributed.
    return get_error_result("Success")
def upload_and_publish(self, file_obj):
    """Upload an upgrade package and distribute it to the compute nodes.

    A failed upload triggers ``rollback_upload``; a failed publish triggers
    ``rollback_publish`` followed by ``rollback_upload``.

    Args:
        file_obj: Uploaded file; its ``filename`` must end with ``.tar.gz``.

    Returns:
        An error result from validation / upload / publish, or the
        ``Success`` result with ``self_upgrade`` telling whether
        ``constants.SELF_UPGRADE_FLAG`` exists.
    """
    if not file_obj:
        logger.error("no file_obj to upload")
        return get_error_result("NoPackageToUpload")
    if not file_obj.filename.endswith('.tar.gz'):
        return get_error_result("PackageTypeError")

    # Upload, unpack and validate; on failure roll back (the controller
    # deletes the package and empties the temp directory).
    upload_ret = self.manger.upload(file_obj)
    if upload_ret.get("code") != 0:
        logger.error("upgrade package upload fail, start rollback_upload")
        self.manger.rollback_upload()
        return upload_ret
    logger.info("upgrade package upload success")

    # Distribute to the compute nodes; on failure roll back on the compute
    # nodes and on the controller.
    info = upload_ret["data"]
    publish_ret = self.manger.publish(info["package_id"],
                                      info["package_path"],
                                      info.get("md5_value"))
    if publish_ret.get("code") != 0:
        logger.error("upgrade package publish fail, start rollback_publish")
        self.manger.rollback_publish(info["package_id"],
                                     info["package_path"])
        logger.error("upgrade package publish fail, start rollback_upload")
        self.manger.rollback_upload()
        return publish_ret
    logger.info("upgrade package publish success")

    need_self = os.path.exists(constants.SELF_UPGRADE_FLAG)
    return get_error_result("Success", {"self_upgrade": need_self})
def upgrade_process(self, master=False):
    """Upgrade this node: backup, replace code, run script, start services.

    Args:
        master: Forwarded to ``_start_services`` / ``_check_services_status``.

    Returns:
        ``get_error_result()`` on success, or the error result of the first
        failing step.
    """
    # Back up the old code on this node (the upgrade service is excluded).
    if not self._backup_yzy_server():
        return get_error_result("UpgradeBackupFailed")

    # Clean the project directory, keeping the old upgrade service, static,
    # templates and config.
    self._clear_server_dir()

    # Copy the new code from the temp directory into the project directory
    # (no upgrade service; only new files are copied into config).
    if not self._copy_dir(constants.UPGRADE_KVM_PATH, const.BASE_DIR):
        return get_error_result("CopyFileFailed")

    # Run the upgrade script.
    script = os.path.join(constants.UPGRADE_KVM_PATH,
                          constants.UPGRADE_SCRIPT_RELATIVE_PATH)
    if not self._run_script(script):
        return get_error_result("RunUpgradeScriptFailed")

    # Start the new-version services.
    started = self._start_services(master)
    if started.get('code') != 0:
        return started

    time.sleep(2)
    # Check that the new-version services are running.
    not_running = self._check_services_status(master)
    if not_running:
        return get_error_result("StartServiceError",
                                service=", ".join(not_running))

    # Upgrade finished: empty the package, temp and backup directories.
    self._clean_pkg_dirs()
    logger.info('upgrade process success')
    return get_error_result()
def execute_disable_backup(self, paths, voi_template_list=None, voi_xlms=None):
    """Tear down the HA setup on the backup controller.

    Removes the replication settings from the MariaDB config, disables
    keepalived, resets replication, disables MariaDB, deletes HA-related and
    VOI template files and finally clears the MySQL data directory.

    Args:
        paths: List of extra files to delete.
        voi_template_list: Optional list of dicts with ``disk_path``,
            ``image_path_list`` and ``torrent_path_list``.
        voi_xlms: Optional list of VOI XML files to delete.

    Returns:
        ``get_error_result()``; exceptions from the helpers propagate.
    """
    logging.info("start execute_disable_backup")
    # 1. Delete the 7 parameters from /etc/my.cnf.d/mariadb-server.cnf.
    self._update_conf_del(self.mysql_cnf,
                          self.master_content + self.backup_content)

    # 2. Disable keepalived.
    # 3. Stop the slave thread, drop all replication settings, reset the
    #    binlog, drop the replication account.
    # 4. Disable mariadb.
    creds = {"user": self.db_user, "pwd": self.db_pwd}
    commands = [
        "systemctl disable --now keepalived",
        "mysql -u{user} -p{pwd} -e \"STOP SLAVE;\"".format(**creds),
        "mysql -u{user} -p{pwd} -e \"RESET SLAVE ALL;\"".format(**creds),
        "mysql -u{user} -p{pwd} -e \"RESET MASTER;\"".format(**creds),
        "mysql -u{user} -p{pwd} -e \"DROP USER IF EXISTS 'replicater'@'%';\""
        .format(**creds),
        "systemctl disable --now mariadb",
    ]
    for command in commands:
        self._run_cmd(command)

    # Collect the VOI template base disks, diff disks, torrents and XML.
    voi_files = []
    for entry in voi_template_list or ():
        voi_files.append(entry["disk_path"])
        voi_files.extend(entry["image_path_list"])
        voi_files.extend(entry["torrent_path_list"])
    if voi_xlms:
        voi_files.extend(voi_xlms)
    logging.debug("voi_files: %s", voi_files)

    # 5. Delete the keepalived config, helper scripts, license files, ISO
    #    library, database dump and the other collected files.
    doomed = [
        self.keep_cnf, self.check_brain_file, self.notify_sh_file,
        self.db_dump_file, self.flag_file
    ] + self.license_files + paths + voi_files
    for target in doomed:
        self._remove_file(target)

    # Empty the mysql data_dir.
    self.del_file(self.mysql_data_path)
    logging.info("finish execute_disable_backup success")
    return get_error_result()
def check_node_status(self):
    """Verify every node is reachable and split them into master and slaves.

    Returns:
        The ``NodeIPConnetFail`` result as soon as a node does not answer a
        ping or is marked shut down; otherwise
        ``{"master": <ip or None>, "slaves": [<ip>, ...]}`` (note: this dict
        has no ``code`` key).
    """
    master = None
    slaves = []
    for node in db_api.get_node_with_all({}):
        reachable = icmp_ping(node.ip)
        if not reachable or node.status == constants.STATUS_SHUTDOWN:
            return get_error_result("NodeIPConnetFail")
        is_master = node.type in [
            constants.ROLE_MASTER_AND_COMPUTE, constants.ROLE_MASTER
        ]
        if is_master:
            master = node.ip
        else:
            slaves.append(node.ip)
    return {"master": master, "slaves": slaves}
def start_upgrade(self):
    """Run the cluster upgrade: stop services, upgrade, roll back on failure.

    Returns:
        ``get_error_result()`` on success; the failing step's result when
        the step failed but its rollback succeeded; or a
        ``RollbackServiceError`` / ``RollbackUpgradeError`` result carrying
        ``rollback_failed_nodes`` when the rollback failed too.
    """
    # Make sure every node is online.
    node_status = self.manger.check_node_status()
    if node_status.get('code', 0) != 0:
        return node_status
    logger.info("start upgrade, check node status success")
    master_ip = node_status['master']
    slave_ips = node_status['slaves']

    # Stop the old services; on failure roll back (restart the old ones).
    stop_ret = self._stop_services(master_ip, slave_ips)
    if stop_ret.get('code', 0) != 0:
        rollback_failed_nodes = self._rollback_services(master_ip, slave_ips)
        if rollback_failed_nodes:
            return get_error_result(
                "RollbackServiceError",
                data={"rollback_failed_nodes": rollback_failed_nodes})
        logger.info('rollback services success')
        return stop_ret

    time.sleep(2)
    logger.info("stop services in all nodes success")

    # Upgrade; on failure roll back (restore the backup, restart old ones).
    upgrade_ret = self._start_upgrade(master_ip, slave_ips)
    if upgrade_ret.get('code', 0) != 0:
        rollback_failed_nodes = self._rollback_upgrade(master_ip, slave_ips)
        if rollback_failed_nodes:
            return get_error_result(
                "RollbackUpgradeError",
                data={"rollback_failed_nodes": rollback_failed_nodes})
        logger.info('rollback upgrade in all nodes success')
        return upgrade_ret

    logger.info("upgrade service in all nodes success")
    return get_error_result()
def client_heartbeat(self, client, message=None):
    """Record a terminal heartbeat and answer with the current local time.

    For a registered terminal the previous ``heartbeat`` value is saved in
    ``last_status``, ``heartbeat`` is set to ``client.status`` and
    ``alive_timestamp`` is refreshed.

    Args:
        client: Connection object; ``mac`` and ``status`` are read from it.
        message: Unused.

    Returns:
        The ``Success`` result whose ``data`` holds ``datetime`` formatted
        as ``%Y-%m-%d %H:%M:%S``.
    """
    logger.debug("terminal clients: %s " % self.clients.keys())
    terminal_mac = client.mac
    resp = get_error_result("Success", msg="en")
    now_timestamp = dt.datetime.now()
    resp["data"] = {
        "datetime": now_timestamp.strftime('%Y-%m-%d %H:%M:%S')
    }

    if terminal_mac not in self.clients:
        logger.error("terminal : %s is not exist" % client)
        return resp

    previous = self.clients[terminal_mac].heartbeat
    self.clients[terminal_mac].last_status = previous
    self.clients[terminal_mac].heartbeat = client.status
    self.clients[terminal_mac].alive_timestamp = now_timestamp
    return resp
def get_self_upgrade_status(self):
    """Report whether the self-upgrade flag file exists.

    Returns:
        ``get_error_result()`` when ``constants.SELF_UPGRADE_FILE`` exists,
        otherwise the ``OtherError`` result.
    """
    flag_present = os.path.exists(constants.SELF_UPGRADE_FILE)
    if not flag_present:
        return get_error_result("OtherError")
    return get_error_result()
def get_package_from_controller(self, data):
    """Download the upgrade package from the controller onto this node.

    The package is streamed to ``package_path`` while its MD5 digest is
    computed, then flushed and fsynced, checked against ``md5_value`` (when
    one was given) and finally unpacked.

    Args:
        data: Dict with ``package_id``, ``package_path`` and
            ``controller_image_ip`` (all required) and optional
            ``md5_value``.

    Returns:
        ``UpgradeRequestParamError`` for missing parameters,
        ``UpgradePackageMd5Failed`` on a digest mismatch,
        ``UpgradePackageFormatError`` when unpacking fails (this takes
        precedence over a digest mismatch), ``OtherError`` on any exception,
        otherwise ``Success``.
    """
    logger.info("get_package_from_controller: data: %s" % data)
    package_id = data.get("package_id")
    package_path = data.get("package_path")
    controller_image_ip = data.get("controller_image_ip")
    md5_value = data.get("md5_value")
    if not package_id or not package_path or not controller_image_ip:
        return get_error_result("UpgradeRequestParamError")

    url = constants.UPGRADE_FILE_DOWNLOAD_URL
    request_info = {
        "package_id": package_id,
        "package_path": package_path,
    }
    logger.info("get_package_from_controller: url: %s, data: %s" %
                (url, request_info))
    package_chunks = self._download(controller_image_ip, url, package_id,
                                    package_path)

    logger.info("start to save the package on path: %s" % package_path)
    base_path = os.path.dirname(package_path)
    if base_path and not os.path.exists(base_path):
        os.makedirs(base_path)

    md5_sum = hashlib.md5()
    try:
        with open(package_path, 'wb') as package_file:
            for chunk in package_chunks:
                md5_sum.update(chunk)
                package_file.write(chunk)
            # Push the data all the way down to persistent storage before
            # anything reads the file back.
            package_file.flush()
            self._safe_fsync(package_file)

        ret = get_error_result("Success")
        if md5_value:
            file_md5 = md5_sum.hexdigest()
            logger.info("check md5, md5_value:%s, file_md5_sum:%s",
                        md5_value, file_md5)
            if file_md5 != md5_value:
                logger.error(
                    "the package_id: %s, md5_value:%s, the receive file_md5_sum:%s"
                    % (package_id, md5_value, file_md5))
                ret = get_error_result("UpgradePackageMd5Failed")

        # Unpack only after the file is closed; the original did this while
        # the file was still open and unflushed.
        if not decompress_package(package_path):
            ret = get_error_result("UpgradePackageFormatError")
    except Exception:
        logger.exception("get upgrade package from controller error",
                         exc_info=True)
        ret = get_error_result("OtherError")
    return ret