def _handle(text): if text == '': return if text == 'clear': utils.clear_screen() return text = text.replace('-- --help', '?') text = text.replace('--help', '?') text = text.replace('?', '-- --help') err_flg = True try: fire.Fire(component=Command, command=text) err_flg = False except KeyboardInterrupt: msg = message.get('cancel_command_input') logger.warning('\b\b' + msg) except KeyError as ex: logger.warn('[%s] command fail' % text) logger.exception(ex) except TypeError as ex: logger.exception(ex) except IOError as ex: if ex.errno == 2: msg = message.get('error_file_not_exist').format(file=ex.filename) logger.error(msg) else: logger.exception(ex) except EOFError: msg = message.get('cancel_command_input') logger.warning('\b\b' + msg) except utils.CommandError as ex: logger.exception(ex) except FireError: pass except FireExit: pass except ( HostNameError, HostConnectionError, SSHConnectionError, FileNotExistError, YamlSyntaxError, PropsSyntaxError, PropsKeyError, PropsError, SSHCommandError, ClusterRedisError, ClusterNotExistError, ClusterIdError, EnvError, ) as ex: logger.error('{}: {}'.format(ex.class_name(), str(ex))) except LightningDBError as ex: logger.error('[ErrorCode {}] {}'.format(ex.error_code, str(ex))) except BaseException as ex: logger.exception(ex) finally: return err_flg
def is_port_empty(host, port):
    """Check whether ``(host, port)`` can be bound with a TCP/IPv4 socket.

    A probe socket is created, bound, and always closed again before
    returning, so the port is not held by this function.

    Args:
        host: Address to bind to.
        port: Port number to bind to.

    Returns:
        A ``(result, status)`` tuple:
        ``(True, 'OK')`` when the bind succeeded,
        ``(False, 'USED')`` when the bind failed with ``EADDRINUSE``,
        ``(False, 'FAIL')`` for any other error (which is logged).
    """
    result = True
    status = 'OK'
    s = None
    try:
        # Single probe socket; created inside the ``try`` so a failure to
        # create it is reported as 'FAIL' like any other error.
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind((host, port))
    except socket.error as e:
        result = False
        if e.errno == errno.EADDRINUSE:
            status = 'USED'
            logger.debug('{}:{} is already in use'.format(host, port))
        else:
            status = 'FAIL'
            logger.exception(e)
    except Exception as e:
        result = False
        status = 'FAIL'
        logger.exception(e)
    finally:
        # Release the descriptor (and the bound port) deterministically.
        if s is not None:
            s.close()
    # Returned after the ``try`` rather than from ``finally`` so that
    # KeyboardInterrupt/SystemExit are not swallowed and misreported as 'OK'.
    return result, status
def _deploy(cluster_id, history_save, clean):
    """Interactively deploy (or re-deploy) the cluster ``cluster_id``.

    Steps, in the order they run below:

    1. If the cluster state is ``DEPLOYED``, ask whether to deploy again.
    2. Ask for an installer and, when already deployed, whether to restore
       the existing configuration.
    3. Collect properties: from the existing properties file (restore), from
       an editable temporary copy of ``redis.properties`` (re-deploy without
       restore), or from ``ask_util.props`` (new deploy).
    4. Show the collected information and ask for confirmation.
    5. Check host connectivity and whether localhost is among the hosts.
    6. Check ports and loop until there is no conflict or the user accepts
       the conflict.
    7. Remove the cluster path on hosts whose state is ``PENDING``.
    8. Refuse to continue if the cluster path already exists on a newly
       added host.
    9. Optionally stop and clean the cluster, back up conf and cluster,
       transfer and run the installer, write or restore the properties,
       sync the conf directory, and remove the ``.deploy.state`` marker.

    Args:
        cluster_id: Cluster id handed to the config/DeployUtil/Center helpers.
        history_save: Forwarded as ``save=`` to ``ask_util.props`` on a new
            deploy.
        clean: When truthy and the cluster is already ``DEPLOYED``, redis is
            stopped and the cluster cleaned before the backup.

    Returns:
        Always ``None``.  The function returns early (after logging) on user
        cancel, unavailable hosts, cluster collision, installer failure or a
        missing SSH client during conf sync.
    """
    deploy_state = DeployUtil().get_state(cluster_id)
    if deploy_state == DEPLOYED:
        msg = message.get('ask_deploy_again')
        msg = msg.format(cluster_id=cluster_id)
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg, default='n')
        if not yes:
            logger.info(message.get('cancel'))
            return

    restore_yes = None
    no_localhost = False
    # One UTC timestamp is shared by both backup directory names.
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    # Fixed 'tmp' suffix: the same directory is found again on the next run.
    tmp_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, 'tmp')
    meta = [['NAME', 'VALUE']]
    path_of_fb = config.get_path_of_fb(cluster_id)
    conf_path = path_of_fb['conf_path']
    props_path = path_of_fb['redis_properties']
    cluster_path = path_of_fb['cluster_path']
    path_of_cli = config.get_path_of_cli(cluster_id)
    conf_backup_path = path_of_cli['conf_backup_path']
    tmp_backup_path = os.path.join(conf_backup_path, tmp_backup_dir)
    local_ip = config.get_local_ip()

    # ask installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)
    meta.append(['installer', installer_name])

    # ask restore conf
    if deploy_state == DEPLOYED:
        restore_yes = ask_util.askBool(message.get('ask_restore_conf'))
        meta.append(['restore', restore_yes])

    # input props
    hosts = []
    if deploy_state == DEPLOYED:
        if restore_yes:
            meta += DeployUtil().get_meta_from_props(props_path)
            hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        else:
            if not os.path.isdir(conf_backup_path):
                os.mkdir(conf_backup_path)
            # A leftover tmp directory holds edits from a previous run;
            # it is dropped unless the user wants to load it.
            if os.path.exists(tmp_backup_path):
                msg = message.get('ask_load_history_of_previous_modification')
                yes = ask_util.askBool(msg)
                if not yes:
                    shutil.rmtree(tmp_backup_path)
            # Seed a fresh tmp directory from the current redis.properties.
            if not os.path.exists(tmp_backup_path):
                os.mkdir(tmp_backup_path)
                shutil.copy(os.path.join(conf_path, 'redis.properties'),
                            os.path.join(tmp_backup_path, 'redis.properties'))
            # tmp_props_path is only bound on this branch (DEPLOYED and not
            # restore); later uses are guarded by the same condition.
            tmp_props_path = os.path.join(tmp_backup_path, 'redis.properties')
            editor.edit(tmp_props_path, syntax='sh')
            meta += DeployUtil().get_meta_from_props(tmp_props_path)
            hosts = config.get_props(tmp_props_path, 'sr2_redis_master_hosts')
    else:
        # new deploy
        props_dict = ask_util.props(cluster_id, save=history_save)
        hosts = props_dict['hosts']
        meta += DeployUtil().get_meta_from_dict(props_dict)
    utils.print_table(meta)

    msg = message.get('confirm_deploy_information')
    yes = ask_util.askBool(msg)
    if not yes:
        logger.info(message.get('cancel'))
        return

    # check node status
    success = Center().check_hosts_connection(hosts, True)
    if not success:
        msg = message.get('error_exist_unavailable_host')
        logger.error(msg)
        return
    logger.debug('Connection of all hosts ok.')
    success = Center().check_include_localhost(hosts)
    if not success:
        no_localhost = True

    # get port info
    if deploy_state == DEPLOYED:
        if restore_yes:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(props_path, key, [])
            # NOTE(review): m_ports defaults to [] above, so this division
            # raises ZeroDivisionError if no master ports are found - confirm.
            replicas = len(s_ports) // len(m_ports)
        else:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(tmp_props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(tmp_props_path, key, [])
            # NOTE(review): same ZeroDivisionError risk as the branch above.
            replicas = len(s_ports) // len(m_ports)
    else:
        m_ports = props_dict['master_ports']
        s_ports = props_dict['slave_ports']
        replicas = props_dict['replicas']
    # Repeat until there is no port conflict, or the user accepts the
    # conflict; otherwise ask for new ports and check again.
    while True:
        msg = message.get('check_port')
        logger.info(msg)
        host_ports_list = []
        for host in hosts:
            host_ports_list.append((host, m_ports + s_ports))
        conflict = Center().check_port_is_enable(host_ports_list)
        if not conflict:
            logger.info("OK")
            break
        utils.print_table([["HOST", "PORT"]] + conflict)
        msg = message.get('ask_port_collision')
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg)
        if yes:
            logger.info("OK")
            break
        m_ports = ask_util.master_ports(False, cluster_id)
        replicas = ask_util.replicas(False)
        s_ports = ask_util.slave_ports(cluster_id, len(m_ports), replicas)
    # Write the (possibly re-entered) ports back to where they came from.
    if deploy_state == DEPLOYED:
        if restore_yes:
            key = 'sr2_redis_master_ports'
            value = cluster_util.convert_list_2_seq(m_ports)
            config.set_props(props_path, key, value)
            key = 'sr2_redis_slave_ports'
            value = cluster_util.convert_list_2_seq(s_ports)
            config.set_props(props_path, key, value)
        else:
            key = 'sr2_redis_master_ports'
            value = cluster_util.convert_list_2_seq(m_ports)
            config.set_props(tmp_props_path, key, value)
            key = 'sr2_redis_slave_ports'
            value = cluster_util.convert_list_2_seq(s_ports)
            config.set_props(tmp_props_path, key, value)
    else:
        props_dict['master_ports'] = m_ports
        props_dict['slave_ports'] = s_ports
        props_dict['replicas'] = replicas

    # if pending, delete legacy on each hosts
    if no_localhost:
        if DeployUtil().get_state(cluster_id, local_ip) == PENDING:
            client = net.get_ssh(local_ip)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()
    for host in hosts:
        if DeployUtil().get_state(cluster_id, host) == PENDING:
            client = net.get_ssh(host)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()

    # added_hosts = post_hosts - pre_hosts
    msg = message.get('check_cluster_exist')
    logger.info(msg)
    added_hosts = set(hosts)
    # meta is reused from here on as the rows of the HOST/STATUS table.
    meta = []
    if deploy_state == DEPLOYED:
        # pre_hosts is only bound when DEPLOYED; the backup step below reads
        # it under the same condition.
        pre_hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        added_hosts -= set(pre_hosts)
    can_deploy = True
    if no_localhost:
        added_hosts |= set([local_ip])
    for host in added_hosts:
        # NOTE(review): this client is never closed in this loop (every path
        # ends the iteration without client.close()) - possible leak.
        client = net.get_ssh(host)
        is_localhost = Center().is_localhost(host)
        if is_localhost:
            if no_localhost:
                continue
            # presumably the 'remote' file marks a localhost that was only
            # used as a deploy source (it is touched at the end of this
            # function when no_localhost is set) - TODO confirm
            if os.path.exists(cluster_path + '/remote'):
                meta.append([host, color.green('CLEAN')])
                continue
        if net.is_exist(client, cluster_path):
            meta.append([host, color.red('CLUSTER EXIST')])
            can_deploy = False
            continue
        meta.append([host, color.green('CLEAN')])
    if meta:
        utils.print_table([['HOST', 'STATUS']] + meta)
    if not can_deploy:
        msg = message.get('error_cluster_collision')
        logger.error(msg)
        return
        # if not force:
        #     logger.error("If you trying to force, use option '--force'")
        #     return
    logger.info('OK')

    # cluster stop and clean
    if deploy_state == DEPLOYED and clean:
        center = Center()
        # Switch to the target cluster for the cleanup calls, then switch
        # back to whichever cluster was selected before.
        cur_cluster_id = config.get_cur_cluster_id(allow_empty_id=True)
        run_cluster_use(cluster_id)
        center.update_ip_port()
        center.stop_redis()
        center.remove_all_of_redis_log_force()
        center.cluster_clean()
        run_cluster_use(cur_cluster_id)

    # backup conf
    if deploy_state == DEPLOYED:
        Center().conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    backup_hosts = []
    if deploy_state == DEPLOYED:
        backup_hosts += set(pre_hosts)
    # if force:
    #     backup_hosts += added_hosts
    for host in backup_hosts:
        cluster_path = path_of_fb['cluster_path']
        # NOTE(review): client is opened and closed but not passed to
        # cluster_backup - looks unused; confirm before removing.
        client = net.get_ssh(host)
        Center().cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    msg = message.get('transfer_and_execute_installer')
    logger.info(msg)
    # Parsed as (hosts + [local_ip]) if no_localhost else hosts.
    target_hosts = hosts + [local_ip] if no_localhost else hosts
    for host in target_hosts:
        # Only the log line is conditional: localhost is not listed when it
        # is not one of the cluster hosts, but it is still installed to.
        if not (no_localhost and Center().is_localhost(host)):
            logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        # Create the cluster directory and drop the .deploy.state marker,
        # which is removed again at the end of a completed deploy.
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # setup props
    if deploy_state == DEPLOYED:
        # Restore either the backup taken above or the edited tmp copy.
        if restore_yes:
            tag = conf_backup_dir
        else:
            tag = tmp_backup_dir
        Center().conf_restore(local_ip, cluster_id, tag)
    else:
        key = 'sr2_redis_master_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])

        key = 'sr2_redis_master_ports'
        config.make_key_enable(props_path, key)
        value = cluster_util.convert_list_2_seq(props_dict['master_ports'])
        config.set_props(props_path, key, value)

        # The slave hosts value is written while the key is enabled and the
        # key is then disabled; it is re-enabled below only with replicas.
        key = 'sr2_redis_slave_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])
        config.make_key_disable(props_path, key)

        if props_dict['replicas'] > 0:
            key = 'sr2_redis_slave_hosts'
            config.make_key_enable(props_path, key)

            key = 'sr2_redis_slave_ports'
            config.make_key_enable(props_path, key)
            value = cluster_util.convert_list_2_seq(props_dict['slave_ports'])
            config.set_props(props_path, key, value)

        key = 'ssd_count'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['ssd_count'])

        # NOTE(review): for the three path keys below make_key_enable is
        # called twice with v1_flg=True and then make_key_disable once;
        # presumably this handles several lines for the same key in the
        # properties file - confirm against config.make_key_enable.
        key = 'sr2_redis_data'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_redis_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_flash_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

    # sync props
    msg = message.get('sync_conf')
    logger.info(msg)
    for node in hosts:
        # Skip nodes that resolve to one of the local IPs.
        if socket.gethostbyname(node) in config.get_local_ip_list():
            continue
        client = net.get_ssh(node)
        if not client:
            msg = message.get('error_ssh_connection').format(host=node)
            logger.error(msg)
            return
        net.copy_dir_to_remote(client, conf_path, conf_path)
        client.close()

    # set deploy state complete
    if os.path.exists(tmp_backup_path):
        shutil.rmtree(tmp_backup_path)
    for node in target_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()
    if no_localhost:
        # Creates the 'remote' file that the collision check above looks for.
        os.system('touch {}/remote'.format(cluster_path))

    msg = message.get('complete_deploy').format(cluster_id=cluster_id)
    logger.info(msg)
    Cluster().use(cluster_id)
    msg = message.get('suggest_after_deploy')
    logger.info(msg)
def _deploy_zero_downtime(cluster_id):
    """Update the cluster ``cluster_id`` with a new installer via failover.

    Steps, in the order they run below:

    1. Require that the alive master count is at least
       ``len(master hosts) * len(master ports)`` and that slaves are enabled.
    2. Ask for an installer, back up conf and cluster, install on every
       master host, restore the conf backup and remove the ``.deploy.state``
       marker.
    3. Restart the slave nodes.
    4. Temporarily set ``cluster-node-timeout`` to ``2000`` and run failover
       on every slave, retrying up to 10 times with a 5 second pause.
    5. Restore the original ``cluster-node-timeout`` values and restart the
       nodes again.

    Args:
        cluster_id: Cluster id handed to the config/DeployUtil/Center helpers.

    Returns:
        Always ``None``.  The function returns early (after logging) when a
        master is down, slaves are not enabled, the installer fails, or the
        failover did not complete within the retries.
    """
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    # NOTE(review): is_slave_enabled is read without being called; if it is
    # a function rather than an attribute/property this check never fires -
    # confirm against the config module.
    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        # NOTE(review): client is opened and closed but not passed to
        # cluster_backup - looks unused; confirm before removing.
        client = net.get_ssh(host)
        center.cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        # Create the cluster directory and drop the .deploy.state marker,
        # which is removed again once the conf has been restored.
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    for node in m_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()

    # restart slave
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis()
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)

    center.wait_until_all_redis_process_up()
    slaves_for_failover = center.get_slave_nodes()

    # Remember the current timeout of one master and one slave so it can be
    # restored after the failover.
    # NOTE(review): indexing [0] raises IndexError if any of these lists is
    # empty - confirm they are guaranteed non-empty here.
    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    # cluster failover (with no option)
    logger.info(message.get('failover_on_deploy'))
    logger.debug(slaves_for_failover)
    try_count = 0
    while try_count < 10:
        try_count += 1
        # A pass succeeds only if every address answers with the
        # "send to a slave" error, i.e. none of them is a slave any more.
        success = True
        for slave_addr in slaves_for_failover:
            host, port = slave_addr.split(':')
            stdout = center.run_failover("{}:{}".format(host, port))
            logger.debug("failover {}:{} {}".format(host, port, stdout))
            if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                # In some cases, the cluster failover is not complete
                # even if stdout is OK
                # If redis changed to master completely,
                # return 'ERR You should send CLUSTER FAILOVER to a slave'
                success = False
        if success:
            break
        msg = message.get('retry').format(try_count=try_count)
        logger.info(msg)
        # Note: this pause also runs after the 10th (final) failed attempt.
        time.sleep(5)
    # The original timeout is restored whether or not the failover succeeded.
    logger.debug('restore config: cluster-node-timeout')
    center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
    center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    # NOTE(review): master=False, slave=True is passed again here; presumably
    # the former masters are slaves after the failover - confirm that
    # stop/start_current_nodes select nodes by their current role.
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()