def ping(host=None, port=None, all=False):
    """Send ping command

    With ``all`` set, every redis is pinged, a table of the nodes that did
    not answer ``OK`` is printed, and the alive count is logged. Otherwise
    the ping is sent to the single ``host``:``port``.

    :param all: If true, send command to all
    :param host: host info for redis
    :param port: port info for redis
    """
    if not isinstance(all, bool):
        logger.error(
            message.get('error_option_type_not_boolean').format(option='all')
        )
        return

    if not all:
        # Single-target mode needs both halves of the address.
        if not host or not port:
            logger.error(message.get('use_host_port_or_option_all'))
            return
        _command('ping', False, host, port)
        return

    responses = RedisCliUtil.command_all_async('ping 2>&1')
    alive = 0
    failed_rows = []
    for role, node_host, node_port, status, _ in responses:
        if status == 'OK':
            alive += 1
            continue
        node_addr = '{}:{}'.format(node_host, node_port)
        failed_rows.append([role, node_addr, color.red('FAIL')])

    if failed_rows:
        utils.print_table([['TYPE', 'ADDR', 'RESULT']] + failed_rows)
    summary = message.get('counting_alive_redis')
    logger.info(summary.format(alive=alive, total=len(responses)))
def __init__(self, exit_status, command, hostname, port):
    """Store the details of a failed command and log them as an error.

    :param exit_status: value stored as ``self.exit_status``
    :param command: value stored as ``self.command``
    :param hostname: value stored as ``self.hostname``
    :param port: value stored as ``self.port``
    """
    self.exit_status, self.command = exit_status, command
    self.hostname, self.port = hostname, port
    # One multi-line record: a header followed by one detail per line.
    template = 'CommandError:\nexit_status={}\ncommand={}\nhost:port={}:{}'
    logger.error(template.format(exit_status, command, hostname, port))
def base_directory(default='~/tsr2'):
    """Ask for the base directory until a valid path is entered, then save it.

    A valid answer starts with ``~`` or ``/``. The accepted value is written
    to the cli config under ``base_directory``.

    :param default: default value offered by the prompt; it is kept for
        every retry (the previous recursive retry silently fell back to
        ``'~/tsr2'`` and grew the call stack on each invalid answer)
    :return: the accepted base directory string
    """
    while True:
        logger.debug('ask base directory')
        result = ask(message.get('ask_base_directory'), default=default)
        if result.startswith(('~', '/')):
            break
        logger.error(message.get('error_invalid_path').format(value=result))
    logger.info('OK, {}'.format(result))
    cli_config = config.get_cli_config()
    cli_config['base_directory'] = result
    config.save_cli_config(cli_config)
    return result
def reset_oom(all=False, host=None, port=0):
    """Send reset oom command

    :param all: If true, send command to all
    :param host: host info for redis
    :param port: port info for redis
    """
    if isinstance(all, bool):
        _command('resetOom', all, host, port)
        return
    template = message.get('error_option_type_not_boolean')
    logger.error(template.format(option='all'))
def set(self, key, value, all=False, save=False, host=None, port=None):
    """Command: redis-cli config set

    :param key: target key
    :param value: value to set
    :param save: If true, save value to config file, then re-configure
        redis and sync the configuration
    :param all: If true, send command to all redis
    :param host: host info for redis
    :param port: port info for redis
    """
    for name, flag in (('all', all), ('save', save)):
        if not isinstance(flag, bool):
            msg = m.get('error_option_type_not_boolean')
            # The template is filled with ``option=`` by every other command
            # in this code base; ``options=`` left the placeholder
            # unresolved, so reporting the error raised KeyError instead.
            logger.error(msg.format(option=name))
            return
    if (not host or not port) and not all:
        logger.error(m.get('use_host_port_or_option_all'))
        return

    sub_cmd = 'config set {key} {value} 2>&1'.format(key=key, value=value)
    if all:
        meta = []
        ret = RedisCliUtil.command_all_async(sub_cmd)
        ok_cnt = 0
        # Distinct loop names so the ``host``/``port`` parameters are not
        # clobbered while iterating over the replies.
        for m_s, node_host, node_port, result, reply in ret:
            addr = '{}:{}'.format(node_host, node_port)
            if result != 'OK':
                meta.append([m_s, addr, color.red('FAIL')])
            elif utils.to_str(reply) == 'OK':
                ok_cnt += 1
            else:
                # The command ran, but redis answered with something other
                # than OK; show that answer.
                meta.append([m_s, addr, color.red(reply)])
        if meta:
            utils.print_table([['TYPE', 'ADDR', 'RESULT']] + meta)
        logger.info('success {}/{}'.format(ok_cnt, len(ret)))
    else:
        output = RedisCliUtil.command(
            sub_cmd=sub_cmd,
            host=host,
            port=port,
            formatter=self.no_print,
        )
        output = output.strip()
        if output == "OK":
            logger.info(output)
        else:
            logger.error(output)

    if save:
        RedisCliUtil.save_redis_template_config(key, value)
        center = Center()
        center.update_ip_port()
        success = center.check_hosts_connection()
        if not success:
            return
        center.configure_redis()
        center.sync_conf()
def metakeys(key, all=False, host=None, port=0):
    """Get metakeys

    :param key: target key
    :param all: If true, send command to all
    :param host: host info for redis
    :param port: port info for redis
    """
    if isinstance(all, bool):
        # The key is wrapped in double quotes inside the sub command.
        _command('metakeys "%s"' % key, all, host, port)
        return
    template = message.get('error_option_type_not_boolean')
    logger.error(template.format(option='all'))
def reset_info(key, all=False, host=None, port=0):
    """Send reset info

    :param key: resetting target key string
    :param all: If true, send command to all
    :param host: host info for redis
    :param port: port info for redis
    """
    if isinstance(all, bool):
        _command('resetInfo %s' % key, all, host, port)
        return
    template = message.get('error_option_type_not_boolean')
    logger.error(template.format(option='all'))
def ssd_count(save, default=None):
    """Ask for the ssd count until a value greater than 0 is entered.

    :param save: If true, store the answer in the deploy history
    :param default: default offered by the prompt; when falsy, the
        ``ssd_count`` stored in the deploy history is used
    :return: the accepted ssd count as an int
    """
    while True:
        logger.debug('ask ssd count')
        history = config.get_deploy_history()
        if not default:
            default = history['ssd_count']
        answer = askInt(message.get('ask_ssd_count'), default=str(default))
        count = int(answer)
        if count > 0:
            break
        logger.error(message.get('error_ssd_count_less_than_1'))

    if save:
        history['ssd_count'] = count
        config.save_deploy_history(history)
    logger.info('OK, {}'.format(count))
    return count
def replicas(save, default=None):
    """Ask for the number of replicas until a non-negative value is entered.

    :param save: If true, store the answer in the deploy history
    :param default: default offered by the prompt; when falsy, the
        ``replicas`` value stored in the deploy history is used
    :return: the accepted replica count as an int
    """
    while True:
        logger.debug('ask replicas')
        history = config.get_deploy_history()
        if not default:
            default = history['replicas']
        count = int(askInt(message.get('ask_replicas'), default=str(default)))
        if count >= 0:
            break
        logger.error(message.get('error_replicas_less_than_0'))

    if save:
        history['replicas'] = count
        config.save_deploy_history(history)
    logger.info('OK, {}'.format(count))
    return count
def host_for_monitor(host_list):
    """Show a numbered menu of hosts and return the one the user picks.

    The prompt is repeated until the answer is a number between 1 and
    ``len(host_list)``.

    :param host_list: hosts to choose from
    :return: the selected element of ``host_list``
    """
    menu = '\n'.join(
        ' ({}) {}'.format(number, name)
        for number, name in enumerate(host_list, 1)
    )
    prompt = '\n'.join(message.get('ask_host_for_monitor')).format(list=menu)
    choice = int(askInt(prompt, default='1'))
    while not 0 < choice <= len(host_list):
        template = message.get('error_select_number')
        logger.error(template.format(max_number=len(host_list)))
        choice = int(askInt(''))
    # The menu is 1-based, the list is 0-based.
    return host_list[choice - 1]
def clean(self, logs=False):
    """Clean cluster

    Delete redis config, data, node configuration.

    :param logs: If true, delete the redis logs instead of cleaning the
        cluster
    """
    if not isinstance(logs, bool):
        template = message.get('error_option_type_not_boolean')
        logger.error(template.format(option='logs'))
        return

    center = Center()
    center.update_ip_port()
    if logs:
        center.remove_all_of_redis_log_force()
    else:
        center.cluster_clean()
        logger.info(message.get('apply_after_restart'))
def run_sync(host=None):
    """Import clusters from the host

    Directories named ``cluster_<number>`` under the base directory of
    ``host`` are copied to the same base directory on the localhost with
    ``rsync``. For a cluster that exists on both sides the user is asked
    whether to overwrite it; if so the local copy is removed first.

    :param host: remote host to import clusters from
    :return: None
    """
    if host is None:
        logger.error('host information is not available')
        return None

    cluster_base = config.get_base_directory()
    if not os.path.exists(cluster_base):
        logger.error('cluster does not exist on the localhost.')
        # makedirs also creates missing parents; mkdir raised for a nested
        # base directory.
        os.makedirs(cluster_base)

    cluster_dir = re.compile(r'cluster_[\d]+')
    local_clusters = {
        name for name in os.listdir(cluster_base) if cluster_dir.match(name)
    }

    client = net.get_ssh(host)
    try:
        if not net.is_dir(client, cluster_base):
            logger.error(
                'cluster does not exist on the host({}).'.format(host))
            return None
        listing = net.ssh_execute(client, 'ls {}'.format(cluster_base))[1]
    finally:
        # The connection is only needed for the two remote checks above;
        # it used to be left open on every path.
        client.close()
    remote_clusters = {
        name for name in listing.split() if cluster_dir.match(name)
    }

    # Clusters that only exist remotely are imported unconditionally.
    import_target = remote_clusters - local_clusters
    for cluster in local_clusters & remote_clusters:
        msg = message.get('ask_cluster_overwrite').format(
            cluster=" ".join(cluster.split('_')))
        overwrite = ask_util.askBool(msg, default='n')
        if overwrite:
            import_target.add(cluster)
            os.system("rm -rf {}".format(cluster_base + "/" + cluster))

    for target in import_target:
        os.system("rsync -a {} {}".format(
            host + ":" + cluster_base + "/" + target, cluster_base))
    logger.info("Importing cluster complete...")
def stop(self, force=False, master=True, slave=True):
    """Stop cluster

    :param force: Force the cluster to shut down
    :param master: If exclude master cluster, set False
    :param slave: If exclude slave cluster, set False
    """
    options = (('force', force), ('master', master), ('slave', slave))
    for name, value in options:
        if not isinstance(value, bool):
            template = message.get('error_option_type_not_boolean')
            logger.error(template.format(option=name))
            return

    center = Center()
    center.update_ip_port()
    if center.check_hosts_connection():
        center.stop_redis(force, master=master, slave=slave)
def _handle(text):
    """Run one line of user input through ``fire`` and report failures.

    Known errors are logged and swallowed here, so a failing command does
    not propagate an exception to the caller.

    :param text: the raw command line typed by the user
    :return: ``None`` for empty input and for ``clear``; otherwise ``True``
        if the command raised, ``False`` if it completed
    """
    if text == '':
        return
    if text == 'clear':
        utils.clear_screen()
        return

    # Normalize every help spelling ('-- --help', '--help', '?') to the
    # '-- --help' form: collapse them all to '?' first, then expand.
    text = text.replace('-- --help', '?')
    text = text.replace('--help', '?')
    text = text.replace('?', '-- --help')

    err_flg = True
    try:
        fire.Fire(component=Command, command=text)
        err_flg = False
    except KeyboardInterrupt:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except KeyError as ex:
        # Logger.warn is a deprecated alias; the rest of the file already
        # uses Logger.warning.
        logger.warning('[%s] command fail' % text)
        logger.exception(ex)
    except TypeError as ex:
        logger.exception(ex)
    except IOError as ex:
        if ex.errno == 2:
            # errno 2: the file named in the exception does not exist.
            msg = message.get('error_file_not_exist').format(file=ex.filename)
            logger.error(msg)
        else:
            logger.exception(ex)
    except EOFError:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except utils.CommandError as ex:
        logger.exception(ex)
    except FireError:
        pass
    except FireExit:
        pass
    except (
        HostNameError,
        HostConnectionError,
        SSHConnectionError,
        FileNotExistError,
        YamlSyntaxError,
        PropsSyntaxError,
        PropsKeyError,
        PropsError,
        SSHCommandError,
        ClusterRedisError,
        ClusterNotExistError,
        ClusterIdError,
        EnvError,
    ) as ex:
        logger.error('{}: {}'.format(ex.class_name(), str(ex)))
    except LightningDBError as ex:
        logger.error('[ErrorCode {}] {}'.format(ex.error_code, str(ex)))
    except BaseException as ex:
        logger.exception(ex)
    finally:
        # Returning from ``finally`` discards any exception still in flight,
        # so this function always hands back the error flag.
        return err_flg
def _initial_check():
    """Verify ssh access to localhost and make sure a base directory exists.

    Exits the process with status 1 when the ssh check fails. When the cli
    config has no usable ``base_directory`` the user is asked for one. The
    directory is created if it is missing.
    """
    try:
        # Simple check to see if ssh access to localhost is possible
        net.get_ssh('localhost')
    except paramiko.ssh_exception.SSHException:
        msg = message.get('error_ssh_connection').format(host='localhost')
        logger.error(msg)
        exit(1)

    cli_config = config.get_cli_config()
    # Bound up front: the KeyError path used to leave the name undefined,
    # so the check below raised UnboundLocalError instead of prompting.
    base_directory = None
    try:
        base_directory = cli_config['base_directory']
    except KeyError:
        # Key not configured yet; fall through to the prompt.
        pass
    except TypeError:
        # The loaded config is not subscriptable; remove the config file
        # and fall through to the prompt.
        root_of_cli_config = config.get_root_of_cli_config()
        conf_path = os.path.join(root_of_cli_config, 'config')
        os.system('rm {}'.format(conf_path))

    if not base_directory or not base_directory.startswith(('~', '/')):
        base_directory = ask_util.base_directory()
    base_directory = os.path.expanduser(base_directory)
    if not os.path.isdir(base_directory):
        os.system('mkdir -p {}'.format(base_directory))
def start(self, profile=False, master=True, slave=True):
    """Start cluster

    Raises ``LightningDBError`` (code 11 for master, 12 for slave) when
    the count of alive redis processes is larger than the count obtained
    with ``check_owner=True``.

    :param profile: passed through to ``start_redis_process``
    :param master: If exclude master cluster, set False
    :param slave: If exclude slave cluster, set False
    """
    logger.debug("command 'cluster start'")
    options = (('profile', profile), ('master', master), ('slave', slave))
    for name, value in options:
        if not isinstance(value, bool):
            template = message.get('error_option_type_not_boolean')
            logger.error(template.format(option=name))
            return

    center = Center()
    center.update_ip_port()
    if not center.check_hosts_connection():
        return
    center.ensure_cluster_exist()

    # (enabled, alive counter, message key, error code) per role
    collision_checks = (
        (
            master,
            center.get_alive_master_redis_count,
            'error_cluster_start_master_collision',
            11,
        ),
        (
            slave,
            center.get_alive_slave_redis_count,
            'error_cluster_start_slave_collision',
            12,
        ),
    )
    for enabled, count_alive, message_key, error_code in collision_checks:
        if not enabled:
            continue
        alive_total = count_alive()
        alive_mine = count_alive(check_owner=True)
        foreign = alive_total - alive_mine
        if foreign > 0:
            lines = message.get(message_key)
            raise LightningDBError(
                error_code,
                '\n'.join(lines).format(count=foreign),
            )

    center.backup_server_logs(master=master, slave=slave)
    center.create_redis_data_directory()

    # equal to cluster.configure()
    center.configure_redis()
    center.sync_conf(show_result=True)

    center.start_redis_process(profile, master=master, slave=slave)
    center.wait_until_all_redis_process_up(master=master, slave=slave)
def run_deploy(cluster_id=None, history_save=True, clean=False,
               strategy="none"):
    """Install LightningDB package.

    :param cluster_id: cluster id; when omitted, the current cluster id is
        used
    :param history_save: save input history and use as default
    :param clean: delete redis log, node configuration
    :param strategy:
        none(default): normal deploy,
        zero-downtime: re-deploy without stop
    """
    # validate cluster id
    if cluster_id is None:
        cluster_id = config.get_cur_cluster_id(allow_empty_id=True)
        if cluster_id < 0:
            logger.error(message.get('error_invalid_cluster_on_deploy'))
            return
    if not cluster_util.validate_id(cluster_id):
        raise ClusterIdError(cluster_id)

    # validate option
    flags = (('history-save', history_save), ('clean', clean))
    for flag_name, flag_value in flags:
        if not isinstance(flag_value, bool):
            template = message.get('error_option_type_not_boolean')
            logger.error(template.format(option=flag_name))
            return
        logger.debug("option '--{}': {}".format(flag_name, flag_value))

    strategy_list = ["none", "zero-downtime"]
    if strategy not in strategy_list:
        template = message.get('error_deploy_strategy')
        logger.error(template.format(value=strategy, list=strategy_list))
        return

    if strategy != "zero-downtime":
        _deploy(cluster_id, history_save, clean)
        return
    run_cluster_use(cluster_id)
    _deploy_zero_downtime(cluster_id)
def get(self, key, all=False, host=None, port=None):
    """Command: redis-cli config get

    :param key: redis config keyword
    :param all: If true, send command to all redis
    :param host: host info for redis
    :param port: port info for redis
    """
    if not isinstance(all, bool):
        msg = m.get('error_option_type_not_boolean')
        # The template is filled with ``option=`` by every other command in
        # this code base; ``options=`` left the placeholder unresolved, so
        # reporting the error raised KeyError instead.
        logger.error(msg.format(option='all'))
        return
    if (not host or not port) and not all:
        logger.error(m.get('use_host_port_or_option_all'))
        return

    sub_cmd = 'config get "{key}" 2>&1'.format(key=key)
    if all:
        meta = []
        ret = RedisCliUtil.command_all_async(sub_cmd)
        # Distinct loop names so the ``host``/``port`` parameters are not
        # clobbered while iterating over the replies.
        for m_s, node_host, node_port, result, reply in ret:
            addr = '{}:{}'.format(node_host, node_port)
            if result != 'OK':
                meta.append([m_s, addr, color.red(result)])
            elif reply:
                # The reply is split into two lines; the second is shown.
                _, value = reply.split('\n')
                meta.append([m_s, addr, value])
            else:
                meta.append([m_s, addr, color.red('Invalid Key')])
        utils.print_table([['TYPE', 'ADDR', 'RESULT']] + meta)
    else:
        output = RedisCliUtil.command(
            sub_cmd=sub_cmd,
            host=host,
            port=port,
            formatter=self.no_print,
        )
        output = output.strip()
        if output:
            _, value = output.split('\n')
            logger.info(value)
        else:
            msg = m.get('error_invalid_key').format(key=key)
            logger.error(msg)
def run_monitor(n=10, t=2):
    """Monitoring logs of redis.

    Runs ``watch`` with ``tail`` over ssh on the selected host. If that
    raises, it falls back to ``tail -F`` executed through an ssh client.

    :param n: number of lines to print log
    :param t: renewal cycle(sec)
    """
    if not isinstance(n, int):
        template = message.get('error_option_type_not_number')
        logger.error(template.format(option='n'))
        return
    if not isinstance(t, (int, float)):
        template = message.get('error_option_type_not_float')
        logger.error(template.format(option='t'))
        return

    try:
        sp.check_output('which tail', shell=True)
    except Exception:
        logger.error(message.get('error_not_found_command_tail'))
        return

    fb_paths = config.get_path_of_fb(config.get_cur_cluster_id())
    log_files = '{}/servers*'.format(fb_paths['sr2_redis_log'])
    target_host = ask_util.host_for_monitor(config.get_master_host_list())

    try:
        sp.check_output('which watch', shell=True)
        watch_cmd = "ssh -t {} watch -n {} 'tail -n {} {}'".format(
            target_host, t, n, log_files)
        sp.call(watch_cmd, shell=True)
    except Exception:
        logger.warning(message.get('error_not_found_command_watch'))
        logger.info(message.get('message_for_exit'))
        tail_cmd = "tail -F -s {} {}".format(t, log_files)
        net.ssh_execute_async(net.get_ssh(target_host), tail_cmd)
def master_ports(save, cluster_id, default_count=None):
    """Ask for the number of master ports and then for the ports themselves.

    Ports are entered as a comma separated list of single numbers and/or
    ``start-end`` ranges. The question is repeated until the input parses,
    every port passes ``port_range_safe`` and the number of distinct ports
    equals the requested count.

    :param save: If true, store the master count in the deploy history
    :param cluster_id: cluster id, used to compute the default port range
    :param default_count: default master count offered by the prompt; when
        falsy, the ``master_count`` from the deploy history is used
    :return: sorted list of the selected ports
    """
    logger.debug('ask master ports')
    deploy_history = config.get_deploy_history()
    if not default_count:
        default_count = deploy_history['master_count']

    # Retry in a loop. The previous recursive retry called
    # master_ports(cluster_id, default_count), which shifted every argument
    # one slot to the left (cluster_id became ``save``, etc.).
    msg = message.get('ask_master_count')
    while True:
        m_count = int(askInt(msg, default=str(default_count)))
        if m_count > 0:
            break
        logger.error(message.get('error_master_count_less_than_1'))
    logger.info('OK, {}'.format(m_count))
    if save:
        deploy_history['master_count'] = m_count
        config.save_deploy_history(deploy_history)

    start_m_ports = START_PORT + cluster_id * MASTER_OFFSET
    end_m_ports = start_m_ports + m_count - 1
    if start_m_ports == end_m_ports:
        default_m_ports = str(start_m_ports)
    else:
        default_m_ports = '{}-{}'.format(start_m_ports, end_m_ports)

    # Anchored at the end so input such as '1-2-3' is reported as invalid
    # instead of crashing on the two-value unpack below.
    range_pattern = re.compile(r'[0-9]+-[0-9]+$')
    while True:
        result = ask(message.get('ask_ports'), default=default_m_ports)
        result = [item.strip() for item in result.split(',')]
        valid = True
        m_ports = set()
        for item in result:
            if range_pattern.match(item):
                # range number
                s, e = map(int, item.split('-'))
                if s > e:
                    msg = message.get('error_invalid_range').format(value=item)
                    logger.error(msg)
                    valid = False
                    break
                m_ports.update(range(s, e + 1))
            elif utils.is_number(item):
                # single number
                m_ports.add(int(item))
            else:
                msg = message.get('error_invalid_input').format(value=item)
                logger.error(msg)
                valid = False
                break
        if not valid:
            continue

        out_of_range = [port for port in m_ports if not port_range_safe(port)]
        if out_of_range:
            msg = message.get('error_port_range').format(
                minimum=PORT_MININUM,
                maximum=PORT_MAXIMUM,
                value=out_of_range,
            )
            logger.error(msg)
            continue

        if len(m_ports) != m_count:
            msg = message.get('error_port_count_different').format(
                advance=m_count,
                current=len(m_ports),
            )
            logger.error(msg)
            continue
        break

    m_ports = sorted(m_ports)
    logger.info('OK, {}'.format(result))
    return m_ports
def restart(
    self,
    force_stop=False,
    reset=False,
    cluster=False,
    profile=False,
    yes=False,
):
    """Restart cluster

    All options are validated before anything is stopped, so an invalid
    option can no longer leave the cluster stopped but not restarted.

    :param force_stop: Force the cluster to shut down
    :param reset: Delete redis config, data, node configuration
    :param cluster: Create cluster after cluster start
        (only valid together with ``reset``)
    :param profile: passed through to ``start``
    :param yes: Skip confirm information when cluster create
        (only valid together with ``cluster``)
    """
    # Type checks come first: a non-boolean value must be reported as a
    # type error, not as a missing companion option.
    bool_options = (
        ('force-stop', force_stop),
        ('reset', reset),
        ('cluster', cluster),
        ('profile', profile),
        ('yes', yes),
    )
    for name, value in bool_options:
        if not isinstance(value, bool):
            msg = message.get('error_option_type_not_boolean')
            msg = msg.format(option=name)
            logger.error(msg)
            return

    # (option, its value, required companion option, companion's value)
    dependencies = (
        ('cluster', cluster, 'reset', reset),
        ('yes', yes, 'cluster', cluster),
    )
    for option, enabled, with_option, companion in dependencies:
        if enabled and not companion:
            # Both dependency errors use the catalog template; the second
            # one was previously overwritten by a hard-coded literal.
            msg = message.get('error_option_use_with')
            msg = msg.format(option=option, with_option=with_option)
            logger.error(msg)
            return

    center = Center()
    center.update_ip_port()
    success = center.check_hosts_connection()
    if not success:
        return
    center.stop_redis(force=force_stop)
    if reset:
        self.clean()
    self.start(profile=profile)
    if cluster:
        self.create(yes=yes)
def add_slave(self, yes=False):
    """Add slave of cluster

    Add slaves to cluster that configured master only.

    Raises ``ClusterRedisError`` when no slave hosts or ports are
    configured, and ``LightningDBError`` (code 12) when the count of alive
    slave processes is larger than the count obtained with
    ``check_owner=True``.

    :param yes: Skip confirm information
    """
    logger.debug('add_slave')
    if not isinstance(yes, bool):
        msg = message.get('error_option_type_not_boolean')
        msg = msg.format(option='yes')
        logger.error(msg)
        return

    center = Center()
    center.update_ip_port()

    # check
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    if not s_hosts:
        msg = message.get('error_slave_host_empty')
        raise ClusterRedisError(msg)
    if not s_ports:
        msg = message.get('error_slave_port_empty')
        raise ClusterRedisError(msg)

    success = center.check_hosts_connection(hosts=s_hosts)
    if not success:
        return
    center.ensure_cluster_exist()
    slave_alive_count = center.get_alive_slave_redis_count()
    slave_alive_count_mine = center.get_alive_slave_redis_count(
        check_owner=True
    )
    not_mine_count = slave_alive_count - slave_alive_count_mine
    if not_mine_count > 0:
        msg = message.get('error_cluster_start_slave_collision')
        msg = '\n'.join(msg).format(count=not_mine_count)
        raise LightningDBError(12, msg)

    # confirm info
    result = center.confirm_node_port_info(skip=yes)
    if not result:
        msg = message.get('cancel')
        logger.warning(msg)
        return

    # clean
    center.cluster_clean(master=False)

    # backup logs
    center.backup_server_logs(master=False)
    center.create_redis_data_directory(master=False)

    # configure
    center.configure_redis(master=False)
    center.sync_conf()

    # start
    center.start_redis_process(master=False)
    center.wait_until_all_redis_process_up()

    # change redis config temporarily
    key = 'cluster-node-timeout'
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    if origin_s_value:
        # cli config set cluster-node-timeout 2000
        logger.debug('set cluster node time out 2000 for create')
        center.cli_config_set_all(key, '2000', s_hosts, s_ports)
    else:
        # Without the original value there is nothing to restore later, so
        # the timeout is left untouched.
        msg = "RedisConfigKeyError: '{}'".format(key)
        logger.warning(msg)

    try:
        # create
        center.replicate()
    finally:
        # Restore even when replicate() raises, so the temporary timeout
        # is not left in place on the slaves.
        if origin_s_value:
            # cli config restore cluster-node-timeout
            logger.debug('restore cluster node time out')
            center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
def restore(self, cluster_id, tag=None):
    """Restore cluster

    The current cluster is backed up on every host before the selected
    backup is copied back into place.

    :param cluster_id: target cluster id
    :param tag: Tag of backup, if omitted, restore the most recent backup file
    :raises ClusterIdError: if ``cluster_id`` fails validation
    """
    logger.debug('cluster restore: cluster_id={}, tag={}'.format(
        cluster_id,
        tag
    ))
    if not cluster_util.validate_id(cluster_id):
        raise ClusterIdError(cluster_id)

    # find restore folder with tag (local)
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_backup_path = path_of_fb['cluster_backup_path']
    if tag is None:
        prefix = 'cluster_{}_bak_'.format(cluster_id)
        candidates = sorted(
            name for name in os.listdir(cluster_backup_path)
            if name.startswith(prefix)
        )
        if not candidates:
            msg = message.get('error_not_found_any_backup')
            logger.error('BackupNotExistError: ' + msg)
            return
        # Backup names end with a timestamp, so the last one in sorted
        # order is the most recent.
        tag = candidates[-1]
        logger.debug("tag option is empty, auto select: {}".format(tag))
    cluster_restore_dir = tag
    backup_path = os.path.join(cluster_backup_path, cluster_restore_dir)
    if not os.path.isdir(backup_path):
        msg = message.get('error_not_found_backup').format(tag=tag)
        logger.error('BackupNotExistError: ' + msg)
        return

    # get hosts from cluster props
    props_path = os.path.join(
        backup_path,
        'tsr2-assembly-1.0.0-SNAPSHOT',
        'conf',
        'redis.properties'
    )
    hosts = config.get_props(props_path, 'sr2_redis_master_hosts', [])

    # check status of hosts
    success = Center().check_hosts_connection(hosts, True)
    if not success:
        msg = message.get('error_exist_unavailable_host')
        logger.error(msg)
        return
    logger.debug('Connection of all hosts ok.')
    success = Center().check_include_localhost(hosts)
    if not success:
        msg = message.get('error_not_include_localhost')
        logger.error(msg)
        return

    # check all host tag folder: OK / NOT FOUND
    msg = message.get('check_backup_info')
    logger.info(msg)
    buf = []
    for host in hosts:
        client = net.get_ssh(host)
        try:
            if not net.is_dir(client, backup_path):
                logger.debug('cannot find backup dir: {}-{}'.format(
                    host,
                    cluster_restore_dir
                ))
                buf.append([host, color.red('NOT FOUND')])
        finally:
            client.close()
    if buf:
        # Header row followed by one row per host. The header and the rows
        # used to be concatenated into a single malformed row.
        utils.print_table([['HOST', 'RESULT']] + buf)
        return
    logger.info('OK')

    # backup cluster
    new_tag = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, new_tag)
    for host in hosts:
        Center().cluster_backup(host, cluster_id, cluster_backup_dir)

    # restore cluster
    command = "cp -r {} {}/cluster_{}".format(
        backup_path,
        path_of_fb['base_directory'],
        cluster_id
    )
    for host in hosts:
        # NOTE(review): the message is given the name of the backup that
        # was just created, not the tag being restored - confirm this is
        # intended.
        msg = message.get('restore_cluster')
        msg = msg.format(tag=cluster_backup_dir, host=host)
        logger.info(msg)
        client = net.get_ssh(host)
        try:
            net.ssh_execute(client, command)
        finally:
            client.close()
    logger.info("OK")
def _deploy(cluster_id, history_save, clean):
    """Interactively deploy (or re-deploy) a cluster.

    Steps, in order: confirm a re-deploy, ask for the installer, collect
    the properties (restore the existing ones, edit a temporary copy, or
    ask for new ones), confirm, check host connections and port
    collisions, check that the cluster path is free on newly added hosts,
    optionally stop and clean the existing cluster, back up conf and
    cluster, transfer and run the installer, write the properties, sync
    conf to the other hosts and clear the ``.deploy.state`` marker.

    :param cluster_id: id of the cluster to deploy
    :param history_save: passed to ``ask_util.props`` as ``save`` on a new
        deploy
    :param clean: on a re-deploy, stop redis and remove its logs and
        cluster data before installing
    """
    deploy_state = DeployUtil().get_state(cluster_id)
    if deploy_state == DEPLOYED:
        msg = message.get('ask_deploy_again')
        msg = msg.format(cluster_id=cluster_id)
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg, default='n')
        if not yes:
            logger.info(message.get('cancel'))
            return

    restore_yes = None
    no_localhost = False
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    tmp_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, 'tmp')
    meta = [['NAME', 'VALUE']]
    path_of_fb = config.get_path_of_fb(cluster_id)
    conf_path = path_of_fb['conf_path']
    props_path = path_of_fb['redis_properties']
    cluster_path = path_of_fb['cluster_path']
    path_of_cli = config.get_path_of_cli(cluster_id)
    conf_backup_path = path_of_cli['conf_backup_path']
    tmp_backup_path = os.path.join(conf_backup_path, tmp_backup_dir)
    local_ip = config.get_local_ip()

    # ask installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)
    meta.append(['installer', installer_name])

    # ask restore conf
    if deploy_state == DEPLOYED:
        restore_yes = ask_util.askBool(message.get('ask_restore_conf'))
        meta.append(['restore', restore_yes])

    # input props
    hosts = []
    if deploy_state == DEPLOYED:
        if restore_yes:
            meta += DeployUtil().get_meta_from_props(props_path)
            hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        else:
            if not os.path.isdir(conf_backup_path):
                os.mkdir(conf_backup_path)
            if os.path.exists(tmp_backup_path):
                # A temporary copy from an earlier run exists; keep it only
                # if the user wants to continue from it.
                msg = message.get('ask_load_history_of_previous_modification')
                yes = ask_util.askBool(msg)
                if not yes:
                    shutil.rmtree(tmp_backup_path)
            if not os.path.exists(tmp_backup_path):
                os.mkdir(tmp_backup_path)
                shutil.copy(
                    os.path.join(conf_path, 'redis.properties'),
                    os.path.join(tmp_backup_path, 'redis.properties')
                )
            tmp_props_path = os.path.join(tmp_backup_path, 'redis.properties')
            editor.edit(tmp_props_path, syntax='sh')
            meta += DeployUtil().get_meta_from_props(tmp_props_path)
            hosts = config.get_props(tmp_props_path, 'sr2_redis_master_hosts')
    else:
        # new deploy
        props_dict = ask_util.props(cluster_id, save=history_save)
        hosts = props_dict['hosts']
        meta += DeployUtil().get_meta_from_dict(props_dict)
    utils.print_table(meta)

    msg = message.get('confirm_deploy_information')
    yes = ask_util.askBool(msg)
    if not yes:
        logger.info(message.get('cancel'))
        return

    # check node status
    success = Center().check_hosts_connection(hosts, True)
    if not success:
        msg = message.get('error_exist_unavailable_host')
        logger.error(msg)
        return
    logger.debug('Connection of all hosts ok.')
    success = Center().check_include_localhost(hosts)
    if not success:
        no_localhost = True

    # get port info
    if deploy_state == DEPLOYED:
        if restore_yes:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(props_path, key, [])
            replicas = len(s_ports) // len(m_ports)
        else:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(tmp_props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(tmp_props_path, key, [])
            replicas = len(s_ports) // len(m_ports)
    else:
        m_ports = props_dict['master_ports']
        s_ports = props_dict['slave_ports']
        replicas = props_dict['replicas']

    # Re-ask for ports until there is no collision or the user accepts it.
    while True:
        msg = message.get('check_port')
        logger.info(msg)
        host_ports_list = []
        for host in hosts:
            host_ports_list.append((host, m_ports + s_ports))
        conflict = Center().check_port_is_enable(host_ports_list)
        if not conflict:
            logger.info("OK")
            break
        utils.print_table([["HOST", "PORT"]] + conflict)
        msg = message.get('ask_port_collision')
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg)
        if yes:
            logger.info("OK")
            break
        m_ports = ask_util.master_ports(False, cluster_id)
        replicas = ask_util.replicas(False)
        s_ports = ask_util.slave_ports(cluster_id, len(m_ports), replicas)
    if deploy_state == DEPLOYED:
        if restore_yes:
            key = 'sr2_redis_master_ports'
            value = cluster_util.convert_list_2_seq(m_ports)
            config.set_props(props_path, key, value)
            key = 'sr2_redis_slave_ports'
            value = cluster_util.convert_list_2_seq(s_ports)
            config.set_props(props_path, key, value)
        else:
            key = 'sr2_redis_master_ports'
            value = cluster_util.convert_list_2_seq(m_ports)
            config.set_props(tmp_props_path, key, value)
            key = 'sr2_redis_slave_ports'
            value = cluster_util.convert_list_2_seq(s_ports)
            config.set_props(tmp_props_path, key, value)
    else:
        props_dict['master_ports'] = m_ports
        props_dict['slave_ports'] = s_ports
        props_dict['replicas'] = replicas

    # if pending, delete legacy on each hosts
    if no_localhost:
        if DeployUtil().get_state(cluster_id, local_ip) == PENDING:
            client = net.get_ssh(local_ip)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()
    for host in hosts:
        if DeployUtil().get_state(cluster_id, host) == PENDING:
            client = net.get_ssh(host)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()

    # added_hosts = post_hosts - pre_hosts
    msg = message.get('check_cluster_exist')
    logger.info(msg)
    added_hosts = set(hosts)
    meta = []
    if deploy_state == DEPLOYED:
        pre_hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        added_hosts -= set(pre_hosts)
    can_deploy = True
    if no_localhost:
        added_hosts |= set([local_ip])
    for host in added_hosts:
        client = net.get_ssh(host)
        try:
            is_localhost = Center().is_localhost(host)
            if is_localhost:
                if no_localhost:
                    continue
                if os.path.exists(cluster_path + '/remote'):
                    meta.append([host, color.green('CLEAN')])
                    continue
            if net.is_exist(client, cluster_path):
                meta.append([host, color.red('CLUSTER EXIST')])
                can_deploy = False
                continue
            meta.append([host, color.green('CLEAN')])
        finally:
            # This connection was previously never closed, on any path
            # through the loop body.
            if client:
                client.close()
    if meta:
        utils.print_table([['HOST', 'STATUS']] + meta)
    if not can_deploy:
        msg = message.get('error_cluster_collision')
        logger.error(msg)
        return
        # if not force:
        #     logger.error("If you trying to force, use option '--force'")
        #     return
    logger.info('OK')

    # cluster stop and clean
    if deploy_state == DEPLOYED and clean:
        center = Center()
        # Switch to the target cluster for the cleanup, then switch back
        # to the cluster that was in use before.
        cur_cluster_id = config.get_cur_cluster_id(allow_empty_id=True)
        run_cluster_use(cluster_id)
        center.update_ip_port()
        center.stop_redis()
        center.remove_all_of_redis_log_force()
        center.cluster_clean()
        run_cluster_use(cur_cluster_id)

    # backup conf
    if deploy_state == DEPLOYED:
        Center().conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    backup_hosts = []
    if deploy_state == DEPLOYED:
        backup_hosts += set(pre_hosts)
    # if force:
    #     backup_hosts += added_hosts
    for host in backup_hosts:
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(host)
        Center().cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    msg = message.get('transfer_and_execute_installer')
    logger.info(msg)
    target_hosts = hosts + [local_ip] if no_localhost else hosts
    for host in target_hosts:
        # The localhost is installed as well when it is not one of the
        # cluster hosts, but it is not listed in the progress output.
        if not (no_localhost and Center().is_localhost(host)):
            logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # setup props
    if deploy_state == DEPLOYED:
        if restore_yes:
            tag = conf_backup_dir
        else:
            tag = tmp_backup_dir
        Center().conf_restore(local_ip, cluster_id, tag)
    else:
        key = 'sr2_redis_master_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])

        key = 'sr2_redis_master_ports'
        config.make_key_enable(props_path, key)
        value = cluster_util.convert_list_2_seq(props_dict['master_ports'])
        config.set_props(props_path, key, value)

        # The slave hosts are written and then disabled again; they are
        # only re-enabled below when replicas are requested.
        key = 'sr2_redis_slave_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])
        config.make_key_disable(props_path, key)

        if props_dict['replicas'] > 0:
            key = 'sr2_redis_slave_hosts'
            config.make_key_enable(props_path, key)

            key = 'sr2_redis_slave_ports'
            config.make_key_enable(props_path, key)
            value = cluster_util.convert_list_2_seq(props_dict['slave_ports'])
            config.set_props(props_path, key, value)

        key = 'ssd_count'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['ssd_count'])

        # NOTE(review): the enable/enable/disable sequence below is kept
        # exactly as it was; its purpose depends on how make_key_enable
        # and make_key_disable treat repeated keys - confirm.
        key = 'sr2_redis_data'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_redis_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_flash_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

    # sync props
    msg = message.get('sync_conf')
    logger.info(msg)
    for node in hosts:
        # The conf directory already exists on the localhost.
        if socket.gethostbyname(node) in config.get_local_ip_list():
            continue
        client = net.get_ssh(node)
        if not client:
            msg = message.get('error_ssh_connection').format(host=node)
            logger.error(msg)
            return
        net.copy_dir_to_remote(client, conf_path, conf_path)
        client.close()

    # set deploy state complete
    if os.path.exists(tmp_backup_path):
        shutil.rmtree(tmp_backup_path)
    for node in target_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()
    if no_localhost:
        # Marker file read by the cluster-existence check above on a later
        # deploy.
        os.system('touch {}/remote'.format(cluster_path))

    msg = message.get('complete_deploy').format(cluster_id=cluster_id)
    logger.info(msg)
    Cluster().use(cluster_id)
    msg = message.get('suggest_after_deploy')
    logger.info(msg)
def _deploy_zero_downtime(cluster_id):
    """Install a new package on a running cluster without stopping it

    Steps, in order: verify that every master redis is alive, ask for an
    installer, back up the conf and cluster directories, transfer and run
    the installer on each master host, restore the conf, restart the
    slaves, send cluster failover to every slave (with
    'cluster-node-timeout' temporarily set to 2000) and finally restart
    the nodes that currently hold the slave role.

    Every failure is reported through the logger and the function returns
    None in all cases.

    :param cluster_id: target cluster id
    """
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    # NOTE(review): the attribute is tested without being called; if
    # config.is_slave_enabled is a function this guard can never fire.
    # Its definition is not visible here - confirm and add '()' if needed.
    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        # NOTE(review): this connection is not passed to cluster_backup();
        # presumably it only verifies the host is reachable - confirm.
        client = net.get_ssh(host)
        try:
            center.cluster_backup(host, cluster_id, cluster_backup_dir)
        finally:
            # close the connection even when the backup raises
            client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    # the marker file is removed again in the 'deploy state complete' step
    mark_cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        try:
            net.ssh_execute(client=client, command=mark_cmd)
        finally:
            client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']
    unmark_cmd = 'rm -rf {}'.format(
        os.path.join(cluster_path, '.deploy.state')
    )
    for node in m_hosts:
        client = net.get_ssh(node)
        try:
            net.ssh_execute(client=client, command=unmark_cmd)
        finally:
            client.close()

    # restart slave
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis()
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()
    slaves_for_failover = center.get_slave_nodes()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    success = False
    try:
        # cluster failover (with no option)
        logger.info(message.get('failover_on_deploy'))
        logger.debug(slaves_for_failover)
        try_count = 0
        while try_count < 10:
            try_count += 1
            success = True
            for slave_addr in slaves_for_failover:
                host, port = slave_addr.split(':')
                stdout = center.run_failover("{}:{}".format(host, port))
                logger.debug("failover {}:{} {}".format(host, port, stdout))
                if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                    # In some cases, the cluster failover is not complete
                    # even if stdout is OK
                    # If redis changed to master completely,
                    # return 'ERR You should send CLUSTER FAILOVER to a slave'
                    success = False
            if success:
                break
            msg = message.get('retry').format(try_count=try_count)
            logger.info(msg)
            time.sleep(5)
    finally:
        # always put the original timeout back, even when the failover
        # loop raises or is interrupted
        logger.debug('restore config: cluster-node-timeout')
        center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
        center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()
def _download_installer(url, file_name, release_path):
    """Download `url` into `release_path` and return the local file path

    :param url: source url handed to net.download_file
    :param file_name: name of the file to create under `release_path`
    :param release_path: directory that receives the download
    :return: path of the downloaded installer, or None when the download
        failed (the error is logged here)
    """
    installer_path = os.path.join(release_path, file_name)
    if net.download_file(url, installer_path):
        logger.info('OK, {}'.format(file_name))
        return installer_path
    msg = message.get('error_download_installer')
    msg = msg.format(url=url)
    logger.error(msg)
    return None


def installer():
    """Ask the user for an installer and return its path

    The user may pick a number from the listed installers (entries
    returned by net.get_installers_from_fb_s3 plus the files found in the
    release directory), type a file path starting with '~' or '/', or
    type an http(s) url to download. The prompt repeats until one of these
    yields a usable installer.

    :return: installer path
    """
    logger.debug('ask installer')
    path_of_cli = config.get_path_of_cli(None)
    release_path = path_of_cli['release_path']
    if not os.path.exists(release_path):
        # makedirs also creates missing parent directories
        os.makedirs(release_path)
    installer_list = net.get_installers_from_fb_s3()
    for file_name in os.listdir(release_path):
        # skip '.gitignore' and any name containing '.download'
        if file_name == '.gitignore' or '.download' in file_name:
            continue
        installer_list.append({
            'name': file_name,
            'url': os.path.join(release_path, file_name),
            'type': 'local',
        })

    # formatting msg
    formatted = [
        ' ({index}) [{type}] {name}'.format(
            index=i + 1,
            name=obj['name'],
            type=obj['type'].upper(),
        )
        for i, obj in enumerate(installer_list)
    ]
    if not formatted:
        formatted.append(' (empty)')
    stringfied_list = '\n'.join(formatted)
    msg = '\n'.join(message.get('ask_installer')).format(list=stringfied_list)

    result = ask(msg)
    while True:
        result = result.strip()
        if installer_list and utils.is_number(result):
            # case: select in list
            index = int(result) - 1
            download_failed = False
            if 0 <= index < len(installer_list):
                selected = installer_list[index]
                if selected['type'] == 'download':
                    installer_path = _download_installer(
                        selected['url'],
                        selected['name'],
                        release_path,
                    )
                    if installer_path is not None:
                        return installer_path
                    download_failed = True
                elif selected['type'] == 'local':
                    ret = selected['url']
                    logger.debug('Select insaller in list: {}'.format(ret))
                    logger.info('OK, {}'.format(selected['name']))
                    return os.path.expanduser(ret)
            if not download_failed:
                # The selection itself was valid when only the download
                # failed, so the 'select number' error is not repeated then.
                msg = message.get('error_select_number')
                msg = msg.format(max_number=len(installer_list))
                logger.error(msg)
        elif result.startswith(('~', '/')):
            # case: type path
            if os.path.isfile(os.path.expanduser(result)):
                logger.debug('Select insaller by path: {}'.format(result))
                logger.info('OK, {}'.format(os.path.basename(result)))
                return os.path.expanduser(result)
            msg = message.get('error_type_installer_path')
            msg = msg.format(file_path=result)
            logger.error(msg)
        elif result.startswith(('http://', 'https://')):
            # case: type url
            url = result
            # file name is the last path segment, without the query string
            file_name = url.split('?')[0].split('/')[-1]
            installer_path = _download_installer(url, file_name, release_path)
            if installer_path is not None:
                return installer_path
        else:
            msg = message.get('error_invalid_input')
            msg = msg.format(value=result)
            logger.error(msg)
        result = ask('')
def __init__(self, target):
    """Store the duplicated target and log it as an error

    :param target: value reported as duplicated; kept on `self.target`
    """
    self.target = target
    # Wrap target in a one-element tuple: a bare '%s' % target raises
    # TypeError for a multi-element tuple and unwraps a one-element tuple.
    logger.error('DuplicatedError: %s' % (target,))
def slave_ports(cluster_id, m_count, replicas_count):
    """Ask the user for slave ports and return them as a sorted list

    The answer is a comma separated list whose items are either a single
    number or a 'start-end' range. The prompt is repeated until every item
    is valid, every port passes port_range_safe() and the number of
    distinct ports equals `m_count * replicas_count`.

    :param cluster_id: cluster id, used to compute the default port range
    :param m_count: number of masters
    :param replicas_count: number of replicas per master; when it is 0 or
        less no question is asked
    :return: sorted list of ports (empty list when replicas_count <= 0)
    """
    logger.debug('ask slave ports')
    if replicas_count <= 0:
        logger.debug('return empty list')
        return []
    s_count = m_count * replicas_count
    start_s_ports = START_PORT + (cluster_id * MASTER_OFFSET) + SLAVE_OFFSET
    end_s_ports = start_s_ports + s_count - 1
    if start_s_ports == end_s_ports:
        default_s_ports = str(start_s_ports)
    else:
        default_s_ports = '{}-{}'.format(start_s_ports, end_s_ports)

    # Anchored at both ends so that malformed items such as '1-2-3' or
    # '10-20abc' are reported as invalid input instead of raising
    # ValueError while unpacking / converting the range bounds.
    range_pattern = re.compile(r'^[0-9]+-[0-9]+$')
    while True:
        result = ask(message.get('ask_ports'), default=default_s_ports)
        result = [item.strip() for item in result.split(',')]
        valid = True
        s_ports = set()
        for item in result:
            if range_pattern.match(item):
                # range number
                s, e = map(int, item.split('-'))
                if s > e:
                    msg = message.get('error_invalid_range').format(value=item)
                    logger.error(msg)
                    valid = False
                    break
                s_ports.update(range(s, e + 1))
            elif utils.is_number(item):
                # single number
                s_ports.add(int(item))
            else:
                msg = message.get('error_invalid_input').format(value=item)
                logger.error(msg)
                valid = False
                break

        # the range check also runs on the ports collected before an
        # invalid item was hit
        out_of_range = [
            port for port in s_ports if not port_range_safe(port)
        ]
        if out_of_range:
            msg = message.get('error_port_range').format(
                minimum=PORT_MININUM,
                maximum=PORT_MAXIMUM,
                value=out_of_range,
            )
            logger.error(msg)
            continue
        if not valid:
            # the error was already logged while parsing
            continue
        if len(s_ports) != s_count:
            real_replicas_count = len(s_ports) / float(m_count)
            msg = message.get('error_port_count_different').format(
                advance=replicas_count,
                current=int(real_replicas_count),
            )
            logger.error(msg)
            continue
        break

    s_ports = sorted(s_ports)
    logger.info('OK, {}'.format(result))
    return s_ports