def masters_with_dir(self, server, dir):
    """Print the masters on ``server`` whose data directory matches ``dir``.

    ``config get dir`` is sent to every node (``slave=True``). A node is
    listed when ``self.compare_ip(host, server)`` is true, the reported
    path contains ``dir`` and its resolved ``ip:port`` is the address of
    one of the masters returned by ``Center.get_master_obj_list``.
    The result is printed as a HOST / PORT / PATH table.

    :param server: IP or hostname
    :param dir: directory path (matched as a substring of the reported path)
    """
    center = Center()
    center.update_ip_port()
    logger.debug('masters_with_dir')
    master_addrs = [master['addr'] for master in center.get_master_obj_list()]
    replies = RedisCliUtil.command_all_async('config get dir', slave=True)
    rows = []
    for _, host, port, res, stdout in replies:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        # An empty first line is prepended, so the path is read at index 2.
        reply_lines = '\n'.join(['', stdout]).splitlines()
        if not self.compare_ip(host, server):
            continue
        path = reply_lines[2]
        if dir not in path:
            continue
        endpoint = '{}:{}'.format(socket.gethostbyname(host), port)
        if endpoint in master_addrs:
            rows.append([host, port, path])
    header = [['HOST', 'PORT', 'PATH']]
    utils.print_table(header + rows)
def find_noaddr(self):
    """Find noaddr nodes that are not used anymore in the cluster.

    ``cluster nodes`` is sent to every node (``slave=True``). The first
    field of every output line containing ``noaddr`` is collected,
    de-duplicated and printed as a one-column ``UUID`` table. Nodes that
    do not answer ``OK`` are reported with a warning and skipped.
    """
    center = Center()
    center.update_ip_port()
    logger.debug('find_noaddr')
    ret = RedisCliUtil.command_all_async('cluster nodes', slave=True)
    # Collected up front so the name is always bound, even when no node
    # answered OK (the table is then printed with the header only).
    noaddr_ids = set()
    for _, host, port, res, stdout in ret:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        for line in stdout.splitlines():
            if 'noaddr' in line:
                noaddr_ids.add(line.split()[0])
    # Sorted so the printed order is stable between runs.
    meta = [[uuid] for uuid in sorted(noaddr_ids)]
    utils.print_table([['UUID']] + meta)
def failover_with_dir(self, server, dir):
    """Fail over the masters on ``server`` that use directory ``dir``.

    Masters are selected the same way the table of matching masters is
    built: ``config get dir`` on every node, host matched with
    ``self.compare_ip``, path containing ``dir``, and resolved ``ip:port``
    being a master address. For every slave of each selected master,
    ``cluster failover takeover`` is run through ``redis-cli`` in a shell
    and the command output is printed.

    :param server: IP or hostname
    :param dir: directory path (matched as a substring of the reported path)
    """
    center = Center()
    center.update_ip_port()
    logger.debug('failover_with_dir')
    master_nodes = center.get_master_obj_list()
    cluster_id = config.get_cur_cluster_id()
    lib_path = config.get_ld_library_path(cluster_id)
    path_of_fb = config.get_path_of_fb(cluster_id)
    redis_cli_cmd = os.path.join(path_of_fb['sr2_redis_bin'], 'redis-cli')
    # Environment prefix placed in front of every redis-cli invocation.
    env_prefix = ' '.join([
        'GLOBIGNORE=*;',
        'export LD_LIBRARY_PATH={};'.format(lib_path['ld_library_path']),
        'export DYLD_LIBRARY_PATH={};'.format(
            lib_path['dyld_library_path']),
    ])

    # Find masters with dir
    replies = RedisCliUtil.command_all_async('config get dir', slave=True)
    master_addrs = [master['addr'] for master in master_nodes]
    targets = []
    for _, host, port, res, stdout in replies:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        # An empty first line is prepended, so the path is read at index 2.
        reply_lines = '\n'.join(['', stdout]).splitlines()
        if not (self.compare_ip(host, server) and dir in reply_lines[2]):
            continue
        endpoint = '{}:{}'.format(socket.gethostbyname(host), port)
        if endpoint in master_addrs:
            targets.append(endpoint)

    # Take over from every slave of each selected master.
    sub_cmd = 'cluster failover takeover'
    for endpoint in targets:
        for master in master_nodes:
            if endpoint != master['addr']:
                continue
            for slave in master['slaves']:
                addr = slave['addr']
                s_host, s_port = addr.split(':')
                command = '{} {} -h {} -p {} {}'.format(
                    env_prefix,
                    redis_cli_cmd,
                    s_host,
                    s_port,
                    sub_cmd,
                )
                notice = message.get('try_failover_takeover')
                self._print(notice.format(slave=addr))
                stdout = subprocess.check_output(command, shell=True)
                self._print(stdout)
def _handle(text):
    """Run one line of interactive input as a ``Command`` through ``fire``.

    Empty input is ignored and ``clear`` clears the screen; both return
    ``None``. For anything else the help spellings ``-- --help``,
    ``--help`` and ``?`` are all normalised to ``-- --help`` and the text
    is dispatched with ``fire.Fire``. Exceptions raised by the command are
    logged (``FireError`` / ``FireExit`` are silently ignored) and never
    propagate to the caller.

    :param text: command line to execute
    :return: ``False`` if the command finished without raising, ``True``
        if it raised; ``None`` for empty input and for ``clear``
    """
    if text == '':
        return
    if text == 'clear':
        utils.clear_screen()
        return
    # Collapse every help spelling to '?' first, then expand it once, so
    # '-- --help' is not expanded twice.
    text = text.replace('-- --help', '?')
    text = text.replace('--help', '?')
    text = text.replace('?', '-- --help')
    err_flg = True
    try:
        fire.Fire(component=Command, command=text)
        err_flg = False
    except KeyboardInterrupt:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except KeyError as ex:
        # logger.warn is a deprecated alias; use warning like the rest of
        # the code base.
        logger.warning('[%s] command fail' % text)
        logger.exception(ex)
    except TypeError as ex:
        logger.exception(ex)
    except IOError as ex:
        # errno 2 is ENOENT (no such file or directory).
        if ex.errno == 2:
            msg = message.get('error_file_not_exist').format(file=ex.filename)
            logger.error(msg)
        else:
            logger.exception(ex)
    except EOFError:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except utils.CommandError as ex:
        logger.exception(ex)
    except FireError:
        pass
    except FireExit:
        pass
    except (
            HostNameError,
            HostConnectionError,
            SSHConnectionError,
            FileNotExistError,
            YamlSyntaxError,
            PropsSyntaxError,
            PropsKeyError,
            PropsError,
            SSHCommandError,
            ClusterRedisError,
            ClusterNotExistError,
            ClusterIdError,
            EnvError,
    ) as ex:
        logger.error('{}: {}'.format(ex.class_name(), str(ex)))
    except LightningDBError as ex:
        logger.error('[ErrorCode {}] {}'.format(ex.error_code, str(ex)))
    except BaseException as ex:
        logger.exception(ex)
    finally:
        # Returning from ``finally`` is deliberate: it also discards any
        # exception raised inside a handler above, so this function never
        # raises.
        return err_flg
def rowcount(self):
    """Query and show the row, partition and eviction counts per table.

    ``info Tablespace`` is sent to every node (``slave=True``) but only
    the output of nodes whose ``host:port`` is a master address is kept.
    Lines containing ``totalRows`` are grouped by table id (the second
    ``_``-separated piece of the text before ``:``) and, per table, the
    ``totalRows``, ``partitions`` and ``evictedRows`` values are summed.
    The result is printed as a table.
    """
    logger.debug('rowcount')
    center = Center()
    center.update_ip_port()
    masters = [master['addr'] for master in center.get_master_obj_list()]

    # Shell equivalent of the total row count:
    # open-redis-cli-all info Tablespace | grep totalRows
    #   | awk -F ', ' '{print $4}' | awk -F '=' '{sum += $2} END {print sum}'
    ret = RedisCliUtil.command_all_async('info Tablespace', slave=True)
    outs = ''
    for _, host, port, res, stdout in ret:
        if res == 'OK':
            endpoint = '{}:{}'.format(host, port)
            if endpoint in masters:
                outs = '\n'.join([outs, stdout])
        else:
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))

    key = 'totalRows'
    partitions = 'partitions'
    evictions = 'evictedRows'
    # Must be a real list: it is scanned once to discover the table ids
    # and again for every table. A lazy ``filter`` object (Python 3) would
    # be exhausted after the first scan and every count would be 0.
    filtered_lines = [line for line in outs.splitlines() if key in line]

    # Table list, in order of first appearance
    table_list = []
    for line in filtered_lines:
        table_stats, _ = line.split(':')
        table_id = table_stats.split('_')[1]
        if table_id not in table_list:
            table_list.append(table_id)

    result = []
    for tid in table_list:
        # NOTE(review): substring match, so a table id that is contained
        # in another id also picks up that table's lines -- confirm ids
        # cannot overlap.
        table_lines = [line for line in filtered_lines if tid in line]
        ld = RedisCliUtil.to_list_of_dict(table_lines)
        row_count = sum(int(item[key]) for item in ld)
        partitions_count = sum(int(item[partitions]) for item in ld)
        evictions_count = sum(int(item[evictions]) for item in ld)
        result.append([tid, row_count, partitions_count, evictions_count])
    utils.print_table(
        [['Table_ID', 'ROW_COUNT', 'PARTITION_COUNT', 'EVICTED_ROWS']]
        + result)
def _validate_cluster_id(cluster_id):
    """Resolve ``cluster_id``, switch to it and return it as an int.

    ``None`` means "use the current cluster id" (fetched with
    ``allow_empty_id=True``). A value that ``utils.is_number`` rejects
    raises ``ClusterIdError`` internally. On ``ClusterIdError`` or
    ``ClusterNotExistError`` the error is logged as a warning and the
    function falls back to cluster id ``-1``.

    :param cluster_id: requested cluster id, or ``None``
    :return: the cluster id that was passed to ``run_cluster_use``
    """
    try:
        if cluster_id is None:
            candidate = config.get_cur_cluster_id(allow_empty_id=True)
        else:
            if not utils.is_number(cluster_id):
                raise ClusterIdError(cluster_id)
            candidate = cluster_id
        resolved = int(candidate)
        run_cluster_use(resolved)
        return resolved
    except (ClusterIdError, ClusterNotExistError) as ex:
        logger.warning(ex)
        fallback = -1
        run_cluster_use(fallback)
        return fallback
def _save_config(f, key, value):
    """Rewrite file ``f`` in place, setting ``key`` to ``value``.

    Every line whose first whitespace-separated word equals ``key`` is
    replaced by ``"<key> <value>"``; all other lines are written back
    unchanged. Exactly one replacement counts as success; any other count
    is reported with a warning.

    :param f: path of the config file to edit in place
    :param key: config key to look for at the start of a line
    :param value: new value written after the key
    """
    inplace_count = 0
    try:
        # With inplace=True, print() writes into the file being rewritten.
        for line in fileinput.input(f, inplace=True):
            words = line.split()
            if words and words[0] == key:
                msg = '{key} {value}'.format(key=key, value=value)
                inplace_count += 1
                print(msg)
            else:
                print(line, end='')
    finally:
        # Always restore stdout and release the module-level fileinput
        # state, even if the loop body raised.
        fileinput.close()
    logger.debug('inplace: %d (%s)' % (inplace_count, f))
    if inplace_count == 1:
        logger.debug('save config(%s) success' % f)
    else:
        msg = message.get('error_save_config').format(key=key, file=f)
        logger.warning(msg)
def distribution(self):
    """Check the distribution of all masters and slaves.

    ``cluster nodes`` is sent to every node (``slave=True``) and only the
    ``myself`` line of each answer is kept. For every host in
    ``center.master_host_list`` the lines containing ``<ip>:`` are
    counted: a ``master`` role with 9 fields counts as a master, a
    ``master`` role with any other field count is not counted, and every
    other role counts as a slave. A per-host table with a TOTAL row is
    printed.
    """
    center = Center()
    center.update_ip_port()
    logger.debug('distribution')
    ret = RedisCliUtil.command_all_async('cluster nodes', slave=True)
    # A real list: it is scanned once per host below, and it must exist
    # even when no node answered OK.
    myself_lines = []
    for _, host, port, res, stdout in ret:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        myself_lines.extend(
            line for line in stdout.splitlines() if 'myself' in line)

    meta = []
    total_masters = 0
    total_slaves = 0
    for nd in center.master_host_list:
        num_of_masters = 0
        num_of_slaves = 0
        ip = socket.gethostbyname(nd)
        ip_prefix = ip + ':'
        for line in myself_lines:
            if ip_prefix not in line:
                continue
            params = line.split()
            # params[2] holds the comma-separated flags, e.g. "myself,master"
            role = params[2].split(',')[1]
            if role == 'master':
                if len(params) == 9:
                    num_of_masters += 1
            else:
                num_of_slaves += 1
        total_masters += num_of_masters
        total_slaves += num_of_slaves
        # Label with the address resolved for this host, not with a loop
        # variable left over from a previous iteration.
        hostname = '{}({})'.format(socket.gethostbyaddr(ip)[0], ip)
        meta.append([hostname, num_of_masters, num_of_slaves])
    meta.append(['TOTAL', total_masters, total_slaves])
    utils.print_table([['HOST', 'MASTER', 'SLAVE']] + meta)
def download_file(url, file_path):
    """Download ``url`` to ``file_path`` with a console progress bar.

    Data is first written to ``file_path + '.download'`` and moved to
    ``file_path`` only after the download completed. When the server
    reports no usable ``content-length`` the body is written in one go
    without a progress bar. A leftover ``.download`` file is always
    removed.

    :param url: URL to fetch
    :param file_path: final destination path
    :return: ``True`` on success, ``False`` on an HTTP error or any other
        failure
    :raises KeyboardInterrupt: re-raised after terminating the progress
        line
    """
    download_path = file_path + '.download'
    file_name = os.path.basename(file_path)
    response = None
    try:
        with open(download_path, 'wb') as f:
            msg = message.get('file_download').format(file_name=file_name)
            logger.info(msg)
            logger.debug('url: {}'.format(url))
            logger.debug('installer name: {}'.format(file_name))
            response = requests.get(url, stream=True)
            response.raise_for_status()
            total_length = response.headers.get('content-length')
            total_length = 0 if total_length is None else int(total_length)
            if total_length <= 0:
                # Unknown (or zero) length: no percentage can be computed.
                f.write(response.content)
            else:
                done_length = 0
                for data in response.iter_content(chunk_size=4096):
                    done_length += len(data)
                    f.write(data)
                    done = int(100 * done_length / total_length)
                    filled = done // 2  # bar is 50 characters wide
                    comp = '=' * filled
                    remain = ' ' * (50 - filled)
                    progress = '\r[{}{}] {}%'.format(comp, remain, done)
                    sys.stdout.write(progress)
                    sys.stdout.flush()
                print('')
        shutil.move(download_path, file_path)
        return True
    except requests.exceptions.HTTPError as ex:
        logger.warning(ex)
        return False
    except KeyboardInterrupt:
        print('')
        raise
    except BaseException as ex:
        class_name = ex.__class__.__name__
        logger.warning('{}: {}'.format(class_name, url))
        return False
    finally:
        # A streamed response keeps its connection until it is fully
        # read or closed; close it explicitly in case the download
        # aborted midway.
        if response is not None:
            response.close()
        if os.path.isfile(download_path):
            os.remove(download_path)
def rowcount(self):
    """Query and show the total row count of the cluster.

    ``info Tablespace`` is sent with ``slave=False``; the ``totalRows``
    value of every matching output line is summed and the total printed.
    """
    logger.debug('rowcount')
    # Shell equivalent:
    # open-redis-cli-all info Tablespace | grep totalRows
    #   | awk -F ', ' '{print $4}' | awk -F '=' '{sum += $2} END {print sum}'
    replies = RedisCliUtil.command_all_async('info Tablespace', slave=False)
    collected = ''
    for _, host, port, res, stdout in replies:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        collected = '\n'.join([collected, stdout])
    key = 'totalRows'
    matching = [row for row in collected.splitlines() if key in row]
    ld = RedisCliUtil.to_list_of_dict(matching)
    row_count = sum(int(entry[key]) for entry in ld)
    self._print(row_count)
def nodes_with_dir(self, server, dir):
    """Print the nodes on ``server`` whose data directory matches ``dir``.

    ``config get dir`` is sent to every node (``slave=True``). A node is
    listed when ``self.compare_ip(host, server)`` is true and the
    reported path contains ``dir``. The result is printed as a
    HOST / PORT / PATH table.

    :param server: IP or hostname
    :param dir: directory path (matched as a substring of the reported path)
    """
    center = Center()
    center.update_ip_port()
    logger.debug('nodes_with_dir')
    replies = RedisCliUtil.command_all_async('config get dir', slave=True)
    rows = []
    for _, host, port, res, stdout in replies:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        # An empty first line is prepended, so the path is read at index 2.
        reply_lines = '\n'.join(['', stdout]).splitlines()
        if not self.compare_ip(host, server):
            continue
        path = reply_lines[2]
        if dir in path:
            rows.append([host, port, path])
    header = [['HOST', 'PORT', 'PATH']]
    utils.print_table(header + rows)
def run_monitor(n=10, t=2):
    """Monitoring logs of redis.

    Asks which host to monitor, then runs ``watch`` with ``tail`` on that
    host over ``ssh -t``. If that raises, falls back to ``tail -F``
    executed through an SSH client.

    :param n: number of lines to print log
    :param t: renewal cycle(sec)
    """
    if not isinstance(n, int):
        logger.error(
            message.get('error_option_type_not_number').format(option='n'))
        return
    if not isinstance(t, (int, float)):
        logger.error(
            message.get('error_option_type_not_float').format(option='t'))
        return
    # ``tail`` is needed by both the watch and the fallback variant.
    try:
        sp.check_output('which tail', shell=True)
    except Exception:
        logger.error(message.get('error_not_found_command_tail'))
        return

    cluster_id = config.get_cur_cluster_id()
    sr2_redis_log = config.get_path_of_fb(cluster_id)['sr2_redis_log']
    log_files = '{}/servers*'.format(sr2_redis_log)
    host_list = config.get_master_host_list()
    target_host = ask_util.host_for_monitor(host_list)
    try:
        sp.check_output('which watch', shell=True)
        watch_cmd = "ssh -t {} watch -n {} 'tail -n {} {}'".format(
            target_host, t, n, log_files)
        sp.call(watch_cmd, shell=True)
    except Exception:
        logger.warning(message.get('error_not_found_command_watch'))
        logger.info(message.get('message_for_exit'))
        tail_cmd = "tail -F -s {} {}".format(t, log_files)
        client = net.get_ssh(target_host)
        net.ssh_execute_async(client, tail_cmd)
def get_installers_from_fb_s3(maximum_number=5):
    """Return up to ``maximum_number`` installers listed on S3.

    The listing page is fetched and every line starting with ``<a href=``
    is turned into one entry, in page order. An empty list is returned
    when the request fails with a connection error or an HTTP status of
    400 or above; both cases are logged as a warning.

    :param maximum_number: maximum number of entries to return (default
        5); zero or a negative value yields an empty list
    :return: list of dicts::

        [{
            'name': file name,
            'url': download url,
            'type': url type (always 'download'),
        }]
    """
    ret = []
    url = 'https://flashbase.s3.ap-northeast-2.amazonaws.com/latest/latest.html'
    warning_msg = "Fail to load installer list from '{}'".format(url)
    try:
        res = requests.get(url)
        status_code = res.status_code
        if status_code >= 400:
            msg = message.get('error_http_request')
            msg = msg.format(code=status_code, msg=warning_msg)
            logger.warning(msg)
            # Do not try to parse an error page as an installer listing.
            return []
        stripped = [line.strip() for line in str(res.text).split('\n')]
        anchors = [line for line in stripped if line.startswith('<a href=')]
        for text in anchors:
            if maximum_number <= 0:
                break
            link = parser.get_word_between(text, '<a href="', '">')
            name = parser.get_word_between(text, '<a href=".*">', '/*</a>')
            ret.append({'name': name, 'url': link, 'type': 'download'})
            maximum_number -= 1
        return ret
    except requests.exceptions.ConnectionError:
        msg = message.get('error_http_connection').format(msg=warning_msg)
        logger.warning(msg)
        return []
def forget_noaddr(self):
    """Forget noaddr nodes that are not used anymore in the cluster.

    ``cluster nodes`` is sent to every node (``slave=True``) and the first
    field of every output line containing ``noaddr`` is collected and
    de-duplicated. For each collected id, ``cluster forget`` is then sent
    to every node and the number of nodes that answered ``OK`` is
    printed. Nodes that do not answer ``OK`` are reported with a warning.
    """
    center = Center()
    center.update_ip_port()
    logger.debug('forget_noaddr')
    ret = RedisCliUtil.command_all_async('cluster nodes', slave=True)
    # Collected up front so the name is always bound, even when no node
    # answered OK (nothing is forgotten in that case).
    noaddr_ids = set()
    for _, host, port, res, stdout in ret:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        for line in stdout.splitlines():
            if 'noaddr' in line:
                noaddr_ids.add(line.split()[0])

    # Forget noaddr uuid (sorted so the processing order is stable)
    for uuid in sorted(noaddr_ids):
        sub_cmd = 'cluster forget "{id}" 2>&1'.format(id=uuid)
        forget_ret = RedisCliUtil.command_all_async(sub_cmd, slave=True)
        count = 0
        for _, host, port, res, stdout in forget_ret:
            if res == 'OK':
                count += 1
            else:
                logger.warning("FAIL {}:{} {}".format(host, port, stdout))
        msg = '{num} nodes have forgot {id}'.format(num=count, id=uuid)
        self._print(msg)
def add_slave(self, yes=False):
    """Add slave of cluster

    Add slaves to cluster that configured master only.

    The steps run in a fixed order: validate the option and the slave
    host/port lists, check host connectivity and that the cluster exists,
    refuse to continue when slave processes not counted by the
    ``check_owner=True`` query are alive, ask for confirmation, then
    clean, back up logs, create data directories, configure, sync conf,
    start the slave processes and finally replicate.

    :param yes: Skip confirm information
    :raises ClusterRedisError: when the slave host list or the slave port
        list is empty
    :raises LightningDBError: (code 12) when alive slave processes exist
        that are not counted with ``check_owner=True``
    """
    logger.debug('add_slave')
    if not isinstance(yes, bool):
        msg = message.get('error_option_type_not_boolean')
        msg = msg.format(option='yes')
        logger.error(msg)
        return
    center = Center()
    center.update_ip_port()

    # check: slave hosts and ports must both be configured
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    if not s_hosts:
        msg = message.get('error_slave_host_empty')
        raise ClusterRedisError(msg)
    if not s_ports:
        msg = message.get('error_slave_port_empty')
        raise ClusterRedisError(msg)
    success = center.check_hosts_connection(hosts=s_hosts)
    if not success:
        return
    center.ensure_cluster_exist()
    # Alive slave processes in total vs. those counted with
    # check_owner=True; a difference is treated as a collision.
    slave_alive_count = center.get_alive_slave_redis_count()
    slave_alive_count_mine = center.get_alive_slave_redis_count(
        check_owner=True
    )
    not_mine_count = slave_alive_count - slave_alive_count_mine
    if not_mine_count > 0:
        # The message is joined with newlines before formatting, so it is
        # presumably stored as a list of lines -- TODO confirm.
        msg = message.get('error_cluster_start_slave_collision')
        msg = '\n'.join(msg).format(count=not_mine_count)
        raise LightningDBError(12, msg)

    # confirm info (skipped when yes=True)
    result = center.confirm_node_port_info(skip=yes)
    if not result:
        msg = message.get('cancel')
        logger.warning(msg)
        return
    # clean
    center.cluster_clean(master=False)
    # backup logs
    center.backup_server_logs(master=False)
    center.create_redis_data_directory(master=False)
    # configure
    center.configure_redis(master=False)
    center.sync_conf()
    # start
    center.start_redis_process(master=False)
    center.wait_until_all_redis_process_up()

    # change redis config temporarily: remember the current value read
    # from the first slave host/port so it can be restored afterwards
    key = 'cluster-node-timeout'
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    if not origin_s_value:
        msg = "RedisConfigKeyError: '{}'".format(key)
        logger.warning(msg)
    if origin_s_value:
        # cli config set cluster-node-timeout 2000
        logger.debug('set cluster node time out 2000 for create')
        center.cli_config_set_all(key, '2000', s_hosts, s_ports)
    # create
    # NOTE(review): if replicate() raises, the original timeout is not
    # restored -- confirm whether that is acceptable.
    center.replicate()
    if origin_s_value:
        # cli config restore cluster-node-timeout
        logger.debug('restore cluster node time out')
        center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
def create(self, yes=False):
    """Create cluster

    Before create cluster, all redis should be running. When fewer redis
    processes are alive than ``len(center.all_host_list)``, the processes
    are configured and started here first.

    While ``center.create_cluster`` runs, ``cluster-node-timeout`` is
    temporarily set to ``2000`` on the masters (and on the slaves, when
    slave hosts and ports are configured) and restored afterwards.

    :param yes: skip confirm information
    :raises ClusterRedisError: when fewer than 3 master host/port
        combinations are configured, or when the alive process count
        differs from the count obtained with ``check_owner=True``
    """
    center = Center()
    center.update_ip_port()
    success = center.check_hosts_connection()
    if not success:
        return
    # Number of master redis = hosts x ports
    m_count = len(center.master_host_list) * len(center.master_port_list)
    if m_count < 3:
        msg = message.get('error_master_redis_less_than_3')
        raise ClusterRedisError(msg)

    # if need to cluster start
    alive_count = center.get_alive_all_redis_count()
    my_alive_count = center.get_alive_all_redis_count(check_owner=True)
    if alive_count != my_alive_count:
        msg = message.get('error_cluster_start_port_collision')
        raise ClusterRedisError(msg)
    # NOTE(review): this compares a process count with the length of a
    # host list -- confirm all_host_list holds one entry per process.
    all_count = len(center.all_host_list)
    if alive_count < all_count:
        logger.debug('cluster start in create')
        # init
        center.backup_server_logs()
        center.create_redis_data_directory()

        # cluster configure
        center.configure_redis()
        center.sync_conf(show_result=True)

        # cluster start
        center.start_redis_process()
        center.wait_until_all_redis_process_up()

    # Remember the current timeout (read from the first master and, if
    # slaves are configured, the first slave) so it can be restored.
    key = 'cluster-node-timeout'
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    if not origin_m_value:
        msg = "RedisConfigKeyError(master): '{}'".format(key)
        logger.warning(msg)
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    # origin_s_value is only bound when slaves are configured; every later
    # use is guarded by the same ``s_hosts and s_ports`` check.
    if s_hosts and s_ports:
        origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
        if not origin_s_value:
            msg = "RedisConfigKeyError(slave): '{}'".format(key)
            logger.warning(msg)
    if origin_m_value:
        # cli config set cluster-node-timeout 2000
        logger.debug('set cluster node time out 2000 for create')
        center.cli_config_set_all(key, '2000', m_hosts, m_ports)
        if s_hosts and s_ports and origin_s_value:
            center.cli_config_set_all(key, '2000', s_hosts, s_ports)
    # NOTE(review): if create_cluster() raises, the original timeout is
    # not restored -- confirm whether that is acceptable.
    center.create_cluster(yes)
    if origin_m_value:
        # cli config restore cluster-node-timeout
        logger.debug('restore cluster node time out')
        center.cli_config_set_all(key, origin_m_value,
                                  m_hosts, m_ports)
        if s_hosts and s_ports and origin_s_value:
            v = origin_s_value
            center.cli_config_set_all(key, v, s_hosts, s_ports)
def failover_list(self):
    """Find failovered|no-slave|no-slot masters and failbacked slaves.

    Four lists are logged:

    1. failovered masters: masters whose port is in the slave port list
    2. no-slave masters: masters with no slave, or with at least one
       slave whose status is ``disconnected``
    3. no-slot masters: second field of every ``cluster nodes`` line that
       contains ``myself,master`` and has exactly 8 fields
    4. failbacked slaves: slaves whose port is in the master port list
    """
    center = Center()
    center.update_ip_port()
    logger.debug('failover_list')
    master_nodes = center.get_master_obj_list()
    slave_nodes = center.get_slave_nodes()
    master_ports = center.master_port_list
    slave_ports = center.slave_port_list

    def _port_in(addr, ports):
        # True when the port part of "host:port" is an int found in ports;
        # a non-numeric port is ignored.
        port_text = addr.split(':')[1]
        try:
            return int(port_text) in ports
        except ValueError:
            return False

    failovered_masters = [
        master['addr'] for master in master_nodes
        if _port_in(master['addr'], slave_ports)
    ]

    noslave_masters = []
    for master in master_nodes:
        slaves = master['slaves']
        if len(slaves) == 0:
            noslave_masters.append(master['addr'])
        elif any(slave['status'] == 'disconnected' for slave in slaves):
            noslave_masters.append(master['addr'])

    noslot_masters = []
    ret = RedisCliUtil.command_all_async('cluster nodes', slave=True)
    for _, host, port, res, stdout in ret:
        if res != 'OK':
            logger.warning("FAIL {}:{} {}".format(host, port, stdout))
            continue
        # Lines are handled per answer, so nothing is left unbound when no
        # node answered OK.
        for line in stdout.splitlines():
            if 'myself,master' not in line:
                continue
            words = line.split()
            if len(words) == 8:
                noslot_masters.append(words[1])

    failbacked_slaves = [
        slave_addr for slave_addr in slave_nodes
        if _port_in(slave_addr, master_ports)
    ]

    output_msg = []
    output_msg.append('1) failovered masters:')
    output_msg.extend(failovered_masters)
    output_msg.append('')
    output_msg.append('2) no-slave masters:')
    output_msg.extend(noslave_masters)
    output_msg.append('')
    output_msg.append('3) no-slot masters:')
    output_msg.extend(noslot_masters)
    output_msg.append('')
    output_msg.append('4) failbacked slaves:')
    output_msg.extend(failbacked_slaves)
    output_msg.append('')
    logger.info(color.ENDC + '\n'.join(output_msg))