def delete_all_pods_for_service(dto):
    """Run the ``Cmd['k8s']`` command against every pod of one service.

    Pod names come from ``K8sObserver.get_pod_name_list('sock-shop')``. A pod
    is selected when its name contains the keyword but contains neither
    ``<keyword>-db`` nor ``<keyword>-external``.

    :param dto: dict; ``dto["service"]`` is the keyword searched for in pod
        names, ``dto["open"]`` is forwarded to ``handle_inject_result``
    :return: list holding one ``handle_inject_result`` value per selected pod
    """
    keyword = dto["service"]
    control_host = "10.60.38.181"
    caller = sys._getframe().f_code.co_name

    outcomes = []
    for pod in K8sObserver.get_pod_name_list('sock-shop'):
        if keyword not in pod:
            continue
        # Database and external companions of the service are left alone.
        if (keyword + "-db") in pod or (keyword + "-external") in pod:
            continue

        command = Cmd['k8s'] + pod
        runner = Runner()
        runner.run_ad_hoc(hosts=control_host, module='shell', args=command)
        outcome = runner.get_adhoc_result()
        outcomes.append(
            handle_inject_result('k8s', control_host, command, outcome,
                                 caller, dto['open']))
    return outcomes
def chaos_inject_mem(dto):
    """Run the memory fault command on the host selected by ``dto``.

    The command is skipped when ``has_injected`` already holds a ``'mem'``
    entry for the same host.

    :param dto: dict read for ``percent`` (``'default'`` selects
        ``Default_cmd['mem']``, otherwise the value is appended to
        ``Cmd['mem']``), ``timeout`` (``'default'`` -> 300, ``'no'`` -> no
        timeout flag, anything else is used verbatim) and ``open``
        (forwarded to ``handle_inject_result``); also passed to
        ``get_target_host``
    :return: the ``handle_inject_result`` value, or an explanatory string
        when the host is unknown or already injected
    """
    target_host, is_exist = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + target_host + ' does not exist.'

    if dto['percent'] == 'default':
        command = Default_cmd['mem']
    else:
        command = Cmd['mem'] + dto['percent']

    requested_timeout = dto['timeout']
    if requested_timeout == 'default':
        timeout_flag = ' --timeout 300'
    elif requested_timeout == 'no':
        timeout_flag = ''
    else:
        timeout_flag = ' --timeout ' + requested_timeout

    already_injected = any(
        entry['host'] == target_host and entry['inject_type'] == 'mem'
        for entry in has_injected)
    if already_injected:
        Logger.log(
            "error", "HOST HAS BEEN INJECTED - Method : " +
            sys._getframe().f_code.co_name + "() - - " + target_host)
        return "The host's mem has been injected"

    full_command = command + timeout_flag
    runner = Runner()
    runner.run_ad_hoc(hosts=target_host, module='shell', args=full_command)
    outcome = runner.get_adhoc_result()
    return handle_inject_result('mem', target_host, full_command, outcome,
                                sys._getframe().f_code.co_name, dto['open'])
def chaos_inject_network(dto):
    """Run the network fault command on the host selected by ``dto``.

    The command is skipped when ``has_injected`` already holds a
    ``'network'`` entry for the same host.

    :param dto: dict read for ``time`` (``'default'`` -> ``'3000'``),
        ``timeout`` (``'default'`` -> ``'600'``) and ``open`` (forwarded to
        ``handle_inject_result``); also passed to ``get_target_host``
    :return: the ``handle_inject_result`` value, or an explanatory string
        when the host is unknown or already injected
    """
    target_host, is_exist = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + target_host + ' does not exist.'

    # Named ``delay`` rather than ``time`` so the ``time`` module name is
    # not shadowed inside this function.
    delay = '3000' if dto['time'] == 'default' else dto['time']
    timeout = '600' if dto['timeout'] == 'default' else dto['timeout']

    # NOTE(review): assumes Cmd['network'] ends with a space - confirm.
    target_inject = (Cmd['network'] + '--time ' + delay +
                     ' --timeout ' + timeout)

    already_injected = any(
        entry['host'] == target_host and entry['inject_type'] == 'network'
        for entry in has_injected)
    if already_injected:
        # Logged for parity with chaos_inject_mem.
        Logger.log(
            "error", "HOST HAS BEEN INJECTED - Method : " +
            sys._getframe().f_code.co_name + "() - - " + target_host)
        return "The host's network has been injected"

    r = Runner()
    r.run_ad_hoc(hosts=target_host, module='shell', args=target_inject)
    result = r.get_adhoc_result()
    # Record exactly the command that was executed. The timeout is already
    # part of target_inject; appending it again produced "--timeout 600600".
    return handle_inject_result('network', target_host, target_inject,
                                result, sys._getframe().f_code.co_name,
                                dto['open'])
def inject_io(dto):
    """Run an I/O ``stress`` load on ``dto['host']``.

    :param dto: dict; ``host`` is the target, ``inject_duration`` is passed
        to ``stress -t``
    :return: the raw ad-hoc result reported by the runner
    """
    runner = Runner()
    host = dto['host']
    # Output of stress is discarded on the remote side.
    command = ('stress -i 100 -t ' + dto['inject_duration'] +
               ' > /dev/null 2>&1')
    runner.run_ad_hoc(hosts=host, module='shell', args=command)
    return runner.get_adhoc_result()
def inject_mem(dto):
    """Run a memory ``stress`` load on ``dto['host']``.

    :param dto: dict; ``host`` is the target, ``inject_duration`` is used
        both for ``--vm-hang`` and for ``-t``
    :return: the raw ad-hoc result reported by the runner
    """
    runner = Runner()
    host = dto['host']
    # Output of stress is discarded on the remote side.
    command = ('stress --vm 4 --vm-bytes 1G --vm-hang ' +
               dto['inject_duration'] + ' -t ' + dto['inject_duration'] +
               ' > /dev/null 2>&1')
    runner.run_ad_hoc(hosts=host, module='shell', args=command)
    return runner.get_adhoc_result()
def chaosinjectvm(dto):
    """Run a randomly chosen command on a randomly chosen VM host.

    :param dto: dict; ``inject_duration`` is appended as ``--timeout``
    :return: the raw ad-hoc result reported by the runner
    """
    # NOTE(review): the bounds are hard-coded, so this presumably expects
    # HostsVM to hold at least 5 entries and Cmd to be indexable by 0..2 -
    # confirm against their definitions.
    host_index = random.randint(0, 4)
    command_index = random.randint(0, 2)

    runner = Runner()
    chosen_host = HostsVM[host_index]
    command = Cmd[command_index] + " --timeout " + dto['inject_duration']
    runner.run_ad_hoc(hosts=chosen_host, module='shell', args=command)
    return runner.get_adhoc_result()
def chaosinject1(dto):
    """Run a ``blade create disk fill`` command on a fixed host.

    :param dto: dict; ``inject_duration`` is appended as ``--timeout``
    :return: the raw ad-hoc result reported by the runner
    """
    runner = Runner()
    command = ("blade create disk fill --size 1000 --timeout " +
               dto['inject_duration'])
    runner.run_ad_hoc(hosts="192.168.199.31", module='shell', args=command)
    return runner.get_adhoc_result()
def view_inject_on_host_by_status(dto):
    """List the blade experiments on a host that have a given status.

    Runs ``./blade status --type create`` on the host, keeps the entries
    whose ``Status`` equals ``dto['status']`` (capitalized) and posts the
    filtered list to ``target_url``.

    :param dto: dict; ``status`` is the status to filter on and ``host`` is
        used in the "does not exist" message; also passed to
        ``get_target_host``
    :return: list of matching experiment dicts (empty when the status query
        fails), or an explanatory string when the host is unknown
    """
    (target_host, is_exist) = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + dto['host'] + ' does not exist.'

    method_name = sys._getframe().f_code.co_name
    target_inject = "./blade status --type create"
    status_type = str(dto['status']).capitalize()

    r = Runner()
    r.run_ad_hoc(hosts=target_host, module='shell', args=target_inject)
    result = r.get_adhoc_result()

    if len(result['success']) == 0:
        if len(result["unreachable"]) > 0:
            flag, failures = "UNREACHABLE", result["unreachable"]
        else:
            flag, failures = "FAILED", result["failed"]
        # Tolerate a result with no failure detail instead of raising
        # IndexError.
        if len(failures) > 0:
            message = failures[list(failures)[0]]["msg"]
        else:
            message = "no result was returned"
        view_info = {
            "ip": target_host,
            "status type": status_type,
            "cmd": target_inject,
            "message": message
        }
        Logger.log(
            "error",
            flag + " - Method : " + method_name + "() - - " + str(view_info))
        return []

    transform_ip = list(result['success'])[0]
    stdout = result['success'][transform_ip]['stdout']
    # 'string_escape' is a Python 2 only codec; kept as in the original.
    info = json.loads(
        stdout.encode('unicode-escape').decode('string_escape'))['result']
    inject_list = [item for item in info if item['Status'] == status_type]

    # Reporting is best-effort: a failed post must not hide the list from
    # the caller. The old `return` inside `finally` discarded every
    # exception silently; request errors are now caught and logged.
    try:
        requests.post(url=target_url,
                      params={'content': str(inject_list)},
                      verify=False,
                      timeout=2)
    except requests.exceptions.RequestException as exc:
        Logger.log(
            "error",
            "REPORT FAILED - Method : " + method_name + "() - - " + str(exc))

    Logger.log(
        "info",
        "SUCCESS - Method : " + method_name + "() - - " + str(dto))
    return inject_list
def stop_all_chaos_inject_on_all_nodes(mq_control):
    """Destroy every successful blade experiment on every host in ``Hosts``.

    For each host, ``./blade status --type create`` is queried and every
    entry whose ``Status`` is ``"Success"`` is destroyed by its ``Uid``.
    After any successful destroy the module-level ``inject_info`` and
    ``has_injected`` lists are reset to empty.

    :param mq_control: dict; ``mq_control['open']`` is forwarded to
        ``handle_inject_result``
    :return: list holding one ``handle_inject_result`` value per destroy
        command that was run
    """
    global inject_info
    global has_injected

    method_name = sys._getframe().f_code.co_name
    status_cmd = './blade status --type create'
    result_list = []

    for target_host in Hosts:
        dto = {
            'host': target_host,
        }
        # Collected per host. Sharing one list across hosts replayed the
        # UIDs of earlier hosts on every later host.
        uid_list = []

        r = Runner()
        r.run_ad_hoc(hosts=target_host, module='shell', args=status_cmd)
        result = r.get_adhoc_result()
        if len(result["success"]) > 0:
            transform_ip = list(result["success"])[0]
            stdout = result["success"][transform_ip]["stdout"]
            # 'string_escape' is a Python 2 only codec; kept as in the
            # original.
            info = json.loads(
                stdout.encode('unicode-escape').decode('string_escape')
            )["result"]
            uid_list = [
                item["Uid"] for item in info if item["Status"] == "Success"
            ]

        if not uid_list:
            Logger.log(
                'info', 'NO CHAOS INJECT - Method : ' + method_name +
                "() - - " + str(dto))
            continue

        for uid in uid_list:
            cmd = './blade destroy ' + uid
            r = Runner()
            r.run_ad_hoc(hosts=target_host, module='shell', args=cmd)
            result = r.get_adhoc_result()
            if len(result["success"]) > 0:
                # Everything is being stopped, so the bookkeeping is
                # cleared wholesale rather than entry by entry.
                inject_info = []
                has_injected = []
            result_list.append(
                handle_inject_result("destroy", target_host, cmd, result,
                                     method_name, mq_control['open']))
    return result_list
def get_pod_log(pod_name, namespace, request_time):
    """Fetch the log of one pod and append it to a file on disk.

    ``kubectl logs`` is run on the control host; on success the output is
    appended to ``<cwd>/pod_log_dir/<request_time>/<pod_name>.txt``.

    :param pod_name: name of the pod
    :param namespace: namespace the pod lives in
    :param request_time: time the request was issued; used as the name of
        the directory the log file is written to
    :return: dict describing the outcome - it carries ``"log path"`` on
        success and ``"message"`` on failure
    """
    caller = sys._getframe().f_code.co_name
    command = '/opt/kube/bin/kubectl logs ' + pod_name + ' -n ' + namespace

    runner = Runner()
    runner.run_ad_hoc(hosts="10.60.38.181", module='shell', args=command)
    outcome = runner.get_adhoc_result()

    if len(outcome["success"]) == 0:
        # Unreachable hosts take precedence over failed ones when reporting.
        if len(outcome["unreachable"]) > 0:
            flag = "UNREACHABLE"
            failures = outcome["unreachable"]
        else:
            flag = "FAILED"
            failures = outcome["failed"]
        message = failures[list(failures)[0]]["msg"]
        info = {"pod name": pod_name, "command": command, "message": message}
        Logger.log("error",
                   flag + " - Method : " + caller + "() - - " + str(info))
        return info

    host_key = list(outcome["success"])[0]
    stdout_lines = outcome["success"][host_key]["stdout_lines"]

    dir_path = os.getcwd() + "/pod_log_dir/" + str(request_time)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    log_path = dir_path + "/" + pod_name + ".txt"
    with open(log_path, 'a') as log_file:
        for text in stdout_lines:
            log_file.write(text + '\n')

    info = {"pod name": pod_name, "command": command, "log path": log_path}
    Logger.log("info",
               "SUCCESS - Method : " + caller + "() - - " + str(info))
    return info
def chaos_inject_random(dto):
    """Run a randomly chosen default fault command on the selected host.

    ``Default_cmd`` is refreshed from ``DefaultCmd.get_default_cmd()`` and
    one of its keys is picked at random. For the ``"k8s"`` type a random
    pod of the ``sock-shop`` namespace is appended to the command and no
    timeout flag is added.

    :param dto: dict read for ``timeout`` (``'default'`` -> 300, ``'no'``
        -> no timeout flag, anything else is used verbatim) and ``open``
        (forwarded to ``handle_inject_result``); also passed to
        ``get_target_host``
    :return: the ``handle_inject_result`` value, or an explanatory string
        when the host is unknown, no pod is available, or the same fault
        is already recorded for the host
    """
    global Default_cmd
    Default_cmd = DefaultCmd.get_default_cmd()

    method_name = sys._getframe().f_code.co_name
    (target_host, is_exist) = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + target_host + ' does not exist.'

    inject_type = random.choice(list(Default_cmd))
    if inject_type == "k8s":
        pod_list = K8sObserver.get_pod_name_list("sock-shop")
        if not pod_list:
            # An empty pod list used to crash in random.randint(0, -1).
            Logger.log(
                "error",
                "NO POD - Method : " + method_name + "() - - " + str(dto))
            return 'There is no pod in namespace sock-shop to inject.'
        target_inject = Default_cmd[inject_type] + " " + random.choice(pod_list)
    else:
        target_inject = Default_cmd[inject_type]

    if dto['timeout'] == 'default':
        timeout = ' --timeout 300'
    elif dto['timeout'] != 'no':
        timeout = ' --timeout ' + dto['timeout']
    else:
        timeout = ''

    # NOTE(review): this compares the recorded 'inject_type' with the
    # command string, whereas chaos_inject_mem compares it with the type
    # name - confirm against what handle_inject_result stores.
    already_injected = any(
        entry['host'] == target_host and
        entry['inject_type'] == target_inject
        for entry in has_injected)
    if already_injected:
        Logger.log(
            "error", "HOST HAS BEEN INJECTED - Method : " + method_name +
            "() - - " + str(dto))
        return "The host has been injected by the inject"

    if inject_type != "k8s":
        target_inject = target_inject + timeout
    r = Runner()
    r.run_ad_hoc(hosts=target_host, module='shell', args=target_inject)
    result = r.get_adhoc_result()
    return handle_inject_result(inject_type, target_host, target_inject,
                                result, method_name, dto['open'])
def get_pods_status(namespace):
    """Return name, status, restarts and age of every pod in a namespace.

    Parses the whitespace-separated output of ``kubectl get pods``,
    skipping its first (header) line.

    :param namespace: namespace handed to ``kubectl get pods -n``
    :return: list of dicts with keys ``name``, ``status``, ``restarts`` and
        ``age``; empty when the command did not succeed
    """
    runner = Runner()
    runner.run_ad_hoc(hosts="10.60.38.181",
                      module='shell',
                      args='/opt/kube/bin/kubectl get pods -n ' + namespace)
    outcome = runner.get_adhoc_result()

    pods = []
    succeeded = outcome['success']
    if len(succeeded) == 0:
        return pods

    host_name = list(succeeded)[0]
    for row in succeeded[host_name]['stdout_lines'][1:]:
        columns = row.split()
        # Column 1 (READY) is not reported.
        pods.append({
            'name': columns[0],
            'status': columns[2],
            'restarts': columns[3],
            'age': columns[4],
        })
    return pods
def stop_all_on_specific_node(dto):
    """Destroy every successful blade experiment on one host.

    Queries ``./blade status --type create`` on the host and destroys each
    entry whose ``Status`` is ``"Success"`` by its ``Uid``.

    :param dto: dict; ``dto['open']`` is forwarded to
        ``handle_inject_result``; also passed to ``get_target_host``
    :return: list holding one ``handle_inject_result`` value per destroy
        command, or an explanatory string when the host is unknown, the
        status query fails, or nothing is injected
    """
    (target_host, is_exist) = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + target_host + ' does not exist.'

    method_name = sys._getframe().f_code.co_name
    target_inject = './blade status --type create'
    r = Runner()
    r.run_ad_hoc(hosts=target_host, module='shell', args=target_inject)
    result = r.get_adhoc_result()

    if len(result["success"]) == 0:
        # A failed status query is not the same as "nothing injected";
        # report it as a failure instead of a clean host.
        if len(result["unreachable"]) > 0:
            flag, failures = "UNREACHABLE", result["unreachable"]
        else:
            flag, failures = "FAILED", result["failed"]
        if len(failures) > 0:
            message = failures[list(failures)[0]]["msg"]
        else:
            message = "no result was returned"
        view_info = {
            "ip": target_host,
            "cmd": target_inject,
            "message": message
        }
        Logger.log(
            "error",
            flag + " - Method : " + method_name + "() - - " + str(view_info))
        return 'Failed to query injected attacks on host ' + target_host

    transform_ip = list(result["success"])[0]
    stdout = result["success"][transform_ip]["stdout"]
    # 'string_escape' is a Python 2 only codec; kept as in the original.
    info = json.loads(
        stdout.encode('unicode-escape').decode('string_escape'))["result"]
    uid_list = [item["Uid"] for item in info if item["Status"] == "Success"]

    if not uid_list:
        Logger.log(
            'info', 'NO CHAOS INJECT - Method : ' + method_name +
            "() - - " + str(dto))
        # A space now separates the message from the host.
        return 'There is no injected attack on this host ' + target_host

    result_list = []
    for uid in uid_list:
        cmd = './blade destroy ' + uid
        r = Runner()
        r.run_ad_hoc(hosts=target_host, module='shell', args=cmd)
        result = r.get_adhoc_result()
        result_list.append(
            handle_inject_result("destroy", target_host, cmd, result,
                                 method_name, dto['open']))
    return result_list
def chaos_inject_pod_single(dto):
    """Run the ``Cmd['k8s']`` command for one pod via the selected host.

    The command is skipped when ``has_injected`` already holds an entry for
    the same host whose ``inject_type`` equals the command string.

    :param dto: dict; ``pod`` is appended to ``Cmd['k8s']`` and ``open`` is
        forwarded to ``handle_inject_result``; also passed to
        ``get_target_host``
    :return: the ``handle_inject_result`` value, or an explanatory string
        when the host is unknown or the pod is already injected
    """
    target_host, is_exist = get_target_host(dto)
    if not is_exist:
        return 'Host: ' + target_host + ' does not exist.'

    command = Cmd['k8s'] + dto['pod']
    already_injected = any(
        entry['host'] == target_host and entry['inject_type'] == command
        for entry in has_injected)
    if already_injected:
        return 'The pod has been injected'

    runner = Runner()
    runner.run_ad_hoc(hosts=target_host, module='shell', args=command)
    outcome = runner.get_adhoc_result()
    print(outcome)
    return handle_inject_result('k8s', target_host, command, outcome,
                                sys._getframe().f_code.co_name, dto['open'])
def get_pod_name_list(namespace):
    """Return the names of all pods in a namespace.

    Note: this function is meant to be called only from fault_injector.py.

    :param namespace: namespace handed to ``kubectl get pods -n``
    :return: list of pod names; empty when the command did not succeed
    """
    runner = Runner()
    runner.run_ad_hoc(hosts="10.60.38.181",
                      module='shell',
                      args="/opt/kube/bin/kubectl get pods -n " + namespace)
    outcome = runner.get_adhoc_result()

    succeeded = outcome["success"]
    if len(succeeded) == 0:
        return []

    host_key = list(succeeded)[0]
    rows = succeeded[host_key]["stdout_lines"]
    # The first row is the kubectl header; the name is the first column.
    return [row.split()[0] for row in rows[1:]]
def stop_specific_chaos_inject(dto):
    """Destroy one blade experiment identified by its UID.

    The UID is looked up in ``inject_info`` to find the host, then
    ``./blade destroy <uid>`` is run on ``Spare_hosts[<recorded ip>]``.
    The bookkeeping entries in ``inject_info`` and ``has_injected`` are
    removed only after the destroy command succeeds.

    :param dto: dict; ``tag`` is the experiment UID, ``open`` set to
        ``'true'`` sends the stop record through ``RabbitMq.connect``
    :return: the raw ad-hoc result of the destroy command, or
        ``'Inject not found'`` when the UID is not in ``inject_info``
    """
    stop_id = dto['tag']
    method_name = sys._getframe().f_code.co_name
    destroy_cmd = './blade destroy ' + stop_id

    # When several records share the UID the last one wins, matching the
    # original scan.
    key = None
    for index, record in enumerate(inject_info):
        if record['cmd_id'] == stop_id:
            key = index

    if key is None:
        the_stop_info = {
            "cmd": destroy_cmd,
        }
        Logger.log(
            'error', 'UID NOT FOUND - Method : ' + method_name +
            "() - - " + str(the_stop_info))
        return 'Inject not found'

    record = inject_info[key]
    target_host = record['ip']
    run_host = Spare_hosts[target_host]

    r = Runner()
    r.run_ad_hoc(hosts=run_host, module='shell', args=destroy_cmd)
    result = r.get_adhoc_result()

    if len(result["success"]) > 0:
        transform_ip = list(result["success"])[0]
        the_stop_info = {
            "position": record["position"],
            "ip": run_host,
            "start_time": result["success"][transform_ip]["start"],
            "cmd": result["success"][transform_ip]["cmd"],
        }
        inject_info.pop(key)
        # Release the host only now. Dropping the has_injected entry before
        # the destroy ran marked the host as clean even when it failed.
        for index, entry in enumerate(has_injected):
            if entry['host'] == target_host and entry['tag'] == stop_id:
                has_injected.pop(index)
                break
        Logger.log(
            'info', 'SUCCESS - Method : ' + method_name + "() - - " +
            str(the_stop_info))
        if dto['open'] == 'true':
            RabbitMq.connect(the_stop_info)
        return result

    if len(result["unreachable"]) > 0:
        flag, failures = "UNREACHABLE", result["unreachable"]
    else:
        flag, failures = "FAILED", result["failed"]
    # Tolerate a result with no failure detail instead of raising
    # IndexError.
    if len(failures) > 0:
        message = failures[list(failures)[0]]["msg"]
    else:
        message = "no result was returned"
    the_stop_info = {
        "position": record["position"],
        "ip": run_host,
        "cmd": destroy_cmd,
        "message": message
    }
    Logger.log(
        "error",
        flag + " - Method : " + method_name + "() - - " + str(the_stop_info))
    return result
def get_service_log(service, namespace):
    """Collect the logs of every pod that belongs to one service.

    Pods are listed with ``kubectl get pods``. A pod belongs to the service
    when its name contains the service name but contains neither
    ``<service>-db`` nor ``<service>-external``. Each matching pod is handed
    to ``get_pod_log``.

    :param service: service name; must be contained in ``service_name``
    :param namespace: namespace the pods live in
    :return: list of ``get_pod_log`` results, or an explanatory string when
        the service is unknown, the pod listing fails, or no pod matches
    """
    method_name = sys._getframe().f_code.co_name

    if service not in service_name:
        info = {
            "error":
            "The service does not exist. Please check the service name!",
        }
        Logger.log(
            "error", "SERVICE NOT EXIST - Method : " + method_name +
            "() - - " + str(info))
        return ("The service " + service +
                " does not exist. Please check the service name!")

    r = Runner()
    r.run_ad_hoc(hosts="10.60.38.181",
                 module='shell',
                 args="/opt/kube/bin/kubectl get pods -n " + namespace)
    result = r.get_adhoc_result()

    if len(result["success"]) == 0:
        if len(result["unreachable"]) > 0:
            flag, failures = "UNREACHABLE", result["unreachable"]
        else:
            flag, failures = "FAILED", result["failed"]
        # Tolerate a result with no failure detail instead of raising
        # IndexError.
        if len(failures) > 0:
            message = failures[list(failures)[0]]["msg"]
        else:
            message = "no result was returned"
        info = {
            "error": "Failed to get the pod list of service " + service,
            "message": message
        }
        Logger.log(
            "error",
            flag + " - Method : " + method_name + "() - - " + str(info))
        return "Failed to get the pod list of services " + service

    transform_ip = list(result["success"])[0]
    stdout_lines = result["success"][transform_ip]["stdout_lines"]
    excluded = (service + "-db", service + "-external")
    name_list = []
    for line in stdout_lines[1:]:  # the first line is the kubectl header
        fields = line.split()
        if not fields:
            continue
        # Match on the pod name only, so the other columns of the line
        # cannot produce a false match.
        name = fields[0]
        if service in name and not any(tag in name for tag in excluded):
            name_list.append(name)
    # Parenthesized so the statement is valid on Python 2 and Python 3.
    print(name_list)

    if not name_list:
        info = {
            "error": "The service " + service + " has no pod list!",
        }
        Logger.log(
            "error",
            "NO POD - Method : " + method_name + "() - - " + str(info))
        return "The service " + service + " has no pod list!"

    info = {
        "success":
        "The " + service + " service pod list was successfully obtained!",
        "pod": name_list
    }
    Logger.log(
        "info", "SUCCESS - Method : " + method_name + "() - - " + str(info))

    # One timestamp for the whole request so all pod logs share a directory.
    request_time = time.strftime('%Y-%m-%d/%H:%M:%S',
                                 time.localtime(time.time()))
    return [
        get_pod_log(pod_name, namespace, request_time)
        for pod_name in name_list
    ]