def is_controllable(expr, flag=None):
    """Classify a name by whether it refers to user-controllable PHP input.

    :param expr: the name to classify, normally a PHP variable name string
        such as ``'$_GET'``. Values without a ``startswith`` method (for
        example AST nodes) are accepted and classified as ``-1``.
    :param flag: when truthy, results with code 1 or 3 carry ``expr`` itself
        as the second element instead of a ``php.Variable`` wrapper.
    :return: a ``(code, value)`` tuple where ``code`` is

        * ``1``  - ``expr`` is one of the request-derived PHP globals below,
        * ``3``  - ``expr`` is some other name starting with ``$``,
        * ``-1`` - anything else (``value`` is always ``php.Variable(expr)``).
    """
    # A tuple (not a set) on purpose: membership is tested with ``==`` so
    # unhashable arguments do not raise.
    controlled_params = (
        '$_GET',
        '$_POST',
        '$_REQUEST',
        '$_COOKIE',
        '$_FILES',
        '$_SERVER',
        '$HTTP_POST_FILES',
        '$HTTP_COOKIE_VARS',
        '$HTTP_REQUEST_VARS',
        '$HTTP_POST_VARS',
        '$HTTP_RAW_POST_DATA',
        '$HTTP_GET_VARS',
    )

    if expr in controlled_params:
        logger.debug('[AST] is_controllable --> {expr}'.format(expr=expr))
        return (1, expr) if flag else (1, php.Variable(expr))

    try:
        is_variable = expr.startswith("$")
    except AttributeError:
        # Callers such as array_back hand over the raw right-hand side of an
        # assignment, which is not necessarily a string; treat it as "not a
        # variable name" instead of crashing.
        is_variable = False

    if is_variable:
        return (3, expr) if flag else (3, php.Variable(expr))

    return -1, php.Variable(expr)
def array_back(param, nodes):
    """Trace an array-element taint backwards through earlier assignments.

    The statements in ``nodes`` are visited from last to first. Only
    ``php.Assignment`` nodes are inspected; two shapes are handled:

    * the whole array is assigned a ``php.Array`` literal and one of its
      entries has the key the taint indexes with;
    * the left-hand side equals the tainted array element itself.

    :param param: the tainted array element; ``param.node.name`` and
        ``param.expr`` are read (presumably a ``php.ArrayOffset`` - the
        caller in this file only passes those).
    :param nodes: sliceable sequence of AST statements to search.
    :return: ``(is_co, cp, expr_lineno)``. When nothing matches this is
        ``(3, param, 0)``; otherwise the values come from
        ``is_controllable`` / ``parameters_back`` / a nested ``array_back``.
        A later (earlier-in-file) match overwrites an earlier result.
    """
    param_name = param.node.name
    param_expr = param.expr
    is_co = 3
    cp = param
    expr_lineno = 0

    for index in range(len(nodes) - 1, -1, -1):
        node = nodes[index]
        if not isinstance(node, php.Assignment):
            continue

        param_node_name = get_node_name(node.node)
        param_node = node.node
        param_node_expr = node.expr
        # Statements preceding this assignment. Nested array traces run over
        # these only, so every recursion works on a strictly shorter list.
        # (The previous code recursed with the identical arguments, which
        # could never terminate.)
        earlier_nodes = nodes[:index]

        # Case 1: the array is (re)defined with a literal - look for our key.
        if param_node_name == param_name and isinstance(param_node_expr, php.Array):
            for p_node in param_node_expr.nodes:
                if p_node.key != param_expr:
                    continue

                if isinstance(p_node.value, php.ArrayOffset):
                    # The value is itself an array element: check its base
                    # array first, then keep tracing that element.
                    is_co, cp = is_controllable(p_node.value.node.name)
                    if is_co != 1:
                        is_co, cp, expr_lineno = array_back(p_node.value, earlier_nodes)
                else:
                    n_node = php.Variable(p_node.value)
                    is_co, cp, expr_lineno = parameters_back(n_node, nodes)

        # Case 2: the tainted element itself is the assignment target.
        if param == param_node:
            if isinstance(param_node_expr, php.ArrayOffset):
                # Right-hand side is another array element: check its base
                # array first, then keep tracing that element.
                is_co, cp = is_controllable(param_node_expr.node.name)
                if is_co != 1:
                    is_co, cp, expr_lineno = array_back(param_node_expr, earlier_nodes)
            else:
                is_co, cp = is_controllable(param_node_expr)
                logger.debug('[AST] array_back is_controllable --> {is_co}'.format(is_co=is_co))

                if is_co != 1 and is_co != -1:
                    # NOTE(review): this reads ``.node.value`` from the
                    # right-hand side, kept as in the original - confirm
                    # which node types are expected here.
                    n_node = php.Variable(param_node_expr.node.value)
                    is_co, cp, expr_lineno = parameters_back(n_node, nodes)

    return is_co, cp, expr_lineno
def deep_parameters_back(node, back_node, function_params, count, file_path, lineno=0):
    """Trace a taint through the current file and, if needed, its includes.

    The taint is first traced with ``parameters_back``. If the result code is
    3, every ``php.Include`` statement in ``back_node`` (last to first) is
    resolved relative to the directory of ``file_path``, parsed, and traced
    recursively with the current ``cp`` as the new taint.

    :param node: the tainted node; its name is obtained via ``get_node_name``.
    :param back_node: sliceable sequence of AST statements to search.
    :param function_params: passed through to ``parameters_back``.
    :param count: current recursion depth; tracing into includes stops once
        it exceeds 20.
    :param file_path: path of the file ``back_node`` was parsed from.
    :param lineno: forwarded to ``parameters_back`` for this file only.
        Defaults to 0, which matches the behaviour of five-argument calls;
        ``anlysis_params`` in this file passes it as a sixth argument.
    :return: ``(is_co, cp, expr_lineno)`` as produced by ``parameters_back``
        or by the last include that was traced.
    """
    count += 1
    params = get_node_name(node)
    is_co, cp, expr_lineno = parameters_back(params, back_node, function_params, lineno)

    if count > 20:
        logger.warning("[Deep AST] depth too big to auto exit...")
        return is_co, cp, expr_lineno

    if is_co == 3:
        logger.debug("[Deep AST] try to find include, start deep AST")

        for back in back_node[::-1]:
            if not isinstance(back, php.Include):
                continue

            # Resolve the include next to the *current* file. A separate
            # variable is used so one include does not change the base
            # directory used for the next one.
            path_parts = re.split(r"[\/\\]", file_path)
            path_parts.pop()
            path_parts.append(back.expr)

            try:
                include_path = "/".join(path_parts)
            except TypeError:
                # The include target is not a plain string (e.g. a computed
                # expression), so there is no file to open.
                logger.warning("[Deep AST] error to open new file...continue")
                continue

            try:
                logger.debug("[Deep AST] open new file {file_path}".format(
                    file_path=include_path))
                with open(include_path, 'r') as f:
                    file_content = f.read()
            except Exception:
                # Best effort: an unreadable include is skipped, not fatal.
                logger.warning("[Deep AST] error to open new file...continue")
                continue

            parser = make_parser()
            all_nodes = parser.parse(file_content, debug=False, lexer=lexer.clone(), tracking=with_line)

            tainted = php.Variable(cp)
            is_co, cp, expr_lineno = deep_parameters_back(
                tainted, all_nodes, function_params, count, include_path)

            # NOTE(review): only a -1 result ends the search; any other
            # result lets later includes overwrite it - confirm intended.
            if is_co == -1:
                break

    return is_co, cp, expr_lineno
def analysis_functioncall_node(node, back_node, vul_function, vul_lineno, function_params=None, file_path=None):
    """Trace every argument of a function-call node and record the outcome.

    Each parameter returned by ``get_all_params(node.params)`` is wrapped in
    a ``php.Variable``, traced with ``parameters_back`` and the result is
    handed to ``set_scan_results``.

    :param node: the call node; only ``node.params`` is read.
    :param back_node: statements to trace back through.
    :param vul_function: name of the sensitive function (logged and reported).
    :param vul_lineno: line number reported with each result.
    :param function_params: forwarded to ``parameters_back``.
    :param file_path: accepted but not used by this function.
    :return: None
    """
    logger.debug('[AST] vul_function:{v}'.format(v=vul_function))

    for raw_param in get_all_params(node.params):
        tainted = php.Variable(raw_param)
        status, source, source_lineno = parameters_back(tainted, back_node, function_params)
        set_scan_results(status, source, source_lineno, vul_function, tainted, vul_lineno)
def anlysis_params(param, code_content, file_path, lineno):
    """Parse a source text and trace one parameter back from a given line.

    The code is parsed, the top-level nodes located before ``lineno`` are
    selected, and the trace is delegated to ``deep_parameters_back`` with a
    depth counter of 0 and no function parameters.

    :param param: name of the parameter to trace; wrapped in ``php.Variable``.
    :param code_content: source text to parse.
    :param file_path: path of the file the source came from.
    :param lineno: only nodes whose ``lineno`` is below ``int(lineno)`` are
        searched; the value is also forwarded as the sixth argument.
    :return: ``(is_co, cp, expr_lineno)`` from ``deep_parameters_back``.
    """
    tainted = php.Variable(param)

    php_parser = make_parser()
    parsed = php_parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=with_line)

    preceding = [stmt for stmt in parsed if stmt.lineno < int(lineno)]

    status, source, source_lineno = deep_parameters_back(tainted, preceding, None, 0, file_path, lineno)
    return status, source, source_lineno
import os

# Django has to be configured before any project module is imported.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'Kunlun_M.settings')

import django

django.setup()

from core.engine import scan
from core.engine import init_match_rule
from Kunlun_M.settings import EXAMPLES_PATH
from utils.log import logger
from phply import phpast as php


def test_scan():
    """Scanning the bundled examples directory yields a truthy result."""
    message = 'Examples Path: {path}'.format(path=EXAMPLES_PATH)
    logger.info(message)
    outcome = scan(EXAMPLES_PATH)
    assert outcome


# Fixture: a method ``eval_function($a)`` whose body evals ``$a``, the
# tainted variable, and the function name.
data = (
    php.Method(
        u'eval_function',
        [],
        [php.FormalParameter(u'$a', None, False, None)],
        [php.Eval(php.Variable(u'$a'))],
        False,
    ),
    php.Variable(u'$a'),
    "eval_function",
)


def test_init_match_rule():
    """``init_match_rule`` returns a tuple whose item 1 mentions the function."""
    first_result = init_match_rule(data)
    assert isinstance(first_result, tuple)
    second_result = init_match_rule(data)
    assert "eval_function" in second_result[1]
from cobra.engine import scan
from cobra.engine import init_match_rule
from cobra.config import examples_path
from cobra.log import logger
from phply import phpast as php


def test_scan():
    """Scanning the bundled examples directory yields a truthy result."""
    message = 'Examples Path: {path}'.format(path=examples_path)
    logger.info(message)
    outcome = scan(examples_path)
    assert outcome


# Fixture: a method ``eval_function($a)`` whose body evals ``$a``, paired
# with the tainted variable.
data = (
    php.Method(
        u'eval_function',
        [],
        [php.FormalParameter(u'$a', None, False, None)],
        [php.Eval(php.Variable(u'$a'))],
        False,
    ),
    php.Variable(u'$a'),
)


def test_init_match_rule():
    """``init_match_rule`` returns a tuple whose item 1 mentions the function."""
    first_result = init_match_rule(data)
    assert isinstance(first_result, tuple)
    second_result = init_match_rule(data)
    assert "eval_function" in second_result[1]
def parameters_back(param, nodes, function_params=None, lineno=0, function_flag=0):
    """
    Recursively trace a tainted value backwards through ``nodes``.

    ``param`` is the taint currently being followed. The last statement of
    ``nodes`` is inspected; when it assigns to the taint, the assigned
    expression becomes the new taint. The function then recurses on
    ``nodes[:-1]`` until the result code is 1 or -1 or the list is empty.

    :param param: the taint. ``php.FunctionCall`` is delegated to
        ``function_back`` and ``php.ArrayOffset`` to ``array_back``;
        anything else must expose ``.name``.
    :param nodes: statements to search, in source order.
    :param function_params: parameters of the enclosing function, if any.
        Checked once ``nodes`` is exhausted, and passed to
        ``is_sink_function``.
    :param lineno: upper line bound used when deciding whether to descend
        into a ``php.Function`` node.
    :param function_flag: 0 allows descending into ``php.Function`` nodes;
        every recursive call made here passes 1.
    :return: ``(is_co, cp, expr_lineno)``. ``is_co`` is a code from
        ``is_controllable`` / ``is_sink_function``, or 2 when the taint
        equals one of ``function_params``, or 4 when the taint became a
        parameter of a function (then ``cp`` is ``(function_node, param)``
        and the line number is 0).
    """
    # Taint is a function call: trace it through function_back.
    if isinstance(param, php.FunctionCall):
        is_co, cp, expr_lineno = function_back(param, nodes, function_params)
        return is_co, cp, expr_lineno

    # Taint is an array element: trace its definition/assignment instead.
    if isinstance(param, php.ArrayOffset):
        is_co, cp, expr_lineno = array_back(param, nodes)
        return is_co, cp, expr_lineno

    expr_lineno = 0  # line number of the source
    param_name = param.name
    is_co, cp = is_controllable(param_name)

    if len(nodes) != 0 and is_co != 1:
        # Only the last statement is examined per call; the rest is handled
        # by recursing on nodes[:-1] below.
        node = nodes[len(nodes) - 1]

        if isinstance(node, php.Assignment):  # follow assignments while tracing back
            param_node = get_node_name(node.node)  # the variable being assigned
            param_expr, expr_lineno, is_re = get_expr_name(node.expr)  # assigned expression: a variable or a list

            # Found where the taint comes from: check whether the assigned
            # expression is controllable.
            if param_name == param_node and not isinstance(param_expr, list):
                is_co, cp = is_controllable(param_expr)

                if is_co != 1:
                    is_co, cp = is_sink_function(param_expr, function_params)

                # The source becomes the new taint, replacing the old one.
                if isinstance(node.expr, php.ArrayOffset):
                    param = node.expr
                else:
                    param = php.Variable(param_expr)

            # The taint comes from a function call: look into that function.
            if param_name == param_node and isinstance(node.expr, php.FunctionCall):
                function_name = node.expr.name
                param = node.expr  # if no definition is found, trace the call itself

                # NOTE: this loop rebinds ``node``.
                for node in nodes[::-1]:
                    if isinstance(node, php.Function):
                        if node.name == function_name:
                            function_nodes = node.nodes

                            # Recurse into the function body for each return statement.
                            for function_node in function_nodes:
                                if isinstance(function_node, php.Return):
                                    return_node = function_node.node
                                    return_param = return_node.node
                                    is_co, cp, expr_lineno = parameters_back(return_param, function_nodes, function_params, lineno, function_flag=1)

            # The assigned expression yielded several names: trace each one.
            if param_name == param_node and isinstance(param_expr, list):
                for expr in param_expr:
                    param = expr
                    is_co, cp = is_controllable(expr)

                    if is_co == 1:
                        return is_co, cp, expr_lineno

                    param = php.Variable(param)
                    _is_co, _cp, expr_lineno = parameters_back(param, nodes[:-1], function_params, lineno, function_flag=1)

                    # Any result other than -1 is kept: one possibly
                    # controllable part makes the whole expression suspect.
                    if _is_co != -1:
                        is_co = _is_co
                        cp = _cp

        elif isinstance(node, php.Function) and function_flag == 0:
            function_nodes = node.nodes
            function_lineno = node.lineno
            function_params = node.params
            vul_nodes = []

            # Collect body statements located between the function header
            # and ``lineno``.
            for function_node in function_nodes:
                if int(function_lineno) <= function_node.lineno < int(lineno):
                    vul_nodes.append(function_node)

            if len(vul_nodes) > 0:
                is_co, cp, expr_lineno = parameters_back(param, function_nodes, function_params, function_lineno, function_flag=1)

                # The enclosing function becomes a new sensitive function:
                # report code 4 so the caller can start a new traversal.
                if is_co == 3:
                    logger.info("[Deep AST] Now vulnerability param become to function {}() param {}".format(node.name, cp.name))

                    is_co = 4
                    cp = tuple([node, param])
                    return is_co, cp, 0

        # Keep walking backwards unless the result is already 1 or -1.
        if is_co != 1 and is_co != -1:
            is_co, cp, expr_lineno = parameters_back(param, nodes[:-1], function_params, lineno, function_flag=1)

    elif len(nodes) == 0 and function_params is not None:  # nothing left to search: check the function parameters
        for function_param in function_params:
            if function_param == param:
                is_co = 2
                cp = function_param

    return is_co, cp, expr_lineno