def func_similarity_cfg_level(soft_db, funcs, character_db, func_name, worksheet):
    """Score candidate functions against one reference function at CFG level.

    The reference function ``func_name`` is looked up in ``character_db``.
    ``funcs`` is narrowed with ``filter_functions`` using the reference's
    return type and parameter list, and every remaining candidate is scored
    with ``func_cfg_similarity``.

    :param soft_db: database containing the functions to be compared.
    :param funcs: candidate functions taken from ``soft_db``.
    :param character_db: vulnerability-signature database.
    :param func_name: name of the target (reference) function.
    :param worksheet: object with ``append``; receives one tuple per match:
        ``(func_name, file, candidate_name, match, similarity)`` with the
        similarity rounded to four decimals.
    :return: None. Prints a message and returns early when the reference
        function cannot be found.
    """
    target_ast = get_function_ast_root(character_db, func_name)
    if target_ast is None:
        print("no function found")
        return

    # Keep only candidates compatible with the target's return type and
    # parameter list (arguments are evaluated in the original call order).
    candidates = filter_functions(
        soft_db,
        funcs,
        get_function_return_type(character_db, target_ast),
        get_function_param_list(character_db, target_ast),
    )

    target_node = get_function_node(character_db, func_name)
    for candidate_ast in candidates:
        candidate = get_function_node_by_ast_root(soft_db, candidate_ast)
        matched, similarity = func_cfg_similarity(
            candidate, soft_db, target_node, character_db)

        if not matched:
            # -1 is the marker for "comparison was skipped"; the message
            # below says there were too many nodes to compare.
            if similarity == -1:
                print(u"节点太多,未进行比较 ")
            continue

        candidate_name = candidate.properties[u'name']
        # NOTE(review): the first 41 characters of the path are dropped --
        # presumably a fixed directory prefix; confirm against the data.
        source_file = get_function_file(soft_db, candidate_name)[41:]
        worksheet.append(
            (func_name, source_file, candidate_name, matched,
             round(similarity, 4)))
def func_similarity_astLevel(db1, funcs, db2, func_name, suffix_tree_obj, worksheet):
    """Search candidate functions for the serialized AST of a target function.

    The target ``func_name`` is serialized from ``db2`` into four patterns.
    Each candidate from ``db1`` that survives ``filter_functions`` is
    serialized in the four matching modes and searched with
    ``suffix_tree_obj.search``. A row is appended to ``worksheet`` when at
    least one mode matches.

    :param db1: database containing the functions to be compared.
    :param funcs: candidate functions taken from ``db1``.
    :param db2: vulnerability-signature database.
    :param func_name: name of the target function in ``db2``.
    :param suffix_tree_obj: object exposing ``search(text, pattern)``.
    :param worksheet: object with ``append``; receives
        ``(func_name, file, candidate_name, distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const)``.
    :return: None.
    """
    report_keys = (
        'distinct_type_and_const',
        'distinct_const_no_type',
        'distinct_type_no_const',
        'no_type_no_const',
    )
    # Constructor flags paired one-to-one with report_keys.
    serializer_flags = ((True, True), (False, True), (True, False), (False, False))

    target_func = get_function_ast_root(db2, func_name)
    if target_func is None:
        # Nothing to compare against; bail out instead of failing later.
        print("%s is not found" % func_name)
        return

    return_type = get_function_return_type(db2, target_func)
    param_list = get_function_param_list(db2, target_func)
    filter_funcs = filter_functions(db1, funcs, return_type, param_list)

    # Skip the first two entries of each serialization (the FunctionDef and
    # CompoundStatement nodes) so the pattern covers the function body only.
    ret = serializedAST(db2).genSerilizedAST(target_func)
    patterns = [";".join(ret[i][2:]) for i in range(4)]

    for func in filter_funcs:
        candidate_name = func.properties[u'name']
        ast_root = get_function_ast_root(db1, candidate_name)
        if ast_root is None:
            continue

        # NOTE(review): the target above is serialized with the one-argument
        # serializedAST API (list per mode) while candidates use the
        # three-argument API (string ending in ';'). Kept as in the original;
        # confirm both forms are still supported by serializedAST.
        subjects = [
            serializedAST(db1, flag_a, flag_b).genSerilizedAST(ast_root)[0][:-1]
            for flag_a, flag_b in serializer_flags
        ]

        # Every key is always populated. The original only set keys on a hit
        # (KeyError on read) and stored the fourth result under the wrong key.
        report = {}
        for key, subject, pattern in zip(report_keys, subjects, patterns):
            report[key] = bool(suffix_tree_obj.search(subject, pattern))

        if any(report[key] for key in report_keys):
            file_path = get_function_file(db1, candidate_name)
            worksheet.append(
                (func_name, file_path, candidate_name)
                + tuple(report[key] for key in report_keys))
def get_type_mapping_table(neo4j_db, func_name):
    """Return the variable mapping held by the serializer for one function.

    :param neo4j_db: graph database handle passed to the AST helpers.
    :param func_name: name of the function whose mapping is wanted.
    :return: ``serializedAST(...).variable_maps`` after the function's AST has
        been serialized, or ``{'other': 'v'}`` when the function is not found.
    """
    root = get_function_ast_root(neo4j_db, func_name)
    if root is None:
        # The message reads "node does not exist".
        print(u"节点不存在")
        return {'other': 'v'}

    serializer = serializedAST(neo4j_db)
    # The serialized output is discarded; only the serializer's mapping
    # attribute (presumably filled in by this call) is returned.
    serializer.genSerilizedAST(root)
    return serializer.variable_maps
def get_software_var_map(soft, port):
    """Serialize every function of a project and cache the result in SQLite.

    Connects to the Neo4j instance on ``127.0.0.1:<port>``, creates table
    ``soft`` in ``/home/bert/Documents/data/<soft>.db`` if needed, and stores
    one row per function: id, name, file, variable map and five serialized
    AST variants. Functions whose id is already in the table are skipped, so
    the function can be re-run to resume.

    :param soft: project name; used as both the database file name and the
        table name (interpolated into SQL, so it must be a trusted
        identifier).
    :param port: TCP port of the local Neo4j REST endpoint.
    :return: None.
    """
    neo4j_db = Graph("http://127.0.0.1:%d/db/data/" % port)
    sql_db = sqlite3.connect("/home/bert/Documents/data/" + soft + ".db")
    try:
        sql_db.execute('''create table if not exists %s(
            func_id INT PRIMARY KEY,
            func_name CHAR(100) NOT NULL,
            file CHAR(200) NOT NULL,
            var_map TEXT NOT NULL,
            ast_type_const TEXT NOT NULL,
            ast_type_only TEXT NOT NULL,
            ast_const_only TEXT NOT NULL,
            ast_no_type_const TEXT NOT NULL,
            no_mapping TEXT NOT NULL)''' % soft)
        sql_db.commit()

        funcs = get_all_functions(neo4j_db)
        # NOTE(review): creates an empty file named after the function count
        # in the working directory -- looks like a progress marker. Kept, but
        # the handle is now closed instead of leaked.
        open(str(len(funcs)), "w").close()
        print("get all functions OK: %d" % len(funcs))

        select_sql = "select * from %s where func_id=?" % soft
        insert_sql = 'insert into %s values(?, ?, ?, ?, ?, ?, ?, ?,?)' % soft

        for func in funcs:
            # Skip functions that were already stored by an earlier run.
            if sql_db.execute(select_sql, (func._id,)).fetchone():
                continue

            print("[%s] processing %s " % (
                datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
                func.properties[u'name']))

            try:
                # NOTE(review): the node object is passed here, whereas other
                # call sites in this file pass a function name -- confirm the
                # helpers accept both.
                ast_root = get_function_ast_root(neo4j_db, func)
                func_file = get_function_file(neo4j_db, func)
                ser = serializedAST(neo4j_db)
                ret = ser.genSerilizedAST(ast_root)
                var_map = ser.variable_maps
                asts = [";".join(ret[i]) for i in range(5)]
            except Exception:
                traceback.print_exc()
                # Do not fall through to the insert: the original did, and
                # wrote the previous function's data under this function's id.
                continue

            try:
                sql_db.execute(
                    insert_sql,
                    (func._id, func.properties[u'name'], func_file,
                     str(var_map)) + tuple(asts))
                sql_db.commit()
            except Exception as e:
                # Best effort: report the failure and keep going.
                print(e)
    finally:
        sql_db.close()
def func_similarity_segement_level(db1, funcs, db2, func_name, db_table):
    """Search every function in ``funcs`` for a code-segment AST pattern.

    The segment ``func_name`` is serialized from ``db2`` into four patterns.
    Each entry of ``funcs`` is serialized from ``db1`` and searched with a
    suffix tree. One row per function is written to table ``db_table`` in
    ``/home/bert/Documents/data/soft_test.db``.

    :param db1: Neo4j URI of the database to be compared.
    :param funcs: iterable of indexable records; positions 0, 1 and 2 are
        stored as ``func_id``, ``func_name`` and ``file``.
    :param db2: Neo4j URI of the code-segment database.
    :param func_name: name of the function that wraps the code segment.
    :param db_table: SQLite table name (interpolated into SQL, so it must be
        a trusted identifier).
    :return: None.
    """
    neo4j_db1 = Graph(db1)
    neo4j_db2 = Graph(db2)
    suffix_tree_obj = suffixtree()

    db_conn = sqlite3.connect("/home/bert/Documents/data/soft_test.db")
    try:
        db_conn.execute("""create table if not exists %s(
            func_id INT PRIMARY KEY,
            func_name CHAR(100) NOT NULL,
            file CHAR(200) NOT NULL,
            vuln_segement CHAR(100) NOT NULL,
            distinct_type_and_const BOOLEAN,
            distinct_const_no_type BOOLEAN,
            distinct_type_no_const BOOLEAN,
            no_type_no_const BOOLEAN)""" % db_table)
        db_conn.commit()

        target_func = get_function_ast_root(neo4j_db2, func_name)
        if target_func is None:
            print("%s is not found" % func_name)
            return

        # Skip the first two entries of each serialization (the FunctionDef
        # and CompoundStatement nodes) so only the body is matched.
        ret = serializedAST(neo4j_db2).genSerilizedAST(target_func)
        patterns = [";".join(ret[i][2:]) for i in range(4)]

        insert_sql = "insert into %s values(?,?,?,?,?,?,?,?)" % db_table

        for func in funcs:
            print("[%s] processing %s VS %s" % (
                datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
                func[1], func_name))

            ast_root = get_function_ast_root(neo4j_db1, func[0])
            if ast_root is None:
                print("function not found: %s %s" % (func[0], func[1]))
                # The original fell through and tried to serialize None.
                continue

            tmp = serializedAST(neo4j_db1).genSerilizedAST(ast_root)
            subjects = [";".join(tmp[i]) for i in range(4)]

            try:
                # Order matches the four boolean columns of the table.
                flags = tuple(
                    bool(suffix_tree_obj.search(subject, pattern))
                    for subject, pattern in zip(subjects, patterns))
                db_conn.execute(
                    insert_sql,
                    (func[0], func[1], func[2], func_name) + flags)
                db_conn.commit()
            except Exception as e:
                # Best effort: record the failure and continue. Each field is
                # newline-terminated; writelines() adds no separators, so the
                # original log entries ran together.
                stamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
                with open("suffix_tree_error.log", "a") as log_file:
                    log_file.writelines([
                        stamp + " " + str(e) + "\n",
                        subjects[0] + "\n",
                        patterns[0] + "\n",
                    ])
    finally:
        db_conn.close()
def vuln_patch_compare(conn, neo4jdb, vuln_info, worksheet, suffix_tree_obj):
    """Check whether a vulnerable function's AST still occurs in its patch.

    The vulnerable and patched functions are looked up under the names
    ``<CVE>_VULN_<func>`` and ``<CVE>_PATCHED_<func>``. The vulnerable
    function is serialized in four modes and stripped of its leading
    FunctionDef/CompoundStatement entries, then searched for inside the
    patched function's serialization.

    :param conn: connection handed to ``vuln_info.get_cve_info`` and
        ``process_line``.
    :param neo4jdb: graph database handle.
    :param vuln_info: record exposing ``get_cve_info(conn)`` and ``vuln_func``.
    :param worksheet: object with ``append``; receives exactly one row built
        by ``process_line``.
    :param suffix_tree_obj: object exposing ``search(text, pattern)``.
    :return: None.
    """
    cve_info = vuln_info.get_cve_info(conn)
    print("[%s] processing %s" % (
        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"), cve_info.cveid))

    vuln_name = (cve_info.cveid.replace("-", "_").upper()
                 + "_VULN_" + vuln_info.vuln_func)
    patch_name = (cve_info.cveid.replace("-", "_").upper()
                  + "_PATCHED_" + vuln_info.vuln_func)
    started = time.time()

    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        worksheet.append(
            process_line(conn, vuln_info, "vuln_func_not_found", None, 0))
        return

    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        worksheet.append(
            process_line(conn, vuln_info, "patched_func_not_found", None, 0))
        return

    # (report key, first flag, second flag) for each serialization mode.
    modes = (
        ('distinct_type_and_const', True, True),
        ('distinct_const_no_type', False, True),
        ('distinct_type_no_const', True, False),
        ('no_type_no_const', False, False),
    )

    def serialize_all(root):
        # Per the original note: element 0 of the result is the serialized
        # AST string (element 1 is the node count); the string ends with ';',
        # which is trimmed here.
        return [
            serializedAST(neo4jdb, first, second).genSerilizedAST(root)[0][:-1]
            for _, first, second in modes
        ]

    raw_patterns = serialize_all(vuln_func)

    # Remove the leading FunctionDef and CompoundStatement nodes.
    prefix_str = r"^FunctionDef\([0-9]+\);CompoundStatement\([0-9]+\);"
    patterns = [re.sub(prefix_str, "", raw) for raw in raw_patterns]

    subjects = serialize_all(patched_func)

    report = {}
    for (key, _, _), subject, pattern in zip(modes, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    elapsed = round(time.time() - started, 2)
    worksheet.append(process_line(conn, vuln_info, "success", report, elapsed))
    return
def segement_ast_similarity_process(vuln_name, patch_name, neo4jdb, org_func_name,
                                    type_mapping, worksheet, suffix_tree_obj):
    """Search a patched function for the serialized AST of a vulnerable segment.

    Both functions are serialized with one ``serializedAST`` instance whose
    ``data_type_mapping`` is set to ``type_mapping``. The vulnerable
    function's four serializations (minus their first two entries) are
    searched for in the matching serializations of the patched function.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one in effect after import.

    :param vuln_name: name of the vulnerable segment function.
    :param patch_name: name of the patched function.
    :param neo4jdb: graph database handle.
    :param org_func_name: original function name, copied into the result row.
    :param type_mapping: mapping assigned to the serializer; also written to
        the row as a string.
    :param worksheet: object with ``append``; receives one 10-field row:
        ``(vuln_name, patch_name, status, distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const,
        cost_seconds, org_func_name, str(type_mapping))``.
    :param suffix_tree_obj: object exposing ``search(text, pattern)``.
    :return: None.
    """
    start_time = time.time()
    print("[%s] processing %s" % (
        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
        vuln_name + " vs " + patch_name))

    # Make sure both functions exist in the database before serializing.
    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        worksheet.append(
            (vuln_name, patch_name, "vuln_func_not_found", "-", "-", "-", "-",
             0, org_func_name, str(type_mapping)))
        return

    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        worksheet.append(
            (vuln_name, patch_name, "patch_func_not_found", "-", "-", "-", "-",
             0, org_func_name, str(type_mapping)))
        return

    report_keys = (
        'distinct_type_and_const',
        'distinct_const_no_type',
        'distinct_type_no_const',
        'no_type_no_const',
    )

    o1 = serializedAST(neo4jdb)
    o1.data_type_mapping = type_mapping

    # Skip the first two entries (FunctionDef and CompoundStatement nodes)
    # so the pattern covers the segment body only.
    ret = o1.genSerilizedAST(vuln_func)
    patterns = [";".join(ret[i][2:]) for i in range(4)]

    # Each subject comes from the serialization mode matching its pattern.
    # The original built all four subjects from tmp[0].
    tmp = o1.genSerilizedAST(patched_func)
    subjects = [";".join(tmp[i]) for i in range(4)]

    # Every key is populated. The original stored a hit of the fourth search
    # under 'distinct_type_no_const' and never reported 'no_type_no_const'.
    report = {}
    for key, subject, pattern in zip(report_keys, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    cost = round(time.time() - start_time, 2)
    worksheet.append(
        (vuln_name, patch_name, "success")
        + tuple(report[key] for key in report_keys)
        + (cost, org_func_name, str(type_mapping)))
def search_vuln_seg_in_func(db1, vuln_seg, vuln_func, var_map, db2, func_name, suffix_obj):
    """Search one function for a vulnerable code segment in five AST modes.

    :param db1: database holding the vulnerable segment.
    :param vuln_seg: name of the segment function; ``vuln_func`` is used as a
        fallback name when it is not found.
    :param vuln_func: name of the full vulnerable function.
    :param var_map: mapping assigned to the segment serializer's
        ``variable_maps`` before serializing.
    :param db2: database holding the function to search in.
    :param func_name: name of the function to search in.
    :param suffix_obj: object exposing ``search(text, pattern)``.
    :return: ``(vuln_seg + "-" + vuln_func, func_name, "vuln_not_found")``,
        ``(vuln_seg, func_name, "patch_not_found")``, or
        ``(vuln_seg, func_name, "success", <five booleans>)`` in the order
        distinct_type_and_const, distinct_const_no_type,
        distinct_type_no_const, no_type_no_const, no_mapping.
    """
    print("[%s] processing %s VS %s" % (
        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
        vuln_seg, func_name))

    segment_root = get_function_ast_root(db1, vuln_seg)
    if segment_root is None:
        # Fall back to the whole vulnerable function.
        segment_root = get_function_ast_root(db1, vuln_func)
        if segment_root is None:
            print("%s %s not found" % (vuln_seg, vuln_func))
            return (vuln_seg+"-"+vuln_func, func_name, "vuln_not_found")

    target_root = get_function_ast_root(db2, func_name)
    if target_root is None:
        print("%s is not found" % func_name)
        return (vuln_seg, func_name, "patch_not_found")

    segment_serializer = serializedAST(db1)
    segment_serializer.variable_maps = var_map
    segment_modes = segment_serializer.genSerilizedAST(segment_root)
    # Drop the first two entries (FunctionDef and CompoundStatement nodes).
    patterns = [";".join(segment_modes[idx][2:]) for idx in range(5)]

    target_modes = serializedAST(db2).genSerilizedAST(target_root)
    subjects = [";".join(target_modes[idx][2:]) for idx in range(5)]

    hits = []
    for idx in range(5):
        hits.append(bool(suffix_obj.search(subjects[idx], patterns[idx])))

    return (vuln_seg, func_name, "success") + tuple(hits)
def segement_ast_similarity_process(
    vuln_name, patch_name, neo4jdb, org_func_name, type_mapping, worksheet, suffix_tree_obj
):
    """Search a patched function for the serialized AST of a vulnerable segment.

    One ``serializedAST`` instance, with ``data_type_mapping`` set to
    ``type_mapping``, serializes both functions. Each of the vulnerable
    function's four serializations (first two entries removed) is searched
    for in the patched function's serialization of the same mode, and one
    result row is appended to ``worksheet``.

    NOTE(review): an earlier definition with the same name exists in this
    file; this later one is the definition in effect after import.

    :param vuln_name: name of the vulnerable segment function.
    :param patch_name: name of the patched function.
    :param neo4jdb: graph database handle.
    :param org_func_name: original function name, copied into the result row.
    :param type_mapping: mapping assigned to the serializer; also written to
        the row as a string.
    :param worksheet: object with ``append``; receives
        ``(vuln_name, patch_name, status, distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const,
        cost_seconds, org_func_name, str(type_mapping))``.
    :param suffix_tree_obj: object exposing ``search(text, pattern)``.
    :return: None.
    """
    start_time = time.time()
    print("[%s] processing %s" % (
        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
        vuln_name + " vs " + patch_name,
    ))

    mapping_text = str(type_mapping)

    def not_found_row(status):
        # Placeholder row used when one of the two functions is missing.
        return (vuln_name, patch_name, status, "-", "-", "-", "-", 0,
                org_func_name, mapping_text)

    # Both functions must exist in the database.
    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        worksheet.append(not_found_row("vuln_func_not_found"))
        return

    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        worksheet.append(not_found_row("patch_func_not_found"))
        return

    o1 = serializedAST(neo4jdb)
    o1.data_type_mapping = type_mapping

    # Patterns: drop the first two entries (FunctionDef and
    # CompoundStatement nodes) of each of the four serializations.
    ret = o1.genSerilizedAST(vuln_func)
    patterns = [";".join(ret[mode][2:]) for mode in range(4)]

    # Subjects: one per mode. The original joined tmp[0] four times, so modes
    # 2-4 were compared against the wrong serialization.
    tmp = o1.genSerilizedAST(patched_func)
    subjects = [";".join(tmp[mode]) for mode in range(4)]

    # Column order: distinct_type_and_const, distinct_const_no_type,
    # distinct_type_no_const, no_type_no_const. The original wrote the fourth
    # hit under the third key and emitted the third value twice.
    flags = tuple(
        bool(suffix_tree_obj.search(subjects[mode], patterns[mode]))
        for mode in range(4)
    )

    cost = round(time.time() - start_time, 2)
    worksheet.append(
        (vuln_name, patch_name, "success") + flags
        + (cost, org_func_name, mapping_text)
    )
def search_vuln_seg_in_func(db1, vuln_seg, vuln_func, var_map, db2, func_name, suffix_obj):
    """Report whether a vulnerable segment's AST occurs inside a function.

    The segment is looked up in ``db1`` by ``vuln_seg``, falling back to
    ``vuln_func``; the function to search is looked up in ``db2``. Both are
    serialized into five variants, the first two entries of each are
    dropped, and each segment variant is searched for in the corresponding
    function variant.

    :param db1: database holding the vulnerable segment.
    :param vuln_seg: name of the segment function.
    :param vuln_func: fallback name (the full vulnerable function).
    :param var_map: mapping assigned to the segment serializer's
        ``variable_maps``.
    :param db2: database holding the function to search in.
    :param func_name: name of the function to search in.
    :param suffix_obj: object exposing ``search(text, pattern)``.
    :return: a 3-tuple ending in ``"vuln_not_found"`` or
        ``"patch_not_found"`` when a lookup fails, otherwise
        ``(vuln_seg, func_name, "success", distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const,
        no_mapping)``.
    """
    timestamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    print("[%s] processing %s VS %s" % (timestamp, vuln_seg, func_name))

    vuln_root = get_function_ast_root(db1, vuln_seg)
    if vuln_root is None:
        # Segment not stored on its own: try the full vulnerable function.
        vuln_root = get_function_ast_root(db1, vuln_func)
    if vuln_root is None:
        print("%s %s not found" % (vuln_seg, vuln_func))
        return (vuln_seg + "-" + vuln_func, func_name, "vuln_not_found")

    searched_root = get_function_ast_root(db2, func_name)
    if searched_root is None:
        print("%s is not found" % func_name)
        return (vuln_seg, func_name, "patch_not_found")

    vuln_serializer = serializedAST(db1)
    vuln_serializer.variable_maps = var_map
    vuln_variants = vuln_serializer.genSerilizedAST(vuln_root)

    def body_of(variants, index):
        # Join one variant, skipping its FunctionDef and CompoundStatement
        # entries.
        return ";".join(variants[index][2:])

    needles = [body_of(vuln_variants, i) for i in range(5)]

    searched_variants = serializedAST(db2).genSerilizedAST(searched_root)
    haystacks = [body_of(searched_variants, i) for i in range(5)]

    result_keys = (
        'distinct_type_and_const',
        'distinct_const_no_type',
        'distinct_type_no_const',
        'no_type_no_const',
        'no_mapping',
    )
    report = {}
    for key, haystack, needle in zip(result_keys, haystacks, needles):
        report[key] = True if suffix_obj.search(haystack, needle) else False

    return (vuln_seg, func_name, "success",
            report["distinct_type_and_const"],
            report["distinct_const_no_type"],
            report["distinct_type_no_const"],
            report["no_type_no_const"],
            report["no_mapping"])