def process_func_files(file_path): print file_path, " started.." bitvector_size = config.bloomfilter_size bitvector = bitarray.bitarray(bitvector_size) bitvector_dic = { } # record the slice's hashvalue and the line numbers. eg: {1839273: [1,5,7,9,10], 34502394: [6,7,8,10,11,12]} vul_dic = {} with open(config.vul_repo_file_path, 'r') as f: vul_dic = json.load(f, encoding='gbk') #print "[+]import vul completed." #index = 1 #report_num = 1 if not file_path.endswith(".c"): return # get variable list function = pu.parseFile_deep(file_path) if len(function) == 0: print "The file <", file_path, "> has ", len(function), "functions." return if len(function) != 1: print "The file <", file_path, "> has ", len(function), "functions." # a threshold for function if len(pu.normalize(function[0].funcBody)) < 50: return variable_list = function[0].variableList temp = produce_slice.produce_funcBody_hash(function[0]) if temp == "": return hash_value = temp[0] for vulfunc_file_name, record in vul_dic.items(): if record['hashvalue'][0] == hash_value: lock.acquire() report(file_path, 0, vulfunc_file_name, "", "Bingo(1)") lock.release() return dpd_content = "" func_content = [] if not os.path.exists( os.path.join(config.src_funcDpd_path, os.path.basename(file_path))): return with open( os.path.join(config.src_funcDpd_path, os.path.basename(file_path)), "r") as ff: temp = ff.readlines() dpd_content = "".join("".join(temp).split("\n")) dpd_dic = produce_slice.slice_from_project(dpd_content) with open(file_path, "r") as ff: func_content = ff.readlines() # build a bitvector according to dpd_dic bitvector.setall(0) for line_num, line_dpd in dpd_dic.items(): slice_content = produce_slice.get_slice_content(func_content, line_dpd) if slice_content == []: continue if slice_content == "": print "[Error]The dpd-files wrong." 
return temp1 = produce_slice.produce_slice_hash(variable_list, slice_content) slice_hash = temp1[0] bitvector[slice_hash] = 1 bitvector_dic[slice_hash] = line_dpd for vul_filename, record in vul_dic.items(): if bitvector[record['hashvalue'][0]] == 1: line_list = bitvector_dic[record['hashvalue'][0]] line_list = list(set(line_list)) line_list.sort() lock.acquire() report(file_path, line_list, vul_filename, "", "Bingo(3)") lock.release() return if len(record['hashvalue']) == 1: continue flag = True matched_hash = [] for n in record['hashvalue'][1:]: if bitvector[n] == 1: matched_hash.append(n) else: flag = False matched_hash = [] break if flag: line_list = [] for i in matched_hash: line_list.extend(bitvector_dic[i]) line_list = list(set(line_list)) line_list.sort() lock.acquire() report(file_path, line_list, vul_filename, record['lineNumber'], "Bingo(2)") lock.release() return return
def detect_source_code(): bitvector_size = config.bloomfilter_size bitvector = bitarray.bitarray(bitvector_size) bitvector_dic = {} # record the slice's hashvalue and the line numbers. eg: {1839273: [1,5,7,9,10], 34502394: [6,7,8,10,11,12]} vul_dic = {} with open(config.vul_repo_file_path, 'r') as f: vul_dic = json.load(f, encoding='gbk') print "[+]import vul completed." if os.path.exists(config.result_path): os.remove(config.result_path) outfile = open(config.result_path, 'a') outfile.write(""" <!DOCTYPE html> <html> <head> <title>Result - Report</title> <style type="text/css"> .container { padding: 3px 3px 3px 3px; font-size: 14px; } .patch { background-color: #CCCCCC; border: 2px solid #555555; margin: 0px 0px 5px 0px } .source { background-color: #DDDDDD; padding: 3px 3px 3px 3px; margin: 0px 0px 5px 0px } .filepath { font-size: small; font-weight: bold; color: #0000AA; padding: 5px 5px 5px 5px; } .codechunk { font-family: monospace; font-size: small; white-space: pre-wrap; padding: 0px 0px 0px 50px; } .linenumber { font-family: monospace; font-size: small; float: left; color: #777777; } </style> <script language="javascript"> function togglePrev(node) { var targetDiv = node.previousSibling; targetDiv.style.display = (targetDiv.style.display=='none')?'block':'none'; node.innerHTML = (node.innerHTML=='+ show +')?'- hide -':'+ show +'; } function toggleNext(node) { var targetDiv = node.nextSibling; targetDiv.style.display = (targetDiv.style.display=='none')?'block':'none'; node.innerHTML = (node.innerHTML=='+ show +')?'- hide -':'+ show +'; } </script> </head> <body> <div style="width: 100%; margin: 0px auto">""") total = 0 for root, dirs, files in os.walk(config.src_func_path): for func_file in files: if not func_file.endswith('.c'): continue total += 1 index = 1 report_num = 1 for root, dirs, files in os.walk(config.src_func_path): for func_file in files: if not func_file.endswith('.c'): continue # first, get the abstracted/normalized func_Body, to detect if 
the hashvalue of the func_Body is vulnerability. print "-----------------------------------------------------------" print index, "/", total, os.path.join(root, func_file), "started." #if index < 2642: #index += 1 #continue #if func_file != "ssl#~d1_lib.c$dtls1_free$132-181.c": #continue index += 1 start_time = time.time() #get variable list. function = pu.parseFile_deep(os.path.join(root, func_file)) if len(function) == 0: print "The file <", os.path.join(root, func_file), "> has ", len(function), " funcitons." continue if len(function) != 1: print "The file <", os.path.join(root, func_file), "> has ", len(function), " funcitons." # a threshold for function if len(pu.normalize(function[0].funcBody)) < 50: continue variable_list = function[0].variableList parse_time = time.time() print "parse function time:", str(parse_time - start_time), "s." temp = produce_slice.produce_funcBody_hash(function[0]) if temp == "": continue hash_value = temp[0] for vulfunc_file_name, record in vul_dic.items(): if record['hashvalue'][0] == hash_value: print func_file, " Bingo(1) !", "match vul_function:", vulfunc_file_name report(outfile, os.path.join(root, func_file), 0, vulfunc_file_name, "", report_num, "Bingo(1)") report_num += 1 break # if the func_Body is "not" vulnerablity, then produce slices for current function. # Build a bitvector for current function, else: #if 'break' executed, the else will no be executed. type1_time = time.time() print "detect type1 time:", str(type1_time - parse_time), "s." 
slice_time1 = time.time() dpd_content = "" func_content = [] if not os.path.exists(os.path.join(config.src_funcDpd_path, func_file)): continue with open(os.path.join(config.src_funcDpd_path, func_file), 'r') as ff: temp = ff.readlines() dpd_content = "".join("".join(temp).split('\n')) dpd_dic = produce_slice.slice_from_project(dpd_content) with open(os.path.join(root, func_file), 'r') as ff: func_content = ff.readlines() # build a bitvector according to dpd_dic bitvector.setall(0) flag1 = True for line_num, line_dpd in dpd_dic.items(): slice_content = produce_slice.get_slice_content(func_content, line_dpd) if slice_content == []: continue if slice_content == "": flag1 = False break temp1 = produce_slice.produce_slice_hash(variable_list, slice_content) slice_hash = temp1[0] bitvector[slice_hash] = 1 bitvector_dic[slice_hash] = line_dpd if not flag1: print "[Error]The dpd-files wrong." continue slice_time2 = time.time() print "produce slices time:", str(slice_time2 - slice_time1), "s." detect_time1 = time.time() for vul_filename, records in vul_dic.items(): if bitvector[records['hashvalue'][0]] == 1: print func_file, " Bingo(3) !", vul_filename, "------------" line_list = bitvector_dic[records['hashvalue'][0]] line_list = list(set(line_list)) line_list.sort() report(outfile, os.path.join(root, func_file), line_list, vul_filename, "", report_num, "Bingo(3)") report_num += 1 break if len(records['hashvalue']) == 1: continue flag = True matched_hash = [] for n in records['hashvalue'][1:]: if bitvector[n] == 1: matched_hash.append(n) else: flag = False matched_hash = [] break if flag: print func_file, " Bingo(2) !", vul_filename, records['lineNumber'], "------------" line_list = [] for i in matched_hash: line_list.extend(bitvector_dic[i]) line_list = list(set(line_list)) line_list.sort() report(outfile, os.path.join(root, func_file), line_list, vul_filename, records['lineNumber'], report_num, "Bingo(2)") report_num += 1 break detect_time2 = time.time() print "detect 
time: ", str(detect_time2 - detect_time1), "s." print "total time:", str(detect_time2 - start_time), "s." outfile.write(""" </div> </body> </html>""") outfile.close()
def parseFile_deep_multi(f):
    """Parse ``f`` with ``pu.parseFile_deep`` in ``"GUI"`` mode.

    Returns a ``(f, parsed)`` tuple so that the result stays paired with
    the path it was produced from.
    """
    return f, pu.parseFile_deep(f, "GUI")