def cal_funcs_similarity(request):
    """Django view for the function-level (AST) similarity reports.

    GET: render "ast_function_level.html" with one ``cal_reports`` entry per
    stored ``func_similarity_reports`` row.

    Any other method: check that the feature database index directory exists
    and that the database is running, connect to it, and start a background
    thread running ``vuln_patch_compare_all``.  Every outcome is reported to
    the client as a plain ``HttpResponse`` message.
    """
    if request.method == "GET":
        reports = [cal_reports(r) for r in func_similarity_reports.objects.all()]
        return render_to_response(
            "ast_function_level.html",
            RequestContext(request, {'reports': reports}))

    # Guard clauses: bail out early with the matching message.
    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse("特征数据库不存在")
    if not is_db_on():
        return HttpResponse("特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        # Was a bare ``except:``; Exception still covers every connection
        # error but no longer swallows SystemExit / KeyboardInterrupt.
        return HttpResponse("连接特征数据库失败,请联系管理员查明原因!")

    # The comparison is long-running, so it is handed to a worker thread and
    # the request returns immediately.
    th = Thread(target=vuln_patch_compare_all, args=(neo4jdb, ))
    th.start()
    return HttpResponse("启动线程计算中,请稍后查看!")
def query_node_type():
    """Walk every Function node and collect its statement types per source line.

    For each function the statement ``type``/``location`` pairs are queried,
    the types that share a line number are joined with single spaces into
    ``line_dict`` (keyed by the line-number string), the mapping is passed to
    ``cc.AST_type_clean`` and the name of the file owning the function is
    looked up.

    NOTE(review): ``clean_type`` and ``file_name`` are computed but not used
    in the visible part of this function - confirm whether the remainder of
    the body was lost.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get all of function in database
    for function in step.runGremlinQuery("""getNodesWithType('Function')"""):
        # The numeric id follows a 5-character prefix in ``ref``
        # (presumably "node/" - TODO confirm).
        function_node_id = int(function.ref[5:])

        # get map of type-location
        query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % function_node_id
        line_dict = dict()
        for node in step.runGremlinQuery(query):
            node_type = str(node[0])
            location = str(node[1])
            if location == 'None':
                continue
            # Location strings start with the line number, then ':'.
            loc = location.split(':')[0]
            if loc in line_dict:
                line_dict[loc] = line_dict[loc] + ' ' + node_type
            else:
                line_dict[loc] = node_type
        clean_type = cc.AST_type_clean(line_dict, True)

        # do another query to know which file this function belongs to
        query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % function_node_id
        file_path = step.runGremlinQuery(query)
        file_name = str(file_path[0]).split('/')[-1]
def func_pdg_comp_view(request):
    """Django view for the PDG vulnerable/patched function comparison.

    GET: render "pdg_comp.html" with the selectable functions and all stored
    ``pdg_vuln_patch_funcs_report`` rows.

    Any other method: read ``funcs_sel`` from the POST data.  If a report for
    that vulnerability already exists, say so; otherwise verify the feature
    database is present and running, connect to it, and start
    ``func_pdg_similarity_proc`` in a background thread.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {"funcs": funcs, "infos": infos}))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # Any lookup failure is treated as "not computed yet" and falls
        # through to start the computation.  Was a bare ``except:``, which
        # also trapped SystemExit / KeyboardInterrupt.
        pass
    else:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # Run the similarity computation off the request thread.
    th = Thread(target=func_pdg_similarity_proc, args=(vuln_id, neo4jdb))
    th.start()
    return HttpResponse(u"已经启动线程进行计算")
def runQueryChunk():
    """Chunked variant of ``runQuery``: process function ids in batches.

    For every batch of ``CHUNK_SIZE`` function ids, one query fetches all
    statement ``type``/``location``/``functionId`` triples and another maps
    each function id to the base name of its file.  Per function, the types
    sharing a source line are joined with spaces, cleaned with
    ``getCleanText`` and handed to ``addInfoToSourceFile``.  A running
    counter is printed after each batch.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    res = j.runGremlinQuery("""getNodesWithType('Function').id""")
    flag = 1  # progress counter, printed after every chunk
    CHUNK_SIZE = 51
    for chunk in j.chunks(res, CHUNK_SIZE):
        # Turn the ids into "(1 2 3)" - the tuple repr without commas/quotes -
        # for use in the index query.
        functionIdStr = str(tuple(chunk)).replace(',', '').replace('\'', '')
        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % functionIdStr
        stms = j.runGremlinQuery(query)

        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stmsFiles = j.runGremlinQuery(query)

        # function id -> base name of the containing file
        files = dict()
        for stmsFile in stmsFiles:
            files[int(stmsFile[0])] = str(stmsFile[1]).split('/')[-1]

        # function id -> list of [location, type] pairs
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for func_id, elem in codes.items():
            lineDict = dict()
            for raw_location, node_type in elem:
                location = str(raw_location)
                if location == u'None':
                    continue
                loc = location.split(':')[0]
                if loc in lineDict:
                    lineDict[loc] = lineDict[loc] + ' ' + node_type
                else:
                    lineDict[loc] = node_type
            text = getCleanText(lineDict, False)
            fileName = files.get(func_id)
            addInfoToSourceFile(text, fileName)
        flag += 1
        print(flag)
def getFunctionSimilarity():
    """Compute pairwise function similarity and dump the similar pairs to CSV.

    Function names are fetched from the Joern database, stripped of their
    namespace, de-duplicated, and operator overloads (names starting with
    "operator ") are dropped.  Every ordered pair (a, b) with a != b is
    scored with ``computeSim``; pairs scoring above 0.5 are written to
    ``my_constant.FUNC_SIMILAIRTY_FILE_NAME`` and collected.

    Returns:
        dict mapping ``(func_a, func_b)`` to the similarity score.
    """
    # ``with`` guarantees the CSV file is closed even if a query or
    # ``computeSim`` raises (it used to leak in that case).
    with open(my_constant.FUNC_SIMILAIRTY_FILE_NAME, 'wb') as analysis:
        analyze_writer = csv.writer(analysis)
        analyze_writer.writerow(['func_a', 'func_b', 'similarity'])

        # initialize python-joern instance and connect to database
        joern_instance = JoernSteps()
        joern_instance.addStepsDir("/data/joern-code/query/")
        joern_instance.setGraphDbURL("http://localhost:7474/db/data/")
        joern_instance.connectToDatabase()

        # fetch all function info
        functions_temp = joern_instance.runGremlinQuery('_().getFunctions()')[0]

        # filter operator overloads and duplicates; each entry stays a
        # one-element list because that is what computeSim is handed.
        functions = []
        seen = set()
        for function in functions_temp:
            # remove namespace before ::
            function = my_util.removeNamespace(function)
            if function == '':
                continue
            if function.startswith("operator ") or function in seen:
                continue
            seen.add(function)
            functions.append([function])

        # compute similarity and write back into file
        func_similarity_dic = {}
        word_list_dict = {}
        for i, func_a in enumerate(functions):
            for j, func_b in enumerate(functions):
                if i == j:
                    continue
                similarity, word_list_dict = computeSim(func_a, func_b,
                                                        word_list_dict)
                if similarity > 0.5:
                    analyze_writer.writerow([func_a[0], func_b[0], similarity])
                    func_similarity_dic[(func_a[0], func_b[0])] = similarity
    return func_similarity_dic
def query_node_type_chunk():
    """Chunked variant of ``query_node_type``.

    Function ids are processed ``CHUNK_SIZE`` at a time.  For each batch the
    owning file of every function and the ``type``/``location``/``functionId``
    of every statement are fetched; per function, the types sharing a source
    line are joined with spaces and passed to ``cc.AST_type_clean``.

    NOTE(review): ``clean_type`` and ``fileName`` are computed but not used
    in the visible part of this function - confirm whether the remainder of
    the body was lost.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get function id
    res = step.runGremlinQuery("""getNodesWithType('Function').id""")
    CHUNK_SIZE = 51
    for chunk in step.chunks(res, CHUNK_SIZE):
        # "(1 2 3)": tuple repr with commas and quotes stripped
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # to know which files this function belongs to
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        files = dict()
        for stms_file in step.runGremlinQuery(query):
            files[int(stms_file[0])] = str(stms_file[1]).split('/')[-1]

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        stms = step.runGremlinQuery(query)

        # get node types: function id -> list of [location, type] pairs
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for func_id, elem in codes.items():
            line_dict = dict()
            for raw_location, node_type in elem:
                location = str(raw_location)
                if location == u'None':
                    continue
                loc = location.split(':')[0]
                if loc in line_dict:
                    line_dict[loc] = line_dict[loc] + ' ' + node_type
                else:
                    line_dict[loc] = node_type
            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(func_id)
class DBInterface(object):
    """Thin wrapper around a python-joern ``JoernSteps`` connection.

    Failures are reported on stdout and signalled through return values
    (``False`` / ``None``) rather than raised.
    """

    DATABASE_URL = "http://localhost:7474/db/data/"

    def __init__(self):
        # Populated by connectToDB(); None until then.
        self.connection = None

    def __getConnection(self):
        """Instantiate the JoernSteps client; return True on success."""
        print("[+] Creating connection.")
        try:
            self.connection = JoernSteps()
        except Exception as err:
            print("[Error] Cannot instantiate Python-Joern database interface, DBInterface says: {}".format(
                err.args))
            return False
        else:
            return True

    def connectToDB(self):
        """Create the client and connect it to DATABASE_URL.

        Returns True when connected, False if either step failed.
        """
        if not self.__getConnection():
            return False
        print("[+] Connecting to the database.")
        self.connection.setGraphDbURL(DBInterface.DATABASE_URL)
        try:
            self.connection.connectToDatabase()
        except Exception as err:
            print("[Error] Cannot connect to the database, DBInterface says: {}".format(
                err.args))
            return False
        else:
            return True

    def runQuery(self, code):
        """Run Gremlin query ``code``; return its result, or None on any error."""
        try:
            return self.connection.runGremlinQuery(code)
        except Exception as err:
            print("[Error] Error occured during query execution, DBInterface says: {}".format(
                err.args))
            return None
def runQuery():
    """Annotate source files with per-line statement types, one function at a time.

    For each Function node, the statement ``type``/``location`` pairs are
    queried, the types sharing a source line are joined with spaces, the
    mapping is cleaned with ``getCleanText`` and passed together with the
    base name of the owning file to ``addInfoToSourceFile``.  A running
    counter is printed after each function.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    flag = 1  # progress counter, printed after every function
    for function in j.runGremlinQuery("""getNodesWithType('Function')"""):
        # The numeric id follows a 5-character prefix in ``ref``
        # (presumably "node/" - TODO confirm).
        functionnodeid = int(function.ref[5:])

        query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % functionnodeid
        lineDict = dict()
        for node in j.runGremlinQuery(query):
            node_type = str(node[0])
            location = str(node[1])
            if location == 'None':
                continue
            # Location strings start with the line number, then ':'.
            loc = location.split(':')[0]
            if loc in lineDict:
                lineDict[loc] = lineDict[loc] + ' ' + node_type
            else:
                lineDict[loc] = node_type
        text = getCleanText(lineDict, False)

        query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % functionnodeid
        filepath = j.runGremlinQuery(query)
        fileName = str(filepath[0]).split('/')[-1]
        addInfoToSourceFile(text, fileName)
        flag += 1
        print(flag)
def produce_file_function_location_triads(file):
    """Return ``(file base name, function name, start index)`` for every function.

    All nodes indexed as ``type:Function`` are fetched from the local Joern
    database.  The start index is the line number at the front of the node's
    ``location`` property minus one, as a string.  ``file`` is only used for
    its last '/'-separated component.
    """
    steps = JoernSteps()
    steps.setGraphDbURL('http://localhost:7474/db/data/')
    steps.connectToDatabase()

    located = []
    for fn_node in steps.runGremlinQuery('queryNodeIndex("type:Function")'):
        first_line = fn_node.properties['location'].split(":")[0]
        start_index = str(int(first_line) - 1)
        located.append((fn_node.properties['name'], start_index))

    return [(file.split("/")[-1], fn_name, start_index)
            for fn_name, start_index in located]
def createdb(coverage_db, json_dbname, joern_url='http://localhost:7474/db/data/'):
    """Combine coverage information with joern queries and write a JSON db.

    Every IfStatement and SwitchStatement node id is passed to
    ``get_conditional_info``; non-empty results are grouped by their
    "filename", sorted by "line" within each file, concatenated in filename
    order and dumped as JSON.

    Args:
        coverage_db: path of the sqlite coverage database; the connection is
            stored in the module-level ``conn``.
        json_dbname: path of the JSON file to write.
        joern_url: Neo4j REST endpoint; the connected client is stored in the
            module-level ``j``.
    """
    global j, conn
    from joern.all import JoernSteps
    j = JoernSteps()
    j.setGraphDbURL(joern_url)
    j.connectToDatabase()

    if_ids = j.runGremlinQuery('queryNodeIndex("type:IfStatement").id')
    print("Total number of IfStatements:%d" % len(if_ids))
    switch_ids = j.runGremlinQuery('queryNodeIndex("type:SwitchStatement").id')
    print("Total number of SwitchStatement:%d" % len(switch_ids))
    if_ids += switch_ids

    # Left open on purpose: ``conn`` is a module global.
    # NOTE(review): presumably get_conditional_info reads it - confirm.
    conn = sqlite3.connect(coverage_db)

    conditionals = {}  # filename -> list of conditional dicts
    idx = 0
    for node_id in if_ids:
        # iterate over each conditional and gather branch info
        conditional = get_conditional_info(node_id, idx)
        if conditional == {}:
            continue
        idx += 1
        sys.stdout.write("Processing conditional %d out of %d total.\r" % (idx, len(if_ids)))
        sys.stdout.flush()
        # group by file name
        conditionals.setdefault(conditional["filename"], []).append(conditional)

    # Sort by filename, then by line number.  The filename order used to be
    # whatever the dict iteration produced, despite the stated intent.
    sorted_conditionals = []
    for filename in sorted(conditionals):
        conditionals[filename].sort(key=lambda c: c["line"])
        sorted_conditionals += conditionals[filename]

    # save as json; ``with`` closes (and flushes) the output file
    with open(json_dbname, "wb") as out:
        json.dump(sorted_conditionals, out)
    print("\nDone!")
def func_pdg_comp_view(request):
    """Django view for the PDG vulnerable/patched function comparison.

    GET: render "pdg_comp.html" with the selectable functions and all stored
    ``pdg_vuln_patch_funcs_report`` rows.

    Any other method: read ``funcs_sel`` from the POST data.  If a report for
    that vulnerability already exists, say so; otherwise verify the feature
    database is present and running, connect to it, and start
    ``func_pdg_similarity_proc`` in a background thread.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {
                "funcs": funcs,
                "infos": infos
            }))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # Any lookup failure is treated as "not computed yet" and falls
        # through to start the computation.  Was a bare ``except:``, which
        # also trapped SystemExit / KeyboardInterrupt.
        pass
    else:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # Run the similarity computation off the request thread.
    th = Thread(target=func_pdg_similarity_proc, args=(vuln_id, neo4jdb))
    th.start()
    return HttpResponse(u"已经启动线程进行计算")
from joern.all import JoernSteps

# Minimal python-joern example: connect to the local Neo4j endpoint and
# print every result of looking up functions named "main".
j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
# j.addStepsDir('Use this to inject utility traversals')
j.connectToDatabase()

# A Cypher query could be issued instead via j.runCypherQuery('...').
res = j.runGremlinQuery('getFunctionsByName("main")')
for r in res:
    print(r)
import hashlib
from yaml import load
from joern.all import JoernSteps

# Use the C-accelerated YAML loader when it is available, else the pure
# Python one.
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

# At least one test file must be supplied on the command line.
# NOTE(review): ``sys`` is used here but not imported in the visible part of
# this script - confirm it is imported above.
if len(sys.argv) < 2:
    print "Usage: verify.py <file 1> <file 2> ..."
    exit(1)

j = JoernSteps()
j.setGraphDbURL("http://localhost:7474/db/data")
j.connectToDatabase()

# Drop the script name so that sys.argv holds only the test files.
sys.argv.pop(0)

print "Running tests:"


# tests hashes are encoded in the intermediate path names, this extracts them
def extract_paths(paths):
    """Return, for each path, its last '/' component cut at the first ".c"."""
    paths = map(lambda p: str.split(str(p), "/")[-1], paths)
    return map(lambda p: str.split(str(p), ".c")[0], paths)


all_tests = extract_paths(j.runGremlinQuery("getNodesWithType('File').filepath"))

# NOTE(review): Loader/CLoader can construct arbitrary Python objects; only
# run this on trusted YAML files.  The file handle is also never closed
# explicitly.
for arg in sys.argv:
    yaml = load(file(arg, "r"), Loader)
def init_joern():
    """Create a JoernSteps client for the local Neo4j endpoint, connect it and return it."""
    client = JoernSteps()
    client.setGraphDbURL("http://localhost:7474/db/data/")
    client.connectToDatabase()
    return client
class ManualCCSearch(object):
    """Builds Gremlin code-clone queries and runs them, timed, via python-joern."""

    # Groovy list definitions that are prepended to queries.
    UNTRUSTED_DATA = """attacker_sources = [ "_GET", "_POST", "_COOKIE", "_REQUEST", "_ENV", "HTTP_ENV_VARS" ]\n"""
    SQL_QUERY_FUNCS = """sql_query_funcs = [ "mysql_query", "pg_query", "sqlite_query" ]\n"""

    # Gremlin operations
    ORDER_LN = ".order{it.a.lineno <=> it.b.lineno}"  # Order by linenumber

    def __init__(self, port):
        """Connect to the Neo4j REST endpoint on localhost at ``port``.

        The custom Gremlin steps under ``<base dir>/custom_gremlin_steps``
        are registered before connecting.
        """
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
        self.j.addStepsDir(
            Configurator.getPath(Configurator.KEY_BASE_DIR) +
            "/custom_gremlin_steps"
        )
        self.j.connectToDatabase()
        # NOTE(review): ``self.QUERIES_DIR`` is never assigned in this class
        # (its initialisation was commented out), yet sqlNewIndirect() reads
        # it - confirm where it is meant to be set.

    def searchCCOne(self):
        """Return the query for the first vulnerable tutorial.

        The target is an SQL injection example from stackoverflow:

            $user_alcohol_permitted_selection = $_POST['alcohol_check'];
            $user_social_club_name_input = $_POST['name'];
            $query="SELECT * FROM social_clubs
                    WHERE name = $user_social_club_name_input";
            if ($user_alcohol_permitted_selection != "???") {
                $query.= "AND WHERE alcohol_permitted =
                          $user_alcohol_permitted_selection";
            }

        The query is only a stub at the moment: it returns "g.V".
        """
        # Planned construction of the gremlin query, step by step:
        #  1. Find variable name X of "variable = $_POST[..]"
        #  2. Go to next statement list.
        #  (3. Find variable name Y of "variable = $_POST[..]")
        #  (4. Go to next statement list.)
        #  5. Find variable name Z and string str1 of "variable = string"
        #  6. Check if str1 contains regexp "WHERE any_word=$Y".
        #  (7. Go to next statement list.)
        #  (8. Check for if-statement with variable $X.)
        #  9. Check if variable $Z is extended using string with regexp
        #     "and where any_word=$X"
        #  (10. Check for mysql_query($Z))
        query = "g.V"
        return query

    def sqlNewIndirect(self):
        """Return the attacker-source/SQL-function preamble plus the stored query.

        The query text is read from ``self.QUERIES_DIR`` +
        "sql_new_indirect.query".
        """
        query = self.UNTRUSTED_DATA + self.SQL_QUERY_FUNCS
        # ``with`` closes the query file; it used to be left open.
        with open(self.QUERIES_DIR + "sql_new_indirect.query", 'r') as query_file:
            query += query_file.read()
        return query

    def runQuery(self, query):
        """Identity query builder: return ``query`` unchanged."""
        return query

    def runTimedQuery(self, myFunction, query=None):
        """Build a query with ``myFunction``, run it and time the execution.

        Args:
            myFunction: callable returning the Gremlin query string; called
                with ``query`` when that is truthy, without arguments
                otherwise.
            query: optional argument forwarded to ``myFunction``.

        Returns:
            ``(result, elapsed)``: a list with one ``CodeCloneData`` per
            result node, and the query time in seconds.  If the query raises,
            the error is printed and the list is empty.
        """
        start = time.time()
        res = None
        try:
            if query:
                res = self.j.runGremlinQuery(myFunction(query))
            else:
                res = self.j.runGremlinQuery(myFunction())
        except Exception as err:
            print("Caught exception: %s %s" % (type(err), err))
        elapsed = time.time() - start

        result = []
        try:
            for node in res:
                print(node)
                data = CodeCloneData()
                data.stripDataFromOutput(node)
                data.setQueryTime(elapsed)
                result.append(data)
        except TypeError:
            # res is not iterable, because it is one/no node.
            if res:
                data = CodeCloneData()
                # Strip from ``res`` itself: the loop variable ``node`` is
                # unbound when iteration never started (was a NameError).
                data.stripDataFromOutput(res)
                data.setQueryTime(elapsed)
                result.append(data)
                print(res)
        return (result, elapsed)
class get_basic_blocks():
    """Derives per-function basic blocks from CONTROLS / POST_DOM edges."""

    def __init__(self, ):
        """Connect to the database at NEO4J_URL and load the function list."""
        self.JS = JoernSteps()
        self.JS.setGraphDbURL(NEO4J_URL)
        self.JS.connectToDatabase()
        # Initialise the containers first: the function list used to be
        # loaded and then immediately overwritten with an empty dict.
        self.FUNCTION_LIST = {}
        self.BASIC_BLOCK_LIST = {}
        self.get_function_list(ChunkStartTool)

    def get_function_list(self, ChunkStartTool):
        """Store the functions reported by ``ListFuncs`` in FUNCTION_LIST."""
        list_function = get_all_functions.ListFuncs(ChunkStartTool)
        list_function.run()
        self.FUNCTION_LIST = list_function.ALL_FUNCTIONS

    def get_cfg_graph(self, functionid):
        """Return the function's CONTROLS/POST_DOM edges.

        Each edge is ``[out vertex id, edge id, label, in vertex id]``.
        """
        query_get_cfg_graph = """queryNodeIndex('functionId:%s').outE .filter{it.label=="CONTROLS"||it.label=="POST_DOM"} .transform{[it.outV.id,it.id,it.label,it.inV.id]}.toList()""" % functionid
        return self.JS.runGremlinQuery(query_get_cfg_graph)

    def get_ENTRY_node(self, functionid):
        """Return the id of the function's CFGEntryNode."""
        query_from_entry = """queryNodeIndex('functionId:%s AND type:CFGEntryNode').id""" % functionid
        return self.JS.runGremlinQuery(query_from_entry)[0]

    def get_code(self, node_id):
        """Return the ``code`` property of the node."""
        return self.JS.runGremlinQuery(
            """g.v(%s).getProperty("code")""" % node_id)

    def get_type(self, node_id):
        """Return the ``type`` property of the node."""
        return self.JS.runGremlinQuery(
            """g.v(%s).getProperty("type")""" % node_id)

    def get_control_nodes(self, node_id):
        """Return the ids of the nodes reached over outgoing CONTROLS edges."""
        return list(self.JS.runGremlinQuery(
            """g.v(%s).out("CONTROLS").id""" % node_id))

    def get_POST_DOM_nodes(self, node_id):
        """Return the first id reached over an outgoing POST_DOM edge, or []."""
        ids = list(self.JS.runGremlinQuery(
            """g.v(%s).out("POST_DOM").id""" % node_id))
        return ids[0] if ids else []

    def get_DOM_nodes(self, node_id):
        """Return the first id reached over an incoming POST_DOM edge, or []."""
        ids = list(self.JS.runGremlinQuery(
            """g.v(%s).in("POST_DOM").id""" % node_id))
        return ids[0] if ids else []

    def check_in_or_not(self, node, BBs):
        """Return True if ``node`` occurs in any sub-list of ``BBs``."""
        return any(node in block for block in BBs)

    def Dom_list_sort(self, node_list):
        """Split ``node_list`` into ordered basic blocks.

        Starting from each node not yet placed, the block is extended
        backwards along incoming POST_DOM edges and forwards along outgoing
        ones for as long as the neighbour is itself in ``node_list``.
        Nodes of type "Parameter" are then removed from every block.
        """
        BBs = []
        for node in node_list:
            if self.check_in_or_not(node, BBs):
                continue
            block = [node]

            # Each neighbour is queried once per step (it used to be
            # fetched three times with the same argument).
            last_node = node
            dom = self.get_DOM_nodes(last_node)
            while dom in node_list:
                block.insert(block.index(last_node), dom)
                last_node = dom
                dom = self.get_DOM_nodes(last_node)

            next_node = node
            post_dom = self.get_POST_DOM_nodes(next_node)
            while post_dom in node_list:
                block.append(post_dom)
                next_node = post_dom
                post_dom = self.get_POST_DOM_nodes(next_node)

            BBs.append(block)

        # Build a new list instead of removing/appending while iterating,
        # which skipped some blocks and reordered the rest.
        return [[n for n in block if self.get_type(n) != "Parameter"]
                for block in BBs]

    def get_BBs_of_node(self, node_id):
        """Return the basic blocks formed by the nodes ``node_id`` controls."""
        current_control_ids = self.get_control_nodes(node_id)
        if not current_control_ids:
            return []
        return self.Dom_list_sort(current_control_ids)

    def function_basic_blocks(self, functionid):
        """Return the basic blocks (lists of node ids) of the function.

        Breadth-first walk over CONTROLS edges from the entry node; each
        node's controlled nodes are split into blocks by Dom_list_sort.
        """
        basic_block_ids = []  # store the basic blocks node id
        queue = [self.get_ENTRY_node(functionid)]  # FIFO of nodes to expand
        while queue:
            control_nodes = self.get_control_nodes(queue.pop(0))
            if control_nodes:
                queue.extend(control_nodes)
                basic_block_ids.extend(self.Dom_list_sort(control_nodes))
        return basic_block_ids

    def execute(self):
        """Reload the function list and fill BASIC_BLOCK_LIST for every function."""
        self.get_function_list(ChunkStartTool)
        for etem in self.FUNCTION_LIST:
            self.BASIC_BLOCK_LIST[etem] = self.function_basic_blocks(etem)
        print("ok")
class get_basic_blocks():
    """Derives per-function basic blocks and writes their code to files."""

    def __init__(self, ):
        """Connect to the database at NEO4J_URL and load the function list."""
        self.JS = JoernSteps()
        self.JS.setGraphDbURL(NEO4J_URL)
        self.JS.connectToDatabase()
        # Initialise the containers first: the function list used to be
        # loaded and then immediately overwritten with an empty dict.
        self.FUNCTION_LIST = {}
        self.BASIC_BLOCK_LIST = {}
        self.get_function_list(ChunkStartTool)

    def get_function_list(self, ChunkStartTool):
        """Store the functions reported by ``ListFuncs`` in FUNCTION_LIST."""
        list_function = get_all_functions.ListFuncs(ChunkStartTool)
        list_function.run()
        self.FUNCTION_LIST = list_function.ALL_FUNCTIONS

    def get_cfg_graph(self, functionid):
        """Return the function's CONTROLS/POST_DOM edges.

        Each edge is ``[out vertex id, edge id, label, in vertex id]``.
        """
        query_get_cfg_graph = """queryNodeIndex('functionId:%s').outE .filter{it.label=="CONTROLS"||it.label=="POST_DOM"} .transform{[it.outV.id,it.id,it.label,it.inV.id]}.toList()""" % functionid
        return self.JS.runGremlinQuery(query_get_cfg_graph)

    def get_ENTRY_node(self, functionid):
        """Return the id of the function's CFGEntryNode."""
        query_from_entry = """queryNodeIndex('functionId:%s AND type:CFGEntryNode').id""" % functionid
        return self.JS.runGremlinQuery(query_from_entry)[0]

    def get_code(self, node_id):
        """Return the node's ``code`` property, UTF-8 encoded."""
        query_result = self.JS.runGremlinQuery(
            """g.v(%s).getProperty("code")""" % node_id)
        return query_result.encode('utf-8')

    def get_type(self, node_id):
        """Return the node's ``type`` property, UTF-8 encoded."""
        query_result = self.JS.runGremlinQuery(
            """g.v(%s).getProperty("type")""" % node_id)
        return query_result.encode('utf-8')

    def get_control_nodes(self, node_id):
        """Return the ids of the nodes reached over outgoing CONTROLS edges."""
        return list(self.JS.runGremlinQuery(
            """g.v(%s).out("CONTROLS").id""" % node_id))

    def get_POST_DOM_nodes(self, node_id):
        """Return the ids reached over outgoing POST_DOM edges (maybe empty)."""
        return list(self.JS.runGremlinQuery(
            """g.v(%s).out("POST_DOM").id""" % node_id))

    def get_DOM_nodes(self, node_id):
        """Return the ids reached over incoming POST_DOM edges (maybe empty)."""
        return list(self.JS.runGremlinQuery(
            """g.v(%s).in("POST_DOM").id""" % node_id))

    def check_in_or_not(self, node, BBs):
        """Return True if ``node`` occurs in any sub-list of ``BBs``."""
        return any(node in block for block in BBs)

    def list_mix_list(self, list1, list2):
        """Return the first element of ``list1`` also in ``list2``, else None."""
        for etem in list1:
            if etem in list2:
                return etem
        return None

    def Dom_list_sort(self, node_list):
        """Split the controlled nodes into ordered basic blocks.

        Starting from each node not yet placed, the block is extended
        backwards along incoming POST_DOM edges and forwards along outgoing
        ones for as long as a neighbour is itself in ``node_list``.
        """
        BBs = []
        for node in node_list:
            if self.check_in_or_not(node, BBs):
                continue
            block = [node]

            # Each neighbour lookup runs once per step (it used to be
            # repeated three times with the same argument).
            last_node = node
            dom = self.list_mix_list(self.get_DOM_nodes(last_node), node_list)
            while dom is not None:
                block.insert(block.index(last_node), dom)
                last_node = dom
                dom = self.list_mix_list(
                    self.get_DOM_nodes(last_node), node_list)

            next_node = node
            post_dom = self.list_mix_list(
                self.get_POST_DOM_nodes(next_node), node_list)
            while post_dom is not None:
                block.append(post_dom)
                next_node = post_dom
                post_dom = self.list_mix_list(
                    self.get_POST_DOM_nodes(next_node), node_list)

            BBs.append(block)
        return BBs

    def get_BBs_of_node(self, node_id):
        """Return the basic blocks formed by the nodes ``node_id`` controls."""
        current_control_ids = self.get_control_nodes(node_id)
        if not current_control_ids:
            return []
        return self.Dom_list_sort(current_control_ids)

    def function_basic_blocks(self, functionid):
        """Return the function's basic blocks as lists of node code.

        Breadth-first walk over CONTROLS edges from the entry node.  Nodes
        already placed in a block are not expanded again.  Nodes of type
        "Parameter" are dropped, each block is reversed, and node ids are
        replaced by ``get_code`` results.
        """
        basic_block_ids = []  # store the basic blocks node id
        queue = [self.get_ENTRY_node(functionid)]  # FIFO of nodes to expand
        while queue:
            control_nodes = self.get_control_nodes(queue.pop(0))
            fresh_nodes = [
                n for n in control_nodes
                if not self.check_in_or_not(n, basic_block_ids)
            ]
            if fresh_nodes:
                queue.extend(fresh_nodes)
                basic_block_ids.extend(self.Dom_list_sort(fresh_nodes))

        # Take the parameter nodes away.  A new list is built because
        # removing/appending while iterating skipped some blocks.
        basic_block_ids = [
            [n for n in block if self.get_type(n) != "Parameter"]
            for block in basic_block_ids
        ]

        # Convert ids to code (get_type would work here as well).
        function_BB_code = []
        for node_list_ids in basic_block_ids:
            node_list_ids.reverse()
            function_BB_code.append(
                [self.get_code(node_id) for node_id in node_list_ids])
        return function_BB_code

    def execute(self):
        """Write each function's basic blocks to ``FILE_PATH/<function key>``.

        Existing files are skipped.  Every statement is followed by "\\r" and
        every block by "\\r\\n".  If anything fails while producing a file,
        the key and the error are printed and the partial file is removed.
        """
        self.get_function_list(ChunkStartTool)
        for etem in self.FUNCTION_LIST:
            file_name = FILE_PATH + "/" + etem
            if os.path.exists(file_name):
                continue
            f_tmp = open(file_name, mode='w')
            try:
                # ``with`` closes the file before it may be removed below.
                with f_tmp:
                    for BB_list in self.function_basic_blocks(etem):
                        for sentence in BB_list:
                            f_tmp.write(sentence + "\r")
                        f_tmp.write("\r\n")
            except Exception as ex:
                print(etem)
                # Print the caught error; ``Exception.message`` was the
                # class attribute, not the message.
                print(ex)
                os.remove(file_name)
        print("ok")
class method_3():
    """Extract per-function features from the Joern graph database.

    Different features are combined into one vector in which every kind of
    feature owns a fixed range of positions.  Layout as noted by the
    original author:

        0      the return type of the function
        1      the number of the parameters
        2~20   the number of different property type nodes
        21~30  the number of different label edges
        31~40  the number of different kinds of API functions
        41~45  the number of different kinds of node types, like Callee
    """

    #----------------------------------------------------------------------
    def __init__(self):
        """Connect to the database and preload the function list.

        Also collects the distinct return types of all listed functions
        into ``self.all_return_type``.
        """
        self.JS = JoernSteps()
        self.JS.setGraphDbURL(NEO4J_URL)
        self.JS.connectToDatabase()
        # One output directory per feature kind, all below FILE_PATH.
        self.return_type_dataDir = os.path.join(FILE_PATH, 'return_type_data')
        self.parameter_dataDir = os.path.join(FILE_PATH, 'parameter_data')
        self.edge_dataDir = os.path.join(FILE_PATH, 'edge_data')
        self.node_type_dataDir = os.path.join(FILE_PATH, 'node_type_data')
        self.all_return_type = []
        self.all_node_type = []
        self.FUNCTION_LIST = {}
        self.get_function_list(ChunkStartTool)
        self.get_all_return_type()

    #----------------------------------------------------------------------
    def get_function_list(self, ChunkStartTool):
        """Store the functions listed by ``ListFuncs`` in ``FUNCTION_LIST``.

        ``ChunkStartTool`` is handed to ``get_all_functions.ListFuncs``
        unchanged; the keys of the resulting mapping are used as function
        ids by the other methods of this class.
        """
        list_function = get_all_functions.ListFuncs(ChunkStartTool)
        list_function.run()
        self.FUNCTION_LIST = list_function.ALL_FUNCTIONS

    #----------------------------------------------------------------------
    @staticmethod
    def _normalized_counts(values):
        """Return ``{utf-8 encoded value: occurrences / len(values)}``.

        An empty input yields an empty dict, so no division takes place.
        """
        total = len(values)
        counts = {}
        for value in values:
            key = value.encode("utf-8")
            counts[key] = counts.get(key, 0) + 1
        # float() is applied BEFORE dividing: under Python 2 ``count / total``
        # on two ints truncates, which made every ratio 0.0 or 1.0.
        return dict(
            (key, float(count) / total) for key, count in counts.items())

    #----------------------------------------------------------------------
    def return_type(self, function_id):
        """Return the UTF-8 encoded ReturnType codes of ``function_id``."""
        query_return_type = """queryNodeIndex('functionId:%s AND type:ReturnType').code""" % function_id
        return [
            code.encode("utf-8")
            for code in self.JS.runGremlinQuery(query_return_type)
        ]

    #----------------------------------------------------------------------
    def get_all_return_type(self):
        """Collect the distinct return types of every known function.

        New values are appended to ``self.all_return_type`` in first-seen
        order; values already present are skipped.
        """
        for function_id in self.FUNCTION_LIST:
            for current_return_type in self.return_type(function_id):
                if current_return_type not in self.all_return_type:
                    self.all_return_type.append(current_return_type)

    #----------------------------------------------------------------------
    def get_parameter_type(self, function_id):
        """Return the ParameterType codes of ``function_id`` as queried."""
        query_parameter_type = """queryNodeIndex('functionId:%s AND type:ParameterType').code.toList()""" % function_id
        return self.JS.runGremlinQuery(query_parameter_type)

    #----------------------------------------------------------------------
    def get_different_edge_num(self, function_id):
        """Return the share of each outgoing edge label in ``function_id``.

        The result maps the UTF-8 encoded label to its relative frequency
        among all outgoing edges of the function's nodes.
        """
        query_edge_num = """queryNodeIndex('functionId:%s').outE.label""" % function_id
        return self._normalized_counts(self.JS.runGremlinQuery(query_edge_num))

    #----------------------------------------------------------------------
    def get_all_types(self):
        """Collect the distinct node types of every known function.

        New values are appended (UTF-8 encoded) to ``self.all_node_type``.
        """
        for function_id in self.FUNCTION_LIST:
            query_node_type = """queryNodeIndex('functionId:%s').type""" % function_id
            for current_node_type in self.JS.runGremlinQuery(query_node_type):
                encoded_type = current_node_type.encode("utf-8")
                if encoded_type not in self.all_node_type:
                    self.all_node_type.append(encoded_type)
        # Single-argument form prints the same text on Python 2 and 3.
        print("ok")

    #----------------------------------------------------------------------
    def get_different_type_num(self, function_id):
        """Return the share of each node type in ``function_id``.

        The result maps the UTF-8 encoded node type to its relative
        frequency among all nodes of the function.
        """
        query_type_num = """queryNodeIndex('functionId:%s').type""" % function_id
        return self._normalized_counts(self.JS.runGremlinQuery(query_type_num))

    #----------------------------------------------------------------------
    def _write_feature(self, output_dir, extract):
        """Write ``extract(function_id)`` for every function to ``output_dir``.

        Shared body of the four ``output_*`` methods; the sequence of
        ``Writer`` calls is the one each of them performed originally.
        """
        writer = Writer()
        writer.setOutputDirectory(output_dir)
        writer.run()
        for function_id in self.FUNCTION_LIST:
            writer._writeDataPoints(function_id, extract(function_id))
        writer._finalizeOutputDirectory()

    def output_return_type(self):
        """Write the return types of all functions."""
        self._write_feature(self.return_type_dataDir, self.return_type)

    #----------------------------------------------------------------------
    def output_parameter_type(self):
        """Write the parameter types of all functions."""
        self._write_feature(self.parameter_dataDir, self.get_parameter_type)

    #----------------------------------------------------------------------
    def output_edge_type(self):
        """Write the edge-label frequencies of all functions."""
        self._write_feature(self.edge_dataDir, self.get_different_edge_num)

    #----------------------------------------------------------------------
    def output_node_type(self):
        """Write the node-type frequencies of all functions."""
        self._write_feature(self.node_type_dataDir,
                            self.get_different_type_num)

    def execute(self):
        """Write all four feature kinds, one output directory each."""
        self.output_return_type()
        self.output_parameter_type()
        self.output_edge_type()
        self.output_node_type()
class Analysis(object):
    """Build and run Gremlin queries that look for vulnerable sinks.

    The query text is plain Groovy sent to the graph database through
    ``JoernSteps``; this class only assembles the strings and times the
    round trip.
    """

    # Groovy list definitions that are prepended to every static query.
    SQL_QUERY_FUNCS = """sql_query_funcs = [ "mysql_query", "mysqli_query", "pg_query", "sqlite_query" ]\n"""
    XSS_FUNCS = """xss_funcs = [ "print", "echo" ]\n"""
    OS_COMMAND_FUNCS = """os_command_funcs = [ "backticks", "exec" , "expect_popen","passthru","pcntl_exec", "popen","proc_open","shell_exec","system", "mail" ]\n"""

    # Attack type -> traversal appended after the shared prelude.  Adjacent
    # literals are concatenated by Python, so each value is one string.
    _STATIC_QUERIES = {
        "sql": (
            """ queryMapList =[]; """
            """g.V().filter{sql_query_funcs.contains(it.code) && isCallExpression(it.nameToCall().next()) }.callexpressions() """
            """.sideEffect{m = start(it, [], 0, 'sql', false, queryMapList)} """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'sql', '1')} """
            """.sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} """
            """.ifThenElse{m.isEmpty()} """
            """{it.transform{reportmessage}} """
            """{it.transform{findSinkLocation(m, warnmessage, 'sql', queryMapList, it)}}"""
        ),
        "xss": (
            """ queryMapList = []; """
            """g.V().filter{it.type == TYPE_ECHO || it.type == TYPE_PRINT} """
            """.sideEffect{m = start(it, [], 0, 'xss', false, queryMapList)} """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'xss', '1')} """
            """.sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} """
            """.ifThenElse{m.isEmpty()} """
            """{it.transform{reportmessage}} """
            """{it.transform{findSinkLocation(m, warnmessage, 'xss', queryMapList, it)}}"""
        ),
        "code": (
            """queryMapList =[]; """
            """g.V().filter{it.type == TYPE_INCLUDE_OR_EVAL && it.flags.contains(FLAG_EXEC_EVAL)} """
            """.sideEffect{m = start(it, [], 0, 'code', false, queryMapList )} """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'code', '1')} """
            """.sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} """
            """.ifThenElse{m.isEmpty()} """
            """{it.transform{reportmessage}} """
            """{it.transform{findSinkLocation(m, warnmessage, 'code', queryMapList, it)}}"""
        ),
        # command execution : sinks considered are :
        # [backticks, exec,expect_popen,passthru,pcntl_exec,popen,proc_open,shell_exec,system,mail]
        # The ';' after the list initialisation was missing only in this
        # query; it is added so the statement is terminated exactly as in
        # the sibling queries and does not rely on a line break.
        "os-command": (
            """queryMapList =[]; """
            """g.V().filter{os_command_funcs.contains(it.code) && isCallExpression(it.nameToCall().next()) }.callexpressions() """
            """.filter{os_command_funcs.contains(it.ithChildren(0).out.code.next())} """
            """.sideEffect{m = start(it, [], 0, 'os-command', false, queryMapList )} """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'os-command', '1')} """
            """.sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} """
            """.ifThenElse{m.isEmpty()} """
            """{it.transform{reportmessage}} """
            """{it.transform{findSinkLocation(m, warnmessage, 'os-command', queryMapList, it)}}"""
        ),
        "file-inc": (
            """queryMapList =[]; """
            """g.V().filter{it.type == TYPE_INCLUDE_OR_EVAL && !(it.flags.contains(FLAG_EXEC_EVAL))} """
            """.sideEffect{m = start(it, [], 0, 'file-inc', false, queryMapList)} """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'file-inc', '1')} """
            """.sideEffect{ reportmessage = report(it.toFileAbs().next().name, it.lineno, it.id)} """
            """.ifThenElse{m.isEmpty()} """
            """{it.transform{reportmessage}} """
            """{it.transform{findSinkLocation(m, warnmessage, 'file-inc', queryMapList, it)}}"""
        ),
        "ear": (
            """ g.V().filter{ "header" == it.code && isCallExpression(it.nameToCall().next()) }.callexpressions() """
            """.ithChildren(1).astNodes() """
            """.filter{it.code != null && it.code.startsWith("Location")} """
            """.callexpressions() """
            """.as('call') """
            """.out('FLOWS_TO') """
            """.filter{it.type != "AST_EXIT" && it.type != "NULL" } """
            """.or( """
            """_().filter{it.type == "AST_CALL"} """
            """.sideEffect{n = jumpToCallingFunction(it)} """
            """.filter{n.type != "AST_EXIT" && n.type != "NULL" && n.type != "AST_RETURN"} """
            """, """
            """_().filter{it.type == "AST_CALL"} """
            """.sideEffect{n = jumpToCallingFunction(it)} """
            """.filter{n.type == "AST_RETURN"} """
            """.out('FLOWS_TO') """
            """.filter{n.type != "AST_EXIT" && n.type != "NULL" } """
            """, """
            """_().filter{it.type != "AST_CALL"} """
            """, """
            """_().as('b') """
            """.filter{it.type == "AST_CALL"} """
            """.astNodes() """
            """.filter{it.code != null && it.code != "/home/user/log/codeCoverage.txt"} """
            """.back('b') """
            """) """
            """.back('call') """
            """.sideEffect{ warnmessage = warning(it.toFileAbs().next().name, it.lineno, it.id, 'ear', '1')} """
            """.transform{warnmessage}"""
        ),
    }

    # Attack type -> exploit seeds file read by readExploitSeedsFile().
    _SEED_FILES = {
        "sql": '/home/user/navex/results/include_map_resolution_results.txt',
        "xss": '/home/user/navex/results/include_map_resolution_results_xss.txt',
        "code": '/home/user/navex/results/include_map_resolution_results_code.txt',
        "os-command": '/home/user/navex/results/include_map_resolution_results_os-command.txt',
        "file-inc": '/home/user/navex/results/include_map_resolution_results_file-inc.txt',
        "ear": '/home/user/navex/results/include_map_resolution_results_ear.txt',
    }

    def __init__(self, port):
        """Connect to the graph database listening on localhost:``port``.

        ``port`` is converted with ``int()``, so a numeric string works too.
        """
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
        self.j.connectToDatabase()

    def prepareQueryStatic(self, attackType):
        """Return the static-analysis query for ``attackType``.

        Every query starts with the three function-list definitions and
        the initialisation of ``m``.  For an attack type without a
        registered traversal only that prelude is returned.
        """
        query = self.XSS_FUNCS + self.SQL_QUERY_FUNCS + self.OS_COMMAND_FUNCS
        query += " m =[]; "
        query += self._STATIC_QUERIES.get(attackType, "")
        return query

    def prepareFinalQuery(self, seed):
        """Return (and print) the navigation query for ``seed``.

        ``seed.get`` and ``seed.params`` are iterated as ``name=value``
        strings; entries without '=' are ignored.  Each GET name is
        offered both as ``?name=`` and ``&name=``, each parameter name as
        ``name=``.  ``seed.src`` and ``seed.dst`` are inserted as they are.
        """
        get = []
        for g in seed.get:
            if '=' in g:
                t = g[0:g.find('=')]
                get.append('?' + t + '=')
                get.append('&' + t + '=')

        params = [p[0:p.find('=')] + '=' for p in seed.params if '=' in p]

        query = ("g.V('url', '%s') "
                 ".findNavigationSeq(%s, %s, %s).dedup().path") % (
                     seed.src, seed.dst, get, params)
        print(query)
        return query

    def runQuery(self, query):
        """Return ``query`` unchanged; nothing is sent to the database."""
        return query

    def runTimedQuery(self, query):
        """Run ``query`` and report how long it took.

        Returns ``(result, message)``.  ``result`` is ``None`` when
        ``query`` is empty or when running it raised; the exception is
        printed rather than propagated so that the timing message is
        always produced.
        """
        start = time.time()
        res = None
        try:
            if query:
                res = self.j.runGremlinQuery(query)
        except Exception as err:
            # %-formatting prints the same text on Python 2 and 3.
            print("Caught exception: %s %s" % (type(err), err))

        elapsed = time.time() - start
        timestr = "Query done in %f seconds." % (elapsed)
        return (res, timestr)

    def readExploitSeedsFile(self, attackType):
        """Return the stripped lines of the seeds file for ``attackType``.

        Raises:
            ValueError: if no seeds file is registered for ``attackType``
                (this used to surface as an unbound local variable).
        """
        try:
            path = self._SEED_FILES[attackType]
        except KeyError:
            raise ValueError("unsupported attack type: %r" % (attackType,))

        # Log the file that is really opened; the "sql" branch used to
        # announce the xss file while reading a different one.
        print('Reading Exploit Seeds File in ' + path)
        with open(path, 'r') as f:
            return [line.strip() for line in f]
#coding=utf-8
'''
Created on Jan 4, 2016

@author: root
'''
from algorithm.util import vuln_patch_compare
from astLevel_algorithm.models import vulnerability_info
from joern.all import JoernSteps

if __name__ == "__main__":
    # Fetch the vulnerability records, open one database connection and
    # reuse it to compare every record with its patch.
    vulnerabilities = vulnerability_info.objects.all()

    graph_db = JoernSteps()
    graph_db.setGraphDbURL('http://localhost:7474/db/data/')
    graph_db.connectToDatabase()

    for vulnerability in vulnerabilities:
        vuln_patch_compare(vulnerability.vuln_id, graph_db)
def bug_finder(request):
    """Render the bug-finder page and start similarity computations.

    GET renders the software selection form.  A POST carrying
    ``sel_vuln`` lists the vulnerabilities (flagged ``is_in_db``) of all
    versions of the chosen software.  A POST carrying ``find`` connects to
    the software database and the feature database and starts a background
    thread running the chosen algorithm ("CFG" or "PDG").

    Every path returns an HTTP response; a POST carrying neither key, or
    an unknown algorithm, is answered with an explanatory message instead
    of falling through.
    """
    if request.method == "GET":
        software_sel = software_sel_form()
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"software_sel": software_sel}))

    if "sel_vuln" in request.POST:
        soft_id = int(request.POST.get("software"))
        soft_name = softwares.objects.get(software_id=soft_id).software_name

        # Look up every vulnerable function of this software, whatever
        # its version.
        softs = softwares.objects.filter(software_name=soft_name)

        # First collect all CVEs involved ...
        cves = []
        for soft in softs:
            cves.extend(soft.cve_infos_set.all())

        # ... then all vulnerabilities belonging to those CVEs.
        sel_vuln = vulnerability_info.objects.filter(cve_info__in=cves,
                                                     is_in_db=True)
        software_sel = software_sel_form(request.POST)
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {
                "sel_vuln": sel_vuln,
                "software_sel": software_sel
            }))

    if "find" not in request.POST:
        # Previously this path returned None.  Message: "invalid request".
        return HttpResponse(u"无效的请求")

    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    soft = softwares.objects.get(
        software_id=int(request.POST.get("software")))
    try:
        db = graph_dbs.objects.get(soft=soft)
    except graph_dbs.DoesNotExist:
        return HttpResponse("软件图形数据库未生成")

    # Check that the software database is running.
    if not is_db_on(db.port):
        return HttpResponse("软件图形数据库未启动")

    # Connect to the software database.
    soft_db = JoernSteps()
    try:
        soft_db.setGraphDbURL("http://localhost:%d/db/data/" % db.port)
        soft_db.connectToDatabase()
    except Exception:
        return HttpResponse("连接软件数据库失败! port:%d" % db.port)

    # Connect to the feature database.
    character_db = JoernSteps()
    try:
        character_db.setGraphDbURL("http://localhost:7474/db/data/")
        character_db.connectToDatabase()
    except Exception:
        return HttpResponse("连接特征数据库失败!")

    # Pick the worker that matches the selected algorithm.
    workers = {
        "CFG": func_similarity_cfgLevel_proc,
        "PDG": func_similarity_pdgLevel_proc,
    }
    worker = workers.get(request.POST.get("algorithm"))
    if worker is None:
        # Do not claim a thread was started when none was.
        # Message: "unsupported algorithm type".
        return HttpResponse(u"不支持的算法类型")

    th = Thread(target=worker,
                args=(soft, soft_db, character_db,
                      request.POST.getlist("vuln_infos")))
    th.start()
    return HttpResponse("已启动线程进行计算,请等候!")
def connect_db(db_url='http://localhost:7474/db/data/'):
    """Return a ``JoernSteps`` object connected to ``db_url``.

    ``db_url`` defaults to the address that used to be hard-coded, so
    existing calls without arguments behave as before; pass another URL
    to reach a database on a different host or port.
    """
    steps = JoernSteps()
    steps.setGraphDbURL(db_url)
    steps.connectToDatabase()
    return steps
"PrimaryExpression": "$COS", "CallExpression": "$CAL", "Condition": "$CON", "Identifier": "$VAR", "CastExpression": "$CAT", "OrExpression": "$OP", "IncDecOp": "$OP", "UnaryOp": "$UOP", "AdditiveExpression": "$ADD", "ArrayIndexing":"$IDX" } return switcher.get(x, 'null') j=JoernSteps() j.setGraphDbURL('http://localhost:7474/db/data/') # j.addStepsDir('Use this to inject utility traversals') j.connectToDatabase() ptrlist=open('/home/hongfa/workspace/thttpd_workspace/ptrList','r') ptrs=ptrlist.readlines() for ptr in ptrs: #print ptr functionID = ptr.split("functionId:")[1] nodeID = ptr.split(" ")[0] print nodeID
def bug_finder(request):
    """Render the bug-finder page and start similarity computations.

    GET renders the software selection form.  A POST carrying
    ``sel_vuln`` lists the vulnerabilities (flagged ``is_in_db``) of all
    versions of the chosen software.  A POST carrying ``find`` connects to
    the software database and the feature database and starts a background
    thread running the chosen algorithm ("CFG" or "PDG").

    Every path returns an HTTP response; a POST carrying neither key, or
    an unknown algorithm, is answered with an explanatory message instead
    of falling through.
    """
    if request.method == "GET":
        software_sel = software_sel_form()
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"software_sel": software_sel}))

    if "sel_vuln" in request.POST:
        soft_id = int(request.POST.get("software"))
        soft_name = softwares.objects.get(software_id=soft_id).software_name

        # Look up every vulnerable function of this software, whatever
        # its version.
        softs = softwares.objects.filter(software_name=soft_name)

        # First collect all CVEs involved ...
        cves = []
        for soft in softs:
            cves.extend(soft.cve_infos_set.all())

        # ... then all vulnerabilities belonging to those CVEs.
        sel_vuln = vulnerability_info.objects.filter(cve_info__in=cves,
                                                     is_in_db=True)
        software_sel = software_sel_form(request.POST)
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {
                "sel_vuln": sel_vuln,
                "software_sel": software_sel
            }))

    if "find" not in request.POST:
        # Previously this path returned None.  Message: "invalid request".
        return HttpResponse(u"无效的请求")

    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    soft = softwares.objects.get(
        software_id=int(request.POST.get("software")))
    try:
        db = graph_dbs.objects.get(soft=soft)
    except graph_dbs.DoesNotExist:
        return HttpResponse("软件图形数据库未生成")

    # Check that the software database is running.
    if not is_db_on(db.port):
        return HttpResponse("软件图形数据库未启动")

    # Connect to the software database.
    soft_db = JoernSteps()
    try:
        soft_db.setGraphDbURL("http://localhost:%d/db/data/" % db.port)
        soft_db.connectToDatabase()
    except Exception:
        return HttpResponse("连接软件数据库失败! port:%d" % db.port)

    # Connect to the feature database.
    character_db = JoernSteps()
    try:
        character_db.setGraphDbURL("http://localhost:7474/db/data/")
        character_db.connectToDatabase()
    except Exception:
        return HttpResponse("连接特征数据库失败!")

    # Pick the worker that matches the selected algorithm.
    workers = {
        "CFG": func_similarity_cfgLevel_proc,
        "PDG": func_similarity_pdgLevel_proc,
    }
    worker = workers.get(request.POST.get("algorithm"))
    if worker is None:
        # Do not claim a thread was started when none was.
        # Message: "unsupported algorithm type".
        return HttpResponse(u"不支持的算法类型")

    th = Thread(target=worker,
                args=(soft, soft_db, character_db,
                      request.POST.getlist("vuln_infos")))
    th.start()
    return HttpResponse("已启动线程进行计算,请等候!")
def produce_nodes_string():
    """Serialise the AST nodes of all functions and count their types.

    Returns:
        A tuple ``(all_nodes_string, ast_features)``.  ``all_nodes_string``
        concatenates one record per AST node that is not a ``FunctionDef``
        root; each record is
        ``id,type,code,functionId,childNum,parent_id`` terminated by '¬'.
        ``ast_features`` is a list of 57 counters; counter ``x`` is the
        number of AST nodes whose type equals ``global_node_types[x]``.
    """

    def queryParent(j, nodeId):
        """Return the parents of node ``nodeId``."""
        # The connection opened below is reused; reconnecting before every
        # single node query only added a round trip per node.
        return j.runGremlinQuery('g.v(' + str(nodeId) + ').parents()')

    def identifierFor(properties):
        """Hash type, comma-free code, functionId and childNum of a node."""
        code = str(properties['code']).replace(',', '')
        code = code.replace('¬', '')
        text = properties['type'] + "," + code + "," + str(
            properties['functionId']) + "," + str(properties['childNum'])
        return hash(tuple(text))

    def getStringForNode(node, nodes_and_parents):
        """Return the '¬'-terminated record describing ``node``."""
        # Only the first parent is used for the parent identifier.
        parent_identifier = identifierFor(
            nodes_and_parents[node][0].properties)
        node_identifier = identifierFor(node.properties)
        # Unlike the hashed text, the record keeps the code as stored.
        return str(node_identifier) + "," + str(
            node.properties['type']) + "," + str(
                node.properties['code']) + "," + str(
                    node.properties['functionId']) + "," + str(
                        node.properties['childNum']) + "," + str(
                            parent_identifier) + "¬"

    ast_features = [0] * 57

    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    root_nodes = j.runGremlinQuery('queryNodeIndex("type:FunctionDef")')
    all_ast_nodes = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef").astNodes()')

    nodes_and_parents = {}
    for node in all_ast_nodes:
        nodes_and_parents[node] = queryParent(j, node._id)

    # Join once instead of growing the string record by record.
    all_nodes_string = "".join(
        getStringForNode(node, nodes_and_parents)
        for node in all_ast_nodes if node not in root_nodes)

    for ast_node in all_ast_nodes:
        node_type = ast_node.properties['type']
        for x, known_type in enumerate(global_node_types):
            if known_type == node_type:
                ast_features[x] += 1

    return all_nodes_string, ast_features
class get_source(): """""" #---------------------------------------------------------------------- def __init__(self): """Constructor""" self.JS = JoernSteps() self.JS.setGraphDbURL(NEO4J_URL) self.JS.connectToDatabase() #self.source_sink_path=[] #self.sink_source_tree=tree() """given a nodeId , this can get the type of the node ,result is utf string""" def get_type(self, node_id): query_get_type = """g.v(%s).getProperty("type")""" % node_id query_result = self.JS.runGremlinQuery(query_get_type) return query_result.encode('utf-8') '''given a node that means sink, this can get the sources in the function''' def get_source_within_func(self, node_id): get_source_query = """g.v(%s).sources().id""" % node_id #getArguments("printf", "1") query_result = self.JS.runGremlinQuery(get_source_query) source_node_list = [] #store the source nodes source_node_tmp = [] #store the source nodes temply for r in query_result: if self.get_type(r) != "Parameter": print "the source node %s is not the \"Parameter\" type" else: source_node_list.append(r) return source_node_list '''given a node that means sink, this can get the sources in the function''' def get_source_of_IdentifierDeclStatement(self, node_id): source_node_list = [] #store the source nodes source_node_tmp = [] #store the source nodes temply if self.get_type(node_id) == "IdentifierDeclStatement": get_source_query = """g.v(%s).out("USE").filter{it.type=="Symbol"}.in("DEF").id""" % node_id query_result = self.JS.runGremlinQuery(get_source_query) for r in query_result: if self.get_type(r) != "Parameter": print "the source node %s is not the \"Parameter\" type" else: source_node_list.append(r) return source_node_list '''get the source parameter's Identifier node''' def get_Identifier_source(self, node_id): get_Identifier_source = """g.v(%s).out("IS_AST_PARENT").filter{it.type=="Identifier"}.id""" % node_id #getArguments("printf", "1") source_Identifier_node = self.JS.runGremlinQuery(get_Identifier_source) return 
source_Identifier_node '''get the source parameter's Identifier node''' def get_ParameterType_source(self, node_id): get_ParameterType_source = """g.v(%s).out("IS_AST_PARENT").filter{it.type=="ParameterType"}.id""" % node_id #getArguments("printf", "1") source_ParameterType_node = self.JS.runGremlinQuery( get_ParameterType_source) return source_ParameterType_node '''given a node that means sink, this can get the sources out of the function''' def get_source_between_func(self, node_id): get_source_query = """g.v(%s).in("IS_ARG").id""" % node_id #getArguments("printf", "1") query_result = self.JS.runGremlinQuery(get_source_query) source_node_list = [] #store the source nodes #source_node_tmp=[]#store the source nodes temply for r in query_result: if r not in source_node_list: source_node_list.append(r) return source_node_list '''get the sink nodes of specified function''' def get_sink(self, function_name, arg_num): sink_query = """getArguments(\"%s\", \"%s\").id""" % (function_name, arg_num) query_result = self.JS.runGremlinQuery(sink_query) sink_node_list = [] for r in query_result: if r not in sink_node_list: sink_node_list.append(r) return sink_node_list '''decide whether the node is over''' def wether_is_over(self, node_id): leble = False '''the symbol has not def nodes''' if self.get_type(node_id) == "Symbol": query = """g.v(%s).in("DEF").id""" % node_id query_result = self.JS.runGremlinQuery(query) if len(query_result) == 0: leble = True '''the PARAMETER has not Identifier nodes''' if self.get_type(node_id) == "Parameter": query = """g.v(%s).out("IS_AST_PARENT").filter{it.type=="Identifier"}.in("IS_ARG").id""" % node_id query_result = self.JS.runGremlinQuery(query) if len(query_result) == 0: leble = True '''the PARAMETER has not Identifier nodes''' if self.get_type(node_id) == "IdentifierDeclStatement": query = """g.v(%s).out("USE").filter{it.type=="Symbol"}.in("DEF").id""" % node_id query_result = self.JS.runGremlinQuery(query) if len(query_result) == 0: leble 
= True '''the PARAMETER has not Identifier nodes''' if self.get_type(node_id) == "Identifier": query = """g.v(%s).out.id""" % node_id query_result = self.JS.runGremlinQuery(query) query_has_arg = """g.v(%s).in("IS_ARG").id""" % node_id query_result1 = self.JS.runGremlinQuery(query_has_arg) if len(query_result) == 0 and len(query_result1) == 0: leble = True return leble '''get the source node of specified sink node''' def get_source(self, node): #source_current_func=get_source_within_func(node_id) #node.add(node_id) node_id = node.getdata() last_source = node_id if not self.wether_is_over(last_source): ###sources() get the parameter nodes''' if self.get_type(last_source) == "Argument": last_source_tmp = self.get_source_within_func(last_source) last_sources = last_source_tmp for last_source in last_sources: node.add(tree.node(last_source)) for source_node in last_sources: identifier_node = self.get_Identifier_source(source_node) if identifier_node is not None: for neighbor_source in identifier_node: node.add(self.get_source(neighbor_source)) ###this should get the arguement nodes''' elif len(self.get_source_between_func(last_source)) != 0: last_source_tmp = self.get_source_between_func(last_source) last_source = last_source_tmp node.add(last_source) for source_node in last_source: identifier_node = self.get_Identifier_source(source_node) if identifier_node is not None: for neighbor_source in identifier_node: node.add(self.get_source(neighbor_source)) ###sources() get the IdentifierDeclStatement nodes''' elif len(self.get_source_of_IdentifierDeclStatement( last_source)) != 0: last_source_tmp = self.get_source_of_IdentifierDeclStatement( last_source) last_source = last_source_tmp node.add(last_source) for source_node in last_source: identifier_node = self.get_Identifier_source(source_node) if identifier_node is not None: for neighbor_source in identifier_node: node.add(self.get_source(neighbor_source)) def execute(self): sink_func = sys.argv[0] arg_num = sys.argv[1] 
#root_node = tree.node(sink_func) for node_id in self.get_sink(sink_func, arg_num): print "Now, the sink node id is ", node_id sink_node = tree.node(node_id) self.sink_source_tree._head.add(sink_node) current_path = [] self.get_source(sink_node) print "ok" '''current_node.add(node_id)