def cal_funcs_similarity(request):
    """Show stored function-similarity reports or start a new comparison run.

    GET renders ``ast_function_level.html`` with one ``cal_reports`` result
    per ``func_similarity_reports`` row.  Any other method checks that the
    ``vuln_db`` index directory exists and that ``is_db_on()`` is true,
    connects to the graph database and starts ``vuln_patch_compare_all`` on
    a background thread.

    Returns:
        An HTTP response in every case; failures are reported as plain-text
        messages instead of being raised.
    """
    if request.method == "GET":
        reports = [cal_reports(r) for r in func_similarity_reports.objects.all()]
        return render_to_response(
            "ast_function_level.html",
            RequestContext(request, {'reports': reports}))

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse("特征数据库不存在")
    if not is_db_on():
        return HttpResponse("特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        # Deliberately broad: any connection failure becomes a user-facing
        # message.  ``Exception`` (not a bare except) keeps SystemExit and
        # KeyboardInterrupt propagating.
        return HttpResponse("连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs on its own thread; the response returns at once.
    Thread(target=vuln_patch_compare_all, args=(neo4jdb, )).start()
    return HttpResponse("启动线程计算中,请稍后查看!")
def __init__(self, port):
    """Create a ``JoernSteps`` client for localhost at *port* and connect.

    *port* is passed through ``int()`` before being formatted into the URL.
    """
    joern = self.j = JoernSteps()
    db_url = 'http://localhost:%d/db/data/' % (int(port))
    joern.setGraphDbURL(db_url)
    joern.connectToDatabase()
def query_node_type():
    """Collect, per Function node, the statement types found on each line.

    For every node returned by ``getNodesWithType('Function')`` the
    function's statements are queried as (type, location) pairs and grouped
    by the part of the location before the first ``:``.  The grouped types
    are passed to ``cc.AST_type_clean`` and the owning file's base name is
    looked up.  Neither result is used further within this function.
    """
    joern = JoernSteps()
    joern.setGraphDbURL('http://localhost:7474/db/data/')
    joern.connectToDatabase()

    # Fetch every function node in the database.
    for function in joern.runGremlinQuery("""getNodesWithType('Function')"""):
        # NOTE(review): ref[5:] presumably strips a "node/" prefix - confirm.
        function_node_id = int(function.ref[5:])

        # Map each line key to the space-separated types seen on it.
        types_by_line = {}
        statement_query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % function_node_id
        for row in joern.runGremlinQuery(statement_query):
            node_type = str(row[0])
            location = str(row[1])
            if location == 'None':
                continue
            line_key = location.split(':')[0]
            if line_key in types_by_line:
                types_by_line[line_key] = types_by_line[line_key] + ' ' + node_type
            else:
                types_by_line[line_key] = node_type
        clean_type = cc.AST_type_clean(types_by_line, True)

        # Second query: which file does this function belong to?
        file_query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % function_node_id
        file_path = joern.runGremlinQuery(file_query)
        file_name = str(file_path[0]).split('/')[-1]
class jutils:
    """Static helpers built around one class-level ``JoernSteps`` instance."""

    # Created once when the class body executes; shared by every helper.
    joern = JoernSteps()

    @staticmethod
    def connectToDatabase():
        """Connect the shared instance, then register ``CHUCKY_STEPS_DIR``."""
        shared = jutils.joern
        shared.connectToDatabase()
        shared.addStepsDir(CHUCKY_STEPS_DIR)

    @staticmethod
    def lookup(lucene_query, traversal=None, projection=None):
        """Run ``raw_lookup`` on the nodes selected by *lucene_query*."""
        return jutils.raw_lookup(
            "queryNodeIndex('{}')".format(lucene_query), traversal, projection)

    @staticmethod
    def raw_lookup(node_selection, traversal=None, projection=None):
        """Build and run ``<selection>[.<traversal>].transform{ [...] }``.

        With a falsy *projection* each result is ``[it.id, it]``; otherwise
        each projected name becomes ``it.<name>``.
        """
        if projection:
            attributes = ['it.{}'.format(name) for name in projection]
        else:
            attributes = ['it.id', 'it']
        transform = "transform{{ [ {} ] }}".format(', '.join(attributes))

        steps = [node_selection, transform]
        if traversal:
            steps.insert(1, traversal)
        return jutils.joern.runGremlinQuery('.'.join(steps))

    @staticmethod
    def runGremlinCommands(commands):
        """Join *commands* with ``'; '`` and run them as a single query."""
        return jutils.joern.runGremlinQuery('; '.join(commands))
def runQueryChunk():
    """Process all Function nodes in chunks and annotate their source files.

    Function ids are fetched once and split with ``j.chunks``.  Each chunk
    costs two queries: one for (type, location, functionId) of every
    statement, one for the file path of every function.  Statement types
    are grouped per function and per line key, passed to ``getCleanText``
    and handed to ``addInfoToSourceFile`` with the file's base name.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()
    query = """getNodesWithType('Function').id"""
    res = j.runGremlinQuery(query)
    # flag starts at 1 and is only incremented, so `if (flag)` below is
    # always true; it serves as a progress counter.
    flag = 1
    CHUNK_SIZE = 51
    for chunk in j.chunks(res, CHUNK_SIZE):
        if (flag):
            # Render the ids as a tuple string, then drop commas and single
            # quotes so the ids end up space-separated inside parentheses.
            functionTuple = tuple(chunk)
            functionIdStr = str(functionTuple)
            functionIdStr = functionIdStr.replace(',', '')
            functionIdStr = functionIdStr.replace('\'', '')
            #query = """queryNodeIndex("functionId:%s").as("x").statements().map("functionId","location").as("y").select{it.type}{it}""" % functionIdStr
            query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % functionIdStr
            stms = j.runGremlinQuery(query)
            query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
            stmsFiles = j.runGremlinQuery(query)
            # function node id -> base name of the file containing it
            files = dict()
            for stmsFile in stmsFiles:
                files[int(stmsFile[0])] = str(stmsFile[1]).split('/')[-1]
            # function node id -> list of [location, type] pairs
            codes = dict()
            for stm in stms:
                functionnodeid = int(stm[2])
                loc = stm[1]
                type = str(stm[0])
                if (codes.__contains__(functionnodeid)):
                    codes[functionnodeid].append([loc, type])
                else:
                    codeList = [[loc, type]]
                    codes[functionnodeid] = codeList
            codesList = codes.items()
            for id, elem in codesList:
                # line key -> space-separated statement types on that line
                lineDict = dict()
                for e in elem:
                    location = str(e[0])
                    type = e[1]
                    # Statements without a location stringify to 'None'.
                    if (location != u'None'):
                        loc = str(location).split(':')[0]
                        if (lineDict.has_key(loc)):
                            temp = lineDict.get(loc) + ' ' + type
                            lineDict[loc] = temp
                        else:
                            lineDict[loc] = type
                text = getCleanText(lineDict, False)
                fileName = files.get(id)
                addInfoToSourceFile(text, fileName)
                # NOTE(review): the nesting level of the two lines below was
                # reconstructed; they may have been meant to run once per
                # chunk rather than once per function - confirm.
                flag += 1
                print flag
def __init__(self):
    """Connect to the graph database at ``NEO4J_URL`` and set up the caches.

    After connecting, ``get_function_list(ChunkStartTool)`` is called and
    the two dictionaries are created.
    """
    joern = self.JS = JoernSteps()
    joern.setGraphDbURL(NEO4J_URL)
    joern.connectToDatabase()
    self.get_function_list(ChunkStartTool)
    # NOTE(review): both dicts are (re)created only after get_function_list
    # has run, so anything it stored under these names is discarded -
    # confirm this ordering is intended.
    self.FUNCTION_LIST = {}
    self.BASIC_BLOCK_LIST = {}
def __getConnection(self):
    """Instantiate ``JoernSteps`` and store it on ``self.connection``.

    Returns:
        True when the instance was created.  If construction raises an
        ``Exception``, its ``args`` are printed and False is returned.
    """
    print("[+] Creating connection.")
    try:
        self.connection = JoernSteps()
    except Exception as err:
        print("[Error] Cannot instantiate Python-Joern database interface, DBInterface says: {}".format(
            err.args))
        return False
    else:
        return True
def getFunctionSimilarity():
    """Compute pairwise function-name similarity and write it to a CSV file.

    Function names come from the Gremlin step ``_().getFunctions()``.  Each
    name is passed through ``my_util.removeNamespace``; empty names, names
    starting with ``"operator "`` and duplicates are dropped.  Every ordered
    pair of distinct entries is scored with ``computeSim``.

    Returns:
        dict mapping ``(func_a, func_b)`` to the similarity for each pair
        scoring above 0.5.  The same rows are written to
        ``my_constant.FUNC_SIMILAIRTY_FILE_NAME``.
    """
    # The output file is opened first (as before), but a context manager now
    # guarantees it is closed even if a query or computeSim raises.
    with open(my_constant.FUNC_SIMILAIRTY_FILE_NAME, 'wb') as analysis:
        analyze_writer = csv.writer(analysis)
        analyze_writer.writerow(['func_a', 'func_b', 'similarity'])

        joern_instance = JoernSteps()
        joern_instance.addStepsDir("/data/joern-code/query/")
        joern_instance.setGraphDbURL("http://localhost:7474/db/data/")
        joern_instance.connectToDatabase()

        # The query yields a single result whose first element is the list
        # of names.
        raw_names = joern_instance.runGremlinQuery('_().getFunctions()')[0]

        # Each entry stays a one-element list because that is the shape
        # passed to computeSim.  A set makes duplicate detection O(1).
        functions = []
        seen = set()
        for name in raw_names:
            name = my_util.removeNamespace(name)
            if name == '' or name.startswith("operator ") or name in seen:
                continue
            seen.add(name)
            functions.append([name])

        func_similarity_dic = {}
        # computeSim threads this dict through every call and returns it.
        word_list_dict = {}
        for i, func_a in enumerate(functions):
            for j, func_b in enumerate(functions):
                if i == j:
                    continue
                similarity, word_list_dict = computeSim(
                    func_a, func_b, word_list_dict)
                # NOTE(review): row and dict entry are both treated as
                # guarded by the threshold; confirm against the original
                # layout.
                if similarity > 0.5:
                    analyze_writer.writerow(
                        [func_a[0], func_b[0], similarity])
                    func_similarity_dic[(func_a[0], func_b[0])] = similarity

    return func_similarity_dic
def query_node_type_chunk():
    """Chunked variant: group statement types per function and per line.

    Function ids are fetched once and split with ``step.chunks``.  For each
    chunk the owning files and the (type, location, functionId) triples of
    all statements are queried.  The per-line types of every function are
    passed to ``cc.AST_type_clean`` and the file's base name is looked up.
    Neither result is used further within this function.
    """
    joern = JoernSteps()
    joern.setGraphDbURL('http://localhost:7474/db/data/')
    joern.connectToDatabase()

    # Ids of all function nodes.
    function_ids = joern.runGremlinQuery("""getNodesWithType('Function').id""")
    flag = 1
    CHUNK_SIZE = 51
    for chunk in joern.chunks(function_ids, CHUNK_SIZE):
        # Tuple repr with commas and single quotes removed.
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # function node id -> base name of the file it belongs to
        file_query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        files = {
            int(row[0]): str(row[1]).split('/')[-1]
            for row in joern.runGremlinQuery(file_query)
        }

        statement_query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        statements = joern.runGremlinQuery(statement_query)

        # function node id -> list of [location, type] pairs
        codes = {}
        for stm in statements:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for func_id, entries in codes.items():
            line_dict = {}
            for raw_location, node_type in entries:
                location = str(raw_location)
                if location == u'None':
                    continue
                line_key = location.split(':')[0]
                if line_key in line_dict:
                    line_dict[line_key] = line_dict[line_key] + ' ' + node_type
                else:
                    line_dict[line_key] = node_type
            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(func_id)
def __init__(self):
    """Connect to ``NEO4J_URL``, set up data directories and load metadata.

    The four ``*_dataDir`` attributes point below ``FILE_PATH``.  Once the
    empty containers exist, ``get_function_list(ChunkStartTool)`` and
    ``get_all_return_type()`` are called, in that order.
    """
    joern = self.JS = JoernSteps()
    joern.setGraphDbURL(NEO4J_URL)
    joern.connectToDatabase()

    # <kind>_dataDir -> FILE_PATH/<kind>_data
    for kind in ('return_type', 'parameter', 'edge', 'node_type'):
        setattr(self, kind + '_dataDir', os.path.join(FILE_PATH, kind + '_data'))

    self.all_return_type = []
    self.all_node_type = []
    self.FUNCTION_LIST = {}

    self.get_function_list(ChunkStartTool)
    self.get_all_return_type()
def __init__(self, port):
    """Create a ``JoernSteps`` client for localhost at *port* and connect.

    The ``custom_gremlin_steps`` directory below the configured base
    directory is registered before connecting.
    """
    joern = self.j = JoernSteps()
    joern.setGraphDbURL('http://localhost:%d/db/data/' % (int(port)))
    # Left disabled, as in the original:
    # joern.addStepsDir(
    #     Configurator.getPath(Configurator.KEY_PYTHON_JOERN) +
    #     "/joern/phpjoernsteps"
    # )
    steps_dir = (Configurator.getPath(Configurator.KEY_BASE_DIR) +
                 "/custom_gremlin_steps")
    joern.addStepsDir(steps_dir)
    joern.connectToDatabase()
def produce_file_function_location_triads(file):
    """Return ``(file base name, function name, start index)`` triads.

    One triad is produced per node from ``queryNodeIndex("type:Function")``.
    The start index is the part of the node's ``location`` before the first
    ``:``, minus one, as a string.

    Args:
        file: path whose last ``/``-separated component labels every triad.
    """
    joern = JoernSteps()
    joern.setGraphDbURL('http://localhost:7474/db/data/')
    joern.connectToDatabase()
    function_nodes = joern.runGremlinQuery('queryNodeIndex("type:Function")')

    start_indices = []
    function_names = []
    for fn_node in function_nodes:
        first_line = fn_node.properties['location'].split(":")[0]
        start_indices.append(str(int(first_line) - 1))
        function_names.append(fn_node.properties['name'])

    return [(file.split("/")[-1], name, start)
            for name, start in zip(function_names, start_indices)]
def runQuery():
    """Annotate source files with cleaned statement types, one function at a time.

    For each Function node the (type, location) pairs of its statements are
    grouped by line key, passed to ``getCleanText`` and handed to
    ``addInfoToSourceFile`` with the base name of the owning file.  A
    running counter is printed after every function.
    """
    joern = JoernSteps()
    joern.setGraphDbURL('http://localhost:7474/db/data/')
    joern.connectToDatabase()

    # Starts at 1 and only grows, so the guard below is always true.
    counter = 1
    for function in joern.runGremlinQuery("""getNodesWithType('Function')"""):
        if counter:
            types_by_line = {}
            function_id = int(function.ref[5:])
            statement_query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % function_id
            for row in joern.runGremlinQuery(statement_query):
                node_type = str(row[0])
                location = str(row[1])
                if location == 'None':
                    continue
                line_key = location.split(':')[0]
                if line_key in types_by_line:
                    types_by_line[line_key] = types_by_line[line_key] + ' ' + node_type
                else:
                    types_by_line[line_key] = node_type
            text = getCleanText(types_by_line, False)

            file_query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % function_id
            file_path = joern.runGremlinQuery(file_query)
            addInfoToSourceFile(text, str(file_path[0]).split('/')[-1])

            counter += 1
            print(counter)
def createdb(coverage_db,json_dbname,joern_url='http://localhost:7474/db/data/'):
    """Combine coverage information with joern queries into a JSON database.

    The ids of all ``IfStatement`` and ``SwitchStatement`` nodes are fetched
    and passed one by one to ``get_conditional_info``.  Non-empty results
    are grouped by their ``"filename"``, sorted by ``"line"`` within each
    file, concatenated in filename order and dumped to *json_dbname*.

    Args:
        coverage_db: path of the SQLite coverage database; the connection is
            stored in the module-level ``conn``.
        json_dbname: path of the JSON file to write.
        joern_url: REST endpoint of the graph database; the client is stored
            in the module-level ``j``.
    """
    global j,conn
    from joern.all import JoernSteps
    j = JoernSteps()
    j.setGraphDbURL(joern_url)
    j.connectToDatabase()

    if_ids = j.runGremlinQuery('queryNodeIndex("type:IfStatement").id')
    print("Total number of IfStatements:%d" % len(if_ids))
    switch_ids = j.runGremlinQuery('queryNodeIndex("type:SwitchStatement").id')
    print("Total number of SwitchStatement:%d" % len(switch_ids))
    if_ids += switch_ids

    # Left open on purpose: `conn` is a module-level global.
    conn = sqlite3.connect(coverage_db)

    conditionals = {}  # filename -> list of conditional dicts
    idx = 0  # counts only conditionals that produced information
    total = len(if_ids)
    for cond_id in if_ids:
        conditional = get_conditional_info(cond_id, idx)
        if conditional == {}:
            continue
        idx += 1
        sys.stdout.write(
            "Processing conditional %d out of %d total.\r" % (idx, total))
        sys.stdout.flush()
        conditionals.setdefault(conditional["filename"], []).append(conditional)

    # Walk filenames in sorted order so the output really is ordered by
    # filename and then line; plain dict iteration order is arbitrary here.
    sorted_conditionals = []
    for filename in sorted(conditionals):
        per_file = conditionals[filename]
        per_file.sort(key=lambda c: c["line"])
        sorted_conditionals += per_file

    # The context manager guarantees the JSON file is flushed and closed.
    with open(json_dbname, "wb") as out:
        json.dump(sorted_conditionals, out)
    print("\nDone!")
def func_pdg_comp_view(request):
    """Show the PDG comparison page or start a PDG similarity computation.

    GET renders ``pdg_comp.html`` with the selectable functions and all
    existing ``pdg_vuln_patch_funcs_report`` rows.  Any other method reads
    ``funcs_sel`` from the POST data.  If a report already exists for that
    vulnerability, a message says so.  Otherwise the graph database is
    checked and connected, and ``func_pdg_similarity_proc`` is started on a
    background thread.

    Returns:
        An HTTP response in every case; failures are reported as text.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {
                "funcs": funcs,
                "infos": infos
            }))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # Any lookup failure is treated as "no report yet", as before.
        # ``Exception`` (not a bare except) keeps SystemExit and
        # KeyboardInterrupt propagating.
        pass
    else:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # The computation runs on its own thread; the response returns at once.
    Thread(target=func_pdg_similarity_proc, args=(vuln_id, neo4jdb)).start()
    return HttpResponse(u"已经启动线程进行计算")
def Tran(x):
    """Return the placeholder token for node type *x*, or ``'null'``."""
    tokens = {
        "PrimaryExpression": "$COS",
        "CallExpression": "$CAL",
        "Condition": "$CON",
        "Identifier": "$VAR",
        "CastExpression": "$CAT",
        "OrExpression": "$OP",
        "IncDecOp": "$OP",
        "UnaryOp": "$UOP",
        "AdditiveExpression": "$ADD",
        "ArrayIndexing": "$IDX",
    }
    return tokens[x] if x in tokens else 'null'


# Module-level script: connect to the local graph database.
j = JoernSteps()
j.setGraphDbURL('http://localhost:7474/db/data/')
# j.addStepsDir('Use this to inject utility traversals')
j.connectToDatabase()

# Read the pointer list; the part of each line after "functionId:" is kept
# as the function id.
ptrlist = open('/home/hongfa/workspace/thttpd_workspace/ptrList', 'r')
ptrs = ptrlist.readlines()
for ptr in ptrs:
    functionID = ptr.split("functionId:")[1]
def connect_db():
    """Return a ``JoernSteps`` client connected to the local graph database."""
    client = JoernSteps()
    client.setGraphDbURL('http://localhost:7474/db/data/')
    client.connectToDatabase()
    return client
def setUp(self):
    """Create a ``JoernSteps`` client (no URL is set here) and connect it."""
    client = self.j = JoernSteps()
    client.connectToDatabase()
def init_joern():
    """Create, configure and connect a ``JoernSteps`` client; return it."""
    client = JoernSteps()
    client.setGraphDbURL("http://localhost:7474/db/data/")
    client.connectToDatabase()
    return client
def _initDatabaseConnection(self):
    """Create ``self.j``, connect it, then register the ``steps/`` directory."""
    client = self.j = JoernSteps()
    client.connectToDatabase()
    client.addStepsDir('steps/')
def __init__(self):
    """Create the ``JoernSteps`` client, then call ``init_database_connection``."""
    # The client is assigned before the call below.
    self.j = JoernSteps()
    self.init_database_connection()
#coding=utf-8
'''
Created on Jan 4, 2016

@author: root

When run as a script, calls vuln_patch_compare for every stored
vulnerability_info row.
'''
from algorithm.util import vuln_patch_compare
from astLevel_algorithm.models import vulnerability_info
from joern.all import JoernSteps


def main():
    """Connect to the local graph database and compare every vulnerability."""
    records = vulnerability_info.objects.all()
    graph_db = JoernSteps()
    graph_db.setGraphDbURL('http://localhost:7474/db/data/')
    graph_db.connectToDatabase()
    for record in records:
        vuln_patch_compare(record.vuln_id, graph_db)


if __name__ == "__main__":
    main()
def connectToDatabase(self):
    """Create ``self.j``, register ``JOERN_TOOLS_STEPDIR`` and connect."""
    client = self.j = JoernSteps()
    client.addStepsDir(JOERN_TOOLS_STEPDIR)
    client.connectToDatabase()
def _init_joern_interface(self, step_dir=None):
    """Create and connect ``self._joern``, then mark the object initialised.

    Args:
        step_dir: optional steps directory; registered before connecting
            when truthy.
    """
    client = self._joern = JoernSteps()
    if step_dir:
        client.addStepsDir(step_dir)
    client.connectToDatabase()
    # Double-underscore attribute: name-mangled when this method lives in a
    # class body.
    self.__is_initialized = True
def bug_finder(request):
    """Render the bug-finder page and launch similarity searches.

    * GET renders ``bug_finder.html`` with an empty software form.
    * POST with ``sel_vuln`` lists the in-database vulnerabilities of every
      version of the selected software.
    * POST with ``find`` connects to the software's graph database and to
      the feature database, then starts the CFG or PDG worker on a
      background thread.

    Returns:
        An HTTP response.  A POST containing neither ``sel_vuln`` nor
        ``find`` falls through and returns None, as it did before.
    """
    if request.method == "GET":
        software_sel = software_sel_form()
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"software_sel": software_sel}))

    if "sel_vuln" in request.POST:
        soft_id = int(request.POST.get("software"))
        soft_name = softwares.objects.get(
            software_id=soft_id).software_name
        # All entries of this software, regardless of version.
        softs = softwares.objects.filter(software_name=soft_name)
        # First collect every CVE attached to any of them ...
        cves = []
        for soft in softs:
            cves.extend(soft.cve_infos_set.all())
        # ... then every vulnerability of those CVEs that is in the database.
        sel_vuln = vulnerability_info.objects.filter(cve_info__in=cves,
                                                     is_in_db=True)
        software_sel = software_sel_form(request.POST)
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {
                "sel_vuln": sel_vuln,
                "software_sel": software_sel
            }))

    if "find" in request.POST:
        if not is_db_on():
            return HttpResponse(u"特征数据库未启动,请先启动特征数据库")
        soft = softwares.objects.get(
            software_id=int(request.POST.get("software")))
        try:
            db = graph_dbs.objects.get(soft=soft)
        except graph_dbs.DoesNotExist:
            return HttpResponse("软件图形数据库未生成")

        # Is the software's own graph database running?
        if not is_db_on(db.port):
            return HttpResponse("软件图形数据库未启动")

        # Connect to the software database.
        soft_db = JoernSteps()
        try:
            soft_db.setGraphDbURL("http://localhost:%d/db/data/" % db.port)
            soft_db.connectToDatabase()
        except Exception:
            # ``Exception`` (not a bare except) keeps SystemExit and
            # KeyboardInterrupt propagating.
            return HttpResponse("连接软件数据库失败! port:%d" % db.port)

        # Connect to the feature database.
        character_db = JoernSteps()
        try:
            character_db.setGraphDbURL("http://localhost:7474/db/data/")
            character_db.connectToDatabase()
        except Exception:
            return HttpResponse("连接特征数据库失败!")

        # Pick the worker for the selected algorithm.
        alg = request.POST.get("algorithm")
        if alg == "CFG":
            worker = func_similarity_cfgLevel_proc
        elif alg == "PDG":
            worker = func_similarity_pdgLevel_proc
        else:
            # NOTE(review): an unknown algorithm starts no thread but still
            # returns the "started" message below (unchanged behaviour).
            worker = None
        if worker is not None:
            Thread(target=worker,
                   args=(soft, soft_db, character_db,
                         request.POST.getlist("vuln_infos"))).start()
        return HttpResponse("已启动线程进行计算,请等候!")
def produce_nodes_string():
    """Serialise all AST nodes below FunctionDef nodes and count node types.

    Returns:
        A tuple ``(all_nodes_string, ast_features)``.  ``all_nodes_string``
        concatenates one record per AST node that is not itself a
        FunctionDef root; each record ends with ``¬``.  ``ast_features`` is
        a list of 57 counters, where slot ``x`` counts the AST nodes whose
        type equals ``global_node_types[x]``.
    """

    def queryParent(j, nodeId):
        # Query the parents() of one node.  The connection is re-established
        # on every call.
        j.connectToDatabase()
        parent = j.runGremlinQuery('g.v(' + str(nodeId) + ').parents()')
        return parent

    def getStringForNode(node, nodes_and_parents):
        # Build one record:
        #   <node hash>,<type>,<code>,<functionId>,<childNum>,<parent hash>¬
        # Both hashes are taken over "type,code,functionId,childNum" with
        # ',' and '¬' stripped from the code.  The code written into the
        # record itself is NOT stripped.
        global global_node_types
        parent = nodes_and_parents[node]
        # Only the first parent in the query result is used.
        code = str(parent[0].properties['code']).replace(',', '')
        code = code.replace('¬', '')
        parentString = parent[0].properties[
            'type'] + "," + code + "," + str(
                parent[0].properties['functionId']) + "," + str(
                    parent[0].properties['childNum'])
        parent_identifier = hash(tuple(parentString))
        code = str(node.properties['code']).replace(',', '')
        code = code.replace('¬', '')
        nodeString = node.properties['type'] + "," + code + "," + str(
            node.properties['functionId']) + "," + str(
                node.properties['childNum'])
        node_identifier = hash(tuple(nodeString))
        addition_string = str(node_identifier) + "," + str(
            node.properties['type']) + "," + str(
                node.properties['code']) + "," + str(
                    node.properties['functionId']) + "," + str(
                        node.properties['childNum']) + "," + str(
                            parent_identifier) + "¬"
        #global_node_types.add(node.properties['type'])
        return addition_string

    # Not used anywhere below.
    syntactical_features = []
    #max_depth_ast = get_max_depth_ast()
    #ast_node_types_tfs = get_node_types_tfs()
    #ast_node_types_tfidfs = get_node_types_tfidfs()
    #ast_node_type_avg_depths = get_node_type_avg_depth()
    #keywords_term_frequency = get_keywords_term_frequency()
    global global_node_types
    # NOTE(review): fixed at 57 slots while the loop below indexes up to
    # len(global_node_types) - confirm that list never exceeds 57 entries.
    ast_features = [0] * 57
    all_nodes_string = ""
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()
    root_nodes = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef")')
    all_ast_nodes = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef").astNodes()')
    # The result of this query is not used below; parents are fetched again
    # node by node through queryParent.
    ast_parents = j.runGremlinQuery(
        'queryNodeIndex("type:FunctionDef").astNodes().parents()')
    # One parents() query per AST node.
    nodes_and_parents = {}
    for node in all_ast_nodes:
        nodes_and_parents[node] = queryParent(j, node._id)
    # Serialise every node except the FunctionDef roots themselves.
    for node in all_ast_nodes:
        if not node in root_nodes:
            all_nodes_string += getStringForNode(
                node, nodes_and_parents)
    # Count nodes per type; every entry of global_node_types is compared, so
    # a type listed more than once is counted in each matching slot.
    for ast_node in all_ast_nodes:
        x = 0
        while x < len(global_node_types):
            if global_node_types[x] == ast_node.properties['type']:
                ast_features[x] += 1
                x = x + 1
                continue
            else:
                x = x + 1
    #print(ast_features)
    #print(all_nodes_string)
    return all_nodes_string, ast_features
def __init__(self):
    """Create ``self.JS`` and connect it to the database at ``NEO4J_URL``."""
    client = self.JS = JoernSteps()
    client.setGraphDbURL(NEO4J_URL)
    client.connectToDatabase()