def runQueryChunk():
    """Annotate source files with the statement node types of each function.

    Connects through ``JoernSteps`` to the graph database at
    ``http://localhost:7474/db/data/``, fetches the ids of all ``Function``
    nodes and processes them in chunks of 51.  For every chunk it:

    1. queries the statements of the chunk's functions, selecting each
       statement's type, location and owning function id;
    2. queries the file path of each function and keeps only the last
       ``/``-separated component;
    3. groups the statement types per function and per line key (the part
       of the location before the first ``:`` -- presumably the line
       number), joining types that share a key with a single space;
    4. passes that mapping to ``getCleanText(mapping, False)`` and hands the
       result, together with the file name, to ``addInfoToSourceFile``.

    After each chunk a running counter is printed (2 after the first chunk,
    3 after the second, and so on).

    ``JoernSteps``, ``getCleanText`` and ``addInfoToSourceFile`` are expected
    to be provided by the surrounding module.

    Returns:
        None.
    """
    CHUNK_SIZE = 51

    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    function_ids = j.runGremlinQuery("""getNodesWithType('Function').id""")

    # NOTE(review): the original layout was lost; the counter is assumed to
    # be incremented and printed once per chunk (a progress indicator).  It
    # starts at 2 because the original initialised it to 1 and incremented
    # it before printing.
    for counter, chunk in enumerate(j.chunks(function_ids, CHUNK_SIZE), 2):
        # Turn e.g. "(1, 2, 3)" into "(1 2 3)" for the index query below;
        # commas and single quotes are stripped from the tuple's repr.
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # Statements of all functions in this chunk: (type, location, functionId).
        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        statements = j.runGremlinQuery(query)

        # File path of every function in this chunk: (function id, filepath).
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        function_files = j.runGremlinQuery(query)

        # function id -> base name of the file.
        files = {}
        for row in function_files:
            files[int(row[0])] = str(row[1]).split('/')[-1]

        # function id -> list of [location, statement type].
        codes = {}
        for statement in statements:
            function_node_id = int(statement[2])
            location = statement[1]
            node_type = str(statement[0])
            codes.setdefault(function_node_id, []).append([location, node_type])

        for function_node_id, entries in codes.items():
            # line key -> space-separated statement types found at that key.
            line_types = {}
            for raw_location, node_type in entries:
                location = str(raw_location)
                # Statements without a location stringify to 'None'; skip them.
                if location == 'None':
                    continue
                line_key = location.split(':')[0]
                if line_key in line_types:
                    line_types[line_key] = line_types[line_key] + ' ' + node_type
                else:
                    line_types[line_key] = node_type

            text = getCleanText(line_types, False)
            file_name = files.get(function_node_id)
            addInfoToSourceFile(text, file_name)

        # Single-argument call form prints identically on Python 2 and 3.
        print(counter)
class DBInterface:
    """Thin wrapper that delegates database access to a ``JoernSteps`` object.

    The wrapped object is stored in ``self.j`` by ``connectToDatabase``; the
    other methods forward to it and therefore require that call first.
    """

    def connectToDatabase(self):
        """Create the ``JoernSteps`` object, register the steps dir, connect."""
        connection = JoernSteps()
        # Store the object before configuring it, so ``self.j`` is set even
        # if one of the following calls raises.
        self.j = connection
        connection.addStepsDir(JOERN_TOOLS_STEPDIR)
        connection.connectToDatabase()

    def runGremlinQuery(self, query):
        """Run ``query`` through the wrapped object and return its result."""
        result = self.j.runGremlinQuery(query)
        return result

    def chunks(self, ids, chunkSize):
        """Return the wrapped object's ``chunks(ids, chunkSize)`` result."""
        chunked = self.j.chunks(ids, chunkSize)
        return chunked
def query_node_type_chunk():
    """Collect the statement node types of each function, chunk by chunk.

    Connects through ``JoernSteps`` to the graph database at
    ``http://localhost:7474/db/data/``, fetches the ids of all ``Function``
    nodes and processes them in chunks of 51.  For every chunk it:

    1. queries the file path of each function and keeps only the last
       ``/``-separated component;
    2. queries the statements of the chunk's functions, selecting each
       statement's type, location and owning function id;
    3. groups the statement types per function and per line key (the part
       of the location before the first ``:`` -- presumably the line
       number), joining types that share a key with a single space;
    4. passes that mapping to ``cc.AST_type_clean(mapping, True)``.

    ``JoernSteps`` and ``cc`` are expected to be provided by the surrounding
    module.

    Returns:
        None.
    """
    CHUNK_SIZE = 51

    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get function id
    function_ids = step.runGremlinQuery("""getNodesWithType('Function').id""")

    for chunk in step.chunks(function_ids, CHUNK_SIZE):
        # Turn e.g. "(1, 2, 3)" into "(1 2 3)" for the index query below;
        # commas and single quotes are stripped from the tuple's repr.
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # to know which files this function belongs to
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stms_files = step.runGremlinQuery(query)

        # function id -> base name of the file.
        files = {}
        for stms_file in stms_files:
            files[int(stms_file[0])] = str(stms_file[1]).split('/')[-1]

        # Statements of all functions in this chunk: (type, location, functionId).
        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        stms = step.runGremlinQuery(query)

        # get node types: function id -> list of [location, statement type].
        codes = {}
        for stm in stms:
            function_node_id = int(stm[2])
            location = stm[1]
            node_type = str(stm[0])
            codes.setdefault(function_node_id, []).append([location, node_type])

        for function_node_id, entries in codes.items():
            # line key -> space-separated statement types found at that key.
            line_dict = {}
            for raw_location, node_type in entries:
                location = str(raw_location)
                # Statements without a location stringify to 'None'; skip them.
                if location == 'None':
                    continue
                line_key = location.split(':')[0]
                if line_key in line_dict:
                    line_dict[line_key] = line_dict[line_key] + ' ' + node_type
                else:
                    line_dict[line_key] = node_type

            # NOTE(review): nothing in the visible code consumes these two
            # values; the function looks unfinished here.  They are kept so
            # the call to cc.AST_type_clean still happens -- confirm intent.
            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(function_node_id)
#!/usr/bin/env python
#############################################################
# A template for feature extraction for functions with joern.
# Author: Fabian Yamaguchi
#############################################################

from joern.all import JoernSteps

# Connect to the database and register the custom steps found in 'steps/'.
j = JoernSteps()
j.connectToDatabase()
j.addStepsDir('steps/')

# Ids of every node indexed with type 'Function'.
statementIds = j.runGremlinQuery("queryNodeIndex('type:Function').id")

# Process the ids 256 at a time so each query stays bounded in size.
for chunk in j.chunks(statementIds, 256):

    # One [id, name, feature vector] row per function in the chunk.
    query = """ idListToNodes(%s).transform{ [it.id, it.name, it.functionToFeatureVec() ] } """ % (chunk)

    X = j.runGremlinQuery(query)
    for x in X:
        # Single-argument call form prints identically on Python 2 and 3.
        print('===')
        print('FunctionId: %d' % (x[0]))
        print('FunctionName: %s' % (x[1]))
        print('Features (list): %s' % (x[2]))
        print('===')