コード例 #1
0
def runQueryChunk():
    """Collect per-line statement types for every function, chunk by chunk.

    Connects to the graph database at ``http://localhost:7474/db/data/``,
    fetches the ids of all ``Function`` nodes and processes them in chunks
    of 51 ids.  For each chunk it:

    1. queries the statements indexed under those function ids, selecting
       each statement's type, location and functionId;
    2. maps every function id to the last path component of the
       ``filepath`` reached through incoming ``IS_FILE_OF`` edges;
    3. groups the statement types by function and, within a function, by
       the part of the location before the first ``:``;
    4. passes each per-function mapping to ``getCleanText`` and hands the
       result, together with the file name, to ``addInfoToSourceFile``.

    ``getCleanText`` and ``addInfoToSourceFile`` are defined outside this
    block; their exact contracts are not visible here.

    A running counter is printed after each chunk; the first value printed
    is 2 because the counter starts at 1 and is incremented before printing.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    res = j.runGremlinQuery("""getNodesWithType('Function').id""")
    chunk_counter = 1
    CHUNK_SIZE = 51

    for chunk in j.chunks(res, CHUNK_SIZE):
        # Render the ids as a parenthesised, space-separated list by
        # stripping commas and quotes from the tuple's string form
        # (e.g. "(1 2 3)" for integer ids).
        functionIdStr = str(tuple(chunk)).replace(',', '').replace('\'', '')

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % functionIdStr
        stms = j.runGremlinQuery(query)

        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stmsFiles = j.runGremlinQuery(query)

        # function id -> file name without its directory part
        files = dict()
        for stmsFile in stmsFiles:
            files[int(stmsFile[0])] = str(stmsFile[1]).split('/')[-1]

        # function id -> list of [location, statement type] pairs
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for function_id, entries in codes.items():
            # line -> space-joined statement types found on that line
            lineDict = dict()
            for raw_location, stm_type in entries:
                location = str(raw_location)
                if location == u'None':
                    # Statements without a location cannot be tied to a line.
                    continue
                line = location.split(':')[0]
                if line in lineDict:
                    lineDict[line] = lineDict[line] + ' ' + stm_type
                else:
                    lineDict[line] = stm_type

            text = getCleanText(lineDict, False)
            addInfoToSourceFile(text, files.get(function_id))

        chunk_counter += 1
        # Single-argument call form prints identically on Python 2 and 3.
        print(chunk_counter)
コード例 #2
0
ファイル: DBInterface.py プロジェクト: vlad902/joern-tools
class DBInterface:
    """Small wrapper that forwards calls to a ``JoernSteps`` object kept in ``self.j``.

    ``self.j`` only exists after ``connectToDatabase`` has been called.
    """

    def connectToDatabase(self):
        """Create the ``JoernSteps`` object, add ``JOERN_TOOLS_STEPDIR`` and connect."""
        # Bind the attribute first so it is set even if a later call raises.
        steps = self.j = JoernSteps()
        steps.addStepsDir(JOERN_TOOLS_STEPDIR)
        steps.connectToDatabase()

    def runGremlinQuery(self, query):
        """Run *query* through the wrapped ``JoernSteps`` object and return its result."""
        result = self.j.runGremlinQuery(query)
        return result

    def chunks(self, ids, chunkSize):
        """Return whatever the wrapped object's ``chunks(ids, chunkSize)`` returns."""
        chunked = self.j.chunks(ids, chunkSize)
        return chunked
コード例 #3
0
def query_node_type_chunk():
    """Group statement node types per function and per line, chunk by chunk.

    Connects to the graph database at ``http://localhost:7474/db/data/``,
    fetches the ids of all ``Function`` nodes and walks them in chunks of
    51 ids.  For each chunk it maps function ids to file names, collects
    each statement's type and location per function, merges the types that
    share a line, and passes the per-function mapping to
    ``cc.AST_type_clean``.

    NOTE(review): ``clean_type`` and ``fileName`` are computed but not used
    in the visible code, and nothing is returned -- the function looks
    truncated; confirm against the full source.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get function id
    res = step.runGremlinQuery("""getNodesWithType('Function').id""")
    CHUNK_SIZE = 51

    for chunk in step.chunks(res, CHUNK_SIZE):
        # Render the ids as a parenthesised, space-separated list by
        # stripping commas and quotes from the tuple's string form.
        function_id_str = str(tuple(chunk)).replace(',', '').replace('\'', '')

        # to know which files this function belongs to
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stms_files = step.runGremlinQuery(query)

        # function id -> file name without its directory part
        files = dict()
        for stms_file in stms_files:
            files[int(stms_file[0])] = str(stms_file[1]).split('/')[-1]

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        stms = step.runGremlinQuery(query)

        # get node types: function id -> list of [location, type] pairs
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for function_node_id, entries in codes.items():
            # line -> space-joined node types found on that line
            line_dict = dict()
            for raw_location, node_type in entries:
                location = str(raw_location)
                if location == u'None':
                    # Nodes without a location cannot be tied to a line.
                    continue
                line = location.split(':')[0]
                if line in line_dict:
                    line_dict[line] = line_dict[line] + ' ' + node_type
                else:
                    line_dict[line] = node_type

            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(function_node_id)
コード例 #4
0
#!/usr/bin/env python

#############################################################
# A template for feature extraction for functions with joern.
# Author: Fabian Yamaguchi
#############################################################

from joern.all import JoernSteps

j = JoernSteps()
j.connectToDatabase()

# Extra steps directory; presumably where functionToFeatureVec() used in the
# query below is defined -- confirm against the contents of steps/.
j.addStepsDir('steps/')

# Ids of all nodes returned by the 'type:Function' index lookup.
statementIds = j.runGremlinQuery("queryNodeIndex('type:Function').id")

# Process the ids in chunks of 256 rather than in a single query.
for chunk in j.chunks(statementIds, 256):
    query = """
    idListToNodes(%s).transform{ [it.id, it.name, it.functionToFeatureVec() ] }
    """ % (chunk)

    # Each result row is [function id, function name, feature list].
    X = j.runGremlinQuery(query)
    for x in X:
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # whereas the print statement is a SyntaxError on Python 3.
        print('===')
        print('FunctionId: %d' % (x[0]))
        print('FunctionName: %s' % (x[1]))
        print('Features (list): %s' % (x[2]))
        print('===')