コード例 #1
0
ファイル: views.py プロジェクト: jiweizhi/code-similarity
def cal_funcs_similarity(request):
    """Show the function-level similarity reports or start a recomputation.

    GET: loads every ``func_similarity_reports`` row, converts each row with
    ``cal_reports`` and renders ``ast_function_level.html``.

    Any other method: requires the ``vuln_db/index`` directory under
    ``settings.NEO4J_DATABASE_PATH`` to exist and ``is_db_on()`` to be true,
    then connects to the graph database and runs ``vuln_patch_compare_all``
    in a background thread.

    Returns an HTTP response in every case.
    """
    if request.method == "GET":
        reports = [cal_reports(r) for r in func_similarity_reports.objects.all()]
        return render_to_response(
            "ast_function_level.html",
            RequestContext(request, {'reports': reports}))

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse("特征数据库不存在")
    if not is_db_on():
        return HttpResponse("特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt; only real connection errors are reported now.
        return HttpResponse("连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs in the background; the response returns immediately.
    th = Thread(target=vuln_patch_compare_all, args=(neo4jdb, ))
    th.start()
    return HttpResponse("启动线程计算中,请稍后查看!")
コード例 #2
0
 def __init__(self, port):
     """Create a JoernSteps client for the local graph database on ``port``.

     ``port`` is converted with ``int()`` before being formatted into the URL.
     """
     self.j = JoernSteps()
     db_url = 'http://localhost:%d/db/data/' % int(port)
     self.j.setGraphDbURL(db_url)
     self.j.connectToDatabase()
コード例 #3
0
def query_node_type():
    """Collect, per function, the statement node types found on each line.

    Connects to the graph database at localhost:7474, fetches every node of
    type ``Function`` and, for each one, builds a dict mapping a line number
    (as a string) to the space-separated node types located on that line.
    The dict is passed to ``cc.AST_type_clean`` and the name of the file that
    contains the function is looked up.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get all of function in database
    query = """getNodesWithType('Function')"""
    res = step.runGremlinQuery(query)
    for function in res:
        # for one function, get type for every line
        line_dict = dict()
        # The numeric id follows a 5-character prefix in ``ref``
        # (presumably "node/" -- TODO confirm against py2neo).
        function_node_id = int(function.ref[5:])
        # get map of type-location
        query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % function_node_id
        function_nodes = step.runGremlinQuery(query)
        for node in function_nodes:
            # get node type and location
            node_type = str(node[0])
            location = str(node[1])
            if location == 'None':
                # statement without a source location
                continue
            # the line number is the part of the location before the first ':'
            loc = location.split(':')[0]
            # ``in`` replaces dict.has_key(), which no longer exists in Python 3
            if loc in line_dict:
                line_dict[loc] = line_dict[loc] + ' ' + node_type
            else:
                line_dict[loc] = node_type

        clean_type = cc.AST_type_clean(line_dict, True)
        # do another query to know which files this function belongs to
        query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % function_node_id
        file_path = step.runGremlinQuery(query)
        file_name = str(file_path[0]).split('/')[-1]
コード例 #4
0
class jutils:
    """Static helpers that share one class-level ``JoernSteps`` instance."""

    # Created once when the class body is executed and shared by all helpers.
    joern = JoernSteps()

    @staticmethod
    def connectToDatabase():
        """Connect the shared instance, then register ``CHUCKY_STEPS_DIR``."""
        shared = jutils.joern
        shared.connectToDatabase()
        shared.addStepsDir(CHUCKY_STEPS_DIR)

    @staticmethod
    def lookup(lucene_query, traversal=None, projection=None):
        """Run ``raw_lookup`` on the nodes selected by a node-index query."""
        return jutils.raw_lookup(
            "queryNodeIndex('{}')".format(lucene_query), traversal, projection)

    @staticmethod
    def raw_lookup(node_selection, traversal=None, projection=None):
        """Build and run ``selection[.traversal].transform{ [...] }``.

        Without a projection the transform yields ``it.id`` and ``it``;
        otherwise it yields ``it.<name>`` for every name in ``projection``.
        Returns whatever ``runGremlinQuery`` returns.
        """
        if projection:
            attributes = ['it.{}'.format(name) for name in projection]
        else:
            attributes = ['it.id', 'it']

        steps = [node_selection]
        if traversal:
            steps.append(traversal)
        steps.append("transform{{ [ {} ] }}".format(', '.join(attributes)))

        return jutils.joern.runGremlinQuery('.'.join(steps))

    @staticmethod
    def runGremlinCommands(commands):
        """Join ``commands`` with '; ' and run them as a single query."""
        return jutils.joern.runGremlinQuery('; '.join(commands))
コード例 #5
0
def runQueryChunk():
    """Process all functions in chunks and append their cleaned text to files.

    Connects to the graph database at localhost:7474, fetches the ids of all
    ``Function`` nodes and walks them in chunks of ``CHUNK_SIZE``.  For each
    chunk it queries the statements (type, location, functionId) and the
    containing file of every function, groups the statement types per
    function and per line, and hands the result of ``getCleanText`` together
    with the file name to ``addInfoToSourceFile``.  A running chunk counter
    is printed after every chunk.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    query = """getNodesWithType('Function').id"""
    res = j.runGremlinQuery(query)
    flag = 1
    CHUNK_SIZE = 51

    for chunk in j.chunks(res, CHUNK_SIZE):
        # ``flag`` starts at 1 and only grows, so this guard is always true;
        # it is kept as it was in the original.
        if flag:
            # Turn the chunk into "(id1 id2 ...)" for the index query by
            # dropping commas and quotes from the tuple's repr.
            functionIdStr = str(tuple(chunk)).replace(',', '').replace('\'', '')

            query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % functionIdStr
            stms = j.runGremlinQuery(query)

            query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
            stmsFiles = j.runGremlinQuery(query)
            # function id -> base name of the file containing it
            files = dict()
            for stmsFile in stmsFiles:
                files[int(stmsFile[0])] = str(stmsFile[1]).split('/')[-1]

            # function id -> list of [location, node type]
            codes = dict()
            for stm in stms:
                codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

            for func_id, elem in codes.items():
                # line number (string) -> space-separated node types
                lineDict = dict()
                for raw_location, node_type in elem:
                    location = str(raw_location)
                    if location == 'None':
                        continue
                    loc = location.split(':')[0]
                    # ``in`` replaces dict.has_key(), removed in Python 3
                    if loc in lineDict:
                        lineDict[loc] = lineDict[loc] + ' ' + node_type
                    else:
                        lineDict[loc] = node_type
                text = getCleanText(lineDict, False)
                fileName = files.get(func_id)
                addInfoToSourceFile(text, fileName)
            flag += 1
            # single-argument print(): same output on Python 2 and 3
            print(flag)
コード例 #6
0
 def __init__(self, ):
     """Connect to the graph database at ``NEO4J_URL`` and load the functions.

     The two containers are created before ``get_function_list`` is called.
     Previously they were assigned after the call, so anything that call
     stored in them would have been replaced by empty dicts.
     NOTE(review): ``get_function_list`` is not visible here -- confirm that
     it does not rely on the containers being reset afterwards.
     """
     self.JS = JoernSteps()
     self.JS.setGraphDbURL(NEO4J_URL)
     self.JS.connectToDatabase()
     self.FUNCTION_LIST = {}
     self.BASIC_BLOCK_LIST = {}
     self.get_function_list(ChunkStartTool)
コード例 #7
0
    def __getConnection(self):
        """Instantiate ``JoernSteps`` and store it in ``self.connection``.

        Returns True on success.  If instantiation raises, the exception
        arguments are printed and False is returned.
        """
        print("[+] Creating connection.")
        try:
            self.connection = JoernSteps()
        except Exception as e:
            template = "[Error] Cannot instantiate Python-Joern database interface, DBInterface says: {}"
            print(template.format(e.args))
            return False
        else:
            return True
コード例 #8
0
ファイル: similarity_func.py プロジェクト: niuxu18/logTracker
def getFunctionSimilarity():
    """Compute pairwise function similarity and write the strong pairs to CSV.

    Writes a header row to ``my_constant.FUNC_SIMILAIRTY_FILE_NAME``, fetches
    the function names through the ``getFunctions`` step, drops names that
    are empty after ``my_util.removeNamespace``, names starting with
    ``"operator "`` and duplicates, then calls ``computeSim`` on every
    ordered pair of distinct entries.  Pairs whose similarity is above 0.5
    are written to the CSV file and collected.

    Returns:
        dict mapping ``(func_a, func_b)`` to the similarity value.
    """
    # The ``with`` block closes the file even when a query or computeSim
    # raises; the original leaked the handle in that case.  open() replaces
    # the Python-2-only file() builtin.  Mode 'wb' is kept as in the
    # original (the Python 2 convention for csv).
    with open(my_constant.FUNC_SIMILAIRTY_FILE_NAME, 'wb') as analysis:
        analyze_writer = csv.writer(analysis)
        analyze_writer.writerow(['func_a', 'func_b', 'similarity'])

        # initialize python-joern instance
        joern_instance = JoernSteps()
        joern_instance.addStepsDir("/data/joern-code/query/")
        joern_instance.setGraphDbURL("http://localhost:7474/db/data/")
        # connect to database
        joern_instance.connectToDatabase()

        # fetch all function info
        functions_query = '_().getFunctions()'
        functions_temp = joern_instance.runGremlinQuery(functions_query)[0]

        # filter out operator overloads and duplicates, keeping first-seen
        # order; a set makes the duplicate check O(1) instead of a list scan
        functions = []
        seen = set()
        for function in functions_temp:
            # remove namespace before::
            function = my_util.removeNamespace(function)
            if function == '':
                continue
            if function.startswith("operator ") or function in seen:
                continue
            seen.add(function)
            # computeSim receives each function wrapped in a one-element list
            functions.append([function])

        # compute similarity and write back into file
        func_similarity_dic = {}
        word_list_dict = {}
        for i, func_a in enumerate(functions):
            for j, func_b in enumerate(functions):
                if i == j:
                    continue
                similarity, word_list_dict = computeSim(func_a, func_b,
                                                        word_list_dict)
                # store back
                if similarity > 0.5:
                    analyze_writer.writerow(
                        [func_a[0], func_b[0], similarity])
                    func_similarity_dic[(func_a[0], func_b[0])] = similarity

    return func_similarity_dic
コード例 #9
0
def query_node_type_chunk():
    """Chunked variant: collect per-line statement node types per function.

    Connects to the graph database at localhost:7474, fetches the ids of all
    ``Function`` nodes and walks them in chunks of ``CHUNK_SIZE``.  For every
    chunk it looks up the containing file of each function and the
    statements (type, location, functionId), groups the node types per
    function and per line and passes each per-line dict to
    ``cc.AST_type_clean``.
    """
    step = JoernSteps()
    step.setGraphDbURL('http://localhost:7474/db/data/')
    step.connectToDatabase()

    # get function id
    query = """getNodesWithType('Function').id"""
    res = step.runGremlinQuery(query)
    flag = 1
    CHUNK_SIZE = 51

    for chunk in step.chunks(res, CHUNK_SIZE):
        # "(id1 id2 ...)": the tuple repr without commas and quotes
        function_tuple = tuple(chunk)
        function_id_str = str(function_tuple).replace(',', '').replace('\'', '')

        # to know which files this function belongs to
        query = """idListToNodes(%s).as("x").in("IS_FILE_OF").filepath.as("y").select{it.id}{it}""" % chunk
        stms_files = step.runGremlinQuery(query)
        # function id -> base name of the containing file
        files = dict()
        for stms_file in stms_files:
            files[int(stms_file[0])] = str(stms_file[1]).split('/')[-1]

        query = """queryNodeIndex("functionId:%s").as("x").statements().as("y").as("z").select{it.type}{it.location}{it.functionId}""" % function_id_str
        stms = step.runGremlinQuery(query)
        # get node types: function id -> list of [location, node type]
        codes = dict()
        for stm in stms:
            codes.setdefault(int(stm[2]), []).append([stm[1], str(stm[0])])

        for func_id, elem in codes.items():
            # line number (string) -> space-separated node types
            line_dict = dict()
            for raw_location, node_type in elem:
                location = str(raw_location)
                if location == 'None':
                    continue
                loc = location.split(':')[0]
                # ``in`` replaces dict.has_key(), removed in Python 3 and
                # inconsistent with the ``in`` test used for ``codes``
                if loc in line_dict:
                    line_dict[loc] = line_dict[loc] + ' ' + node_type
                else:
                    line_dict[loc] = node_type
            clean_type = cc.AST_type_clean(line_dict, True)
            fileName = files.get(func_id)
コード例 #10
0
 def __init__(self):
     """Connect to ``NEO4J_URL``, set up data paths and load initial data."""
     self.JS = JoernSteps()
     self.JS.setGraphDbURL(NEO4J_URL)
     self.JS.connectToDatabase()

     # Data directories, all located below FILE_PATH.
     under_file_path = os.path.join
     self.return_type_dataDir = under_file_path(FILE_PATH, 'return_type_data')
     self.parameter_dataDir = under_file_path(FILE_PATH, 'parameter_data')
     self.edge_dataDir = under_file_path(FILE_PATH, 'edge_data')
     self.node_type_dataDir = under_file_path(FILE_PATH, 'node_type_data')

     # Containers created before the loader calls below run.
     self.all_return_type = []
     self.all_node_type = []
     self.FUNCTION_LIST = {}

     self.get_function_list(ChunkStartTool)
     self.get_all_return_type()
コード例 #11
0
ファイル: manual_search.py プロジェクト: zer0yu/ccdetection
    def __init__(self, port):
        """Connect to the local graph database on ``port``.

        The ``custom_gremlin_steps`` directory below the configured base
        directory is registered before the connection is opened.
        """
        self.j = JoernSteps()
        self.j.setGraphDbURL('http://localhost:%d/db/data/' % int(port))

        custom_steps_dir = (
            Configurator.getPath(Configurator.KEY_BASE_DIR) +
            "/custom_gremlin_steps")
        self.j.addStepsDir(custom_steps_dir)
        self.j.connectToDatabase()
コード例 #12
0
def produce_file_function_location_triads(file):
    """Return ``(file base name, function name, start index)`` per function.

    Queries all nodes of type ``Function`` from the graph database at
    localhost:7474.  The start index is the line number taken from the
    node's ``location`` property minus one, as a string.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    # First pass: read name and zero-based start line of every function.
    entries = []
    for function_node in j.runGremlinQuery('queryNodeIndex("type:Function")'):
        line_number = function_node.properties['location'].split(":")[0]
        start_index = str(int(line_number) - 1)
        entries.append((function_node.properties['name'], start_index))

    # Second pass: prefix every entry with the base name of ``file``.
    return [(file.split("/")[-1], name, start_index)
            for name, start_index in entries]
コード例 #13
0
def runQuery():
    """Append the cleaned per-line node types of every function to its file.

    Connects to the graph database at localhost:7474 and, for each
    ``Function`` node, maps every line number to the space-separated
    statement node types on that line, passes the map to ``getCleanText``,
    and hands the text together with the containing file's base name to
    ``addInfoToSourceFile``.  A running counter is printed after each
    function.
    """
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    query = """getNodesWithType('Function')"""
    res = j.runGremlinQuery(query)
    flag = 1
    for function in res:
        # ``flag`` starts at 1 and only grows, so this guard is always true;
        # it is kept as it was in the original.
        if flag:
            lineDict = dict()
            # The numeric id follows a 5-character prefix in ``ref``
            # (presumably "node/" -- TODO confirm against py2neo).
            functionnodeid = int(function.ref[5:])

            query = """queryNodeIndex("functionId:%i").as("x").statements().as("y").select{it.type}{it.location}""" % functionnodeid
            allNodesOfFunction = j.runGremlinQuery(query)

            for node in allNodesOfFunction:
                node_type = str(node[0])
                location = str(node[1])
                if location == 'None':
                    continue
                loc = location.split(':')[0]
                # ``in`` replaces dict.has_key(), removed in Python 3
                if loc in lineDict:
                    lineDict[loc] = lineDict[loc] + ' ' + node_type
                else:
                    lineDict[loc] = node_type

            text = getCleanText(lineDict, False)
            query = """g.v(%d).in("IS_FILE_OF").filter{it.type=="File"}.filepath""" % functionnodeid
            filepath = j.runGremlinQuery(query)
            fileName = str(filepath[0]).split('/')[-1]
            addInfoToSourceFile(text, fileName)
        flag += 1
        # single-argument print(): same output on Python 2 and 3
        print(flag)
コード例 #14
0
def createdb(coverage_db, json_dbname, joern_url='http://localhost:7474/db/data/'):
    """Combine coverage information with joern queries and write a JSON db.

    Args:
        coverage_db: path passed to ``sqlite3.connect``.
        json_dbname: path of the JSON file to write.
        joern_url: graph database URL handed to ``JoernSteps``.

    Side effects: sets the module globals ``j`` (the JoernSteps client) and
    ``conn`` (the sqlite connection), prints progress to stdout and writes
    ``json_dbname``.
    """
    global j, conn
    from joern.all import JoernSteps
    j = JoernSteps()
    j.setGraphDbURL(joern_url)
    j.connectToDatabase()

    if_ids = j.runGremlinQuery('queryNodeIndex("type:IfStatement").id')
    print("Total number of IfStatements:%d" % len(if_ids))

    switch_ids = j.runGremlinQuery('queryNodeIndex("type:SwitchStatement").id')
    print("Total number of SwitchStatement:%d" % len(switch_ids))
    if_ids += switch_ids

    # ``conn`` is a module global (see the ``global`` statement above); it
    # is not used directly in this function.
    conn = sqlite3.connect(coverage_db)

    conditionals = {}  # filename is key
    idx = 0
    for node_id in if_ids:  # iterate over each conditional and gather branch info
        conditional = get_conditional_info(node_id, idx)
        if conditional == {}:
            continue
        idx += 1
        sys.stdout.write("Processing conditional %d out of %d total.\r" % (idx, len(if_ids)))
        sys.stdout.flush()
        # group by file name
        conditionals.setdefault(conditional["filename"], []).append(conditional)

    # Sort by file name, then by line number.  The original iterated the
    # dict directly, so the file order was arbitrary despite its comment.
    sorted_conditionals = []
    for filename in sorted(conditionals):
        conditionals[filename].sort(key=lambda c: c["line"])
        sorted_conditionals += conditionals[filename]

    # Save as JSON; the ``with`` block closes the handle, which the original
    # left open.  Mode "wb" is kept from the original (Python 2).
    with open(json_dbname, "wb") as out:
        json.dump(sorted_conditionals, out)
    print("\nDone!")
コード例 #15
0
def func_pdg_comp_view(request):
    """Show the PDG comparison page or start a comparison for one function.

    GET: renders ``pdg_comp.html`` with the result of ``funcs_sel()`` and all
    ``pdg_vuln_patch_funcs_report`` rows.

    Any other method: reads ``funcs_sel`` from the POST data.  If a report
    already exists for that vulnerability, says so.  Otherwise it requires
    the ``vuln_db/index`` directory under ``settings.NEO4J_DATABASE_PATH`` to
    exist and ``is_db_on()`` to be true, then connects to the graph database
    and runs ``func_pdg_similarity_proc`` in a background thread.

    Returns an HTTP response in every case.
    """
    if request.method == "GET":
        funcs = funcs_sel()
        infos = pdg_vuln_patch_funcs_report.objects.all()
        return render_to_response(
            "pdg_comp.html",
            RequestContext(request, {
                "funcs": funcs,
                "infos": infos
            }))

    vuln_id = request.POST.get("funcs_sel")
    try:
        vuln_info = vulnerability_info.objects.get(vuln_id=vuln_id)
        pdg_vuln_patch_funcs_report.objects.get(vuln_info=vuln_info)
    except Exception:
        # As in the original, any failed lookup is treated as "no report
        # yet" and the view goes on to compute one.  Narrowed from a bare
        # ``except:`` so SystemExit/KeyboardInterrupt are not swallowed.
        pass
    else:
        return HttpResponse(u"已经计算过该函数")

    index_dir = os.path.join(settings.NEO4J_DATABASE_PATH, "vuln_db", "index")
    if not os.path.isdir(index_dir):
        return HttpResponse(u"特征数据库不存在")
    if not is_db_on():
        return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

    neo4jdb = JoernSteps()
    try:
        neo4jdb.setGraphDbURL('http://localhost:7474/db/data/')
        neo4jdb.connectToDatabase()
    except Exception:
        return HttpResponse(u"连接特征数据库失败,请联系管理员查明原因!")

    # The comparison runs in the background; the response returns immediately.
    th = Thread(target=func_pdg_similarity_proc,
                args=(vuln_id, neo4jdb))
    th.start()
    return HttpResponse(u"已经启动线程进行计算")
コード例 #16
0
def Tran(x):
    """Translate an AST node type name into its placeholder token.

    Returns the string 'null' for any name that is not in the table.
    """
    tokens = dict(
        PrimaryExpression="$COS",
        CallExpression="$CAL",
        Condition="$CON",
        Identifier="$VAR",
        CastExpression="$CAT",
        OrExpression="$OP",
        IncDecOp="$OP",
        UnaryOp="$UOP",
        AdditiveExpression="$ADD",
        ArrayIndexing="$IDX",
    )
    if x in tokens:
        return tokens[x]
    return 'null'

# Script section: connect to the graph database at localhost:7474.
j=JoernSteps()

j.setGraphDbURL('http://localhost:7474/db/data/')

# j.addStepsDir('Use this to inject utility traversals')

j.connectToDatabase()

# Read the whole pointer list into memory, one entry per line.
# NOTE(review): the handle is not closed in the visible part of the script.
ptrlist=open('/home/hongfa/workspace/thttpd_workspace/ptrList','r')

ptrs=ptrlist.readlines()

for ptr in ptrs:

    #print ptr
    # Everything after the first "functionId:" marker on the line; raises
    # IndexError when a line does not contain the marker.
    functionID = ptr.split("functionId:")[1]
コード例 #17
0
def connect_db():
    """Return a ``JoernSteps`` client connected to localhost:7474."""
    steps = JoernSteps()
    steps.setGraphDbURL('http://localhost:7474/db/data/')
    steps.connectToDatabase()
    return steps
コード例 #18
0
 def setUp(self):
     """Create ``self.j`` and connect it using the default database URL."""
     self.j = client = JoernSteps()
     client.connectToDatabase()
コード例 #19
0
def init_joern():
    """Return a ``JoernSteps`` client connected to localhost:7474."""
    client = JoernSteps()
    client.setGraphDbURL("http://localhost:7474/db/data/")
    client.connectToDatabase()
    return client
コード例 #20
0
    def _initDatabaseConnection(self):
        """Create ``self.j``, connect it, then register the 'steps/' dir."""
        self.j = client = JoernSteps()
        client.connectToDatabase()
        client.addStepsDir('steps/')
コード例 #21
0
 def __init__(self):
     """Create the ``JoernSteps`` client and initialise the connection."""
     client = JoernSteps()
     self.j = client
     # Connection set-up is delegated to a method defined outside this view.
     self.init_database_connection()
コード例 #22
0
ファイル: test.py プロジェクト: jiweizhi/code-similarity
#coding=utf-8
'''
Created on Jan 4, 2016

@author: root
'''

from algorithm.util import vuln_patch_compare
from astLevel_algorithm.models import vulnerability_info
from joern.all import JoernSteps

if __name__ == "__main__":
    # Script entry point: compare every stored vulnerability against the
    # graph database at localhost:7474.
    vulnerabilities = vulnerability_info.objects.all()

    joern_db = JoernSteps()
    joern_db.setGraphDbURL('http://localhost:7474/db/data/')
    joern_db.connectToDatabase()

    for vulnerability in vulnerabilities:
        vuln_patch_compare(vulnerability.vuln_id, joern_db)
コード例 #23
0
ファイル: DBInterface.py プロジェクト: vlad902/joern-tools
 def connectToDatabase(self):
     """Create ``self.j``, register ``JOERN_TOOLS_STEPDIR`` and connect."""
     self.j = client = JoernSteps()
     client.addStepsDir(JOERN_TOOLS_STEPDIR)
     client.connectToDatabase()
コード例 #24
0
ファイル: chucky_joern.py プロジェクト: a0x77n/chucky-tools
 def _init_joern_interface(self, step_dir=None):
     """Create and connect ``self._joern``, then mark it as initialised.

     ``step_dir``, when truthy, is registered as an extra steps directory
     before the connection is opened.
     """
     self._joern = client = JoernSteps()
     if step_dir:
         client.addStepsDir(step_dir)
     client.connectToDatabase()
     self.__is_initialized = True
コード例 #25
0
def bug_finder(request):
    """Bug-finder view: pick a software, list its vulnerabilities, run a search.

    GET: renders ``bug_finder.html`` with an empty software selection form.

    POST containing ``sel_vuln``: collects the CVEs of every ``softwares``
    row sharing the selected software's name, looks up the matching
    ``vulnerability_info`` rows with ``is_in_db=True`` and re-renders the
    page with them.

    POST containing ``find``: checks that both databases are running,
    connects to the software's graph database and to the one at
    localhost:7474, and starts the worker for the chosen algorithm
    ("CFG" or "PDG") in a background thread.

    NOTE(review): a POST with neither key returns None, as in the original.
    """
    if request.method == "GET":
        software_sel = software_sel_form()
        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {"software_sel": software_sel}))

    # ``in`` replaces has_key(), which dict-like objects lack on Python 3.
    if "sel_vuln" in request.POST:
        soft_id = int(request.POST.get("software"))
        soft_name = softwares.objects.get(
            software_id=soft_id).software_name

        # Look up everything recorded for this software name, regardless
        # of version.
        softs = softwares.objects.filter(software_name=soft_name)
        # First collect all CVEs involved.
        cves = []
        for soft in softs:
            cves.extend(soft.cve_infos_set.all())

        # Then find all vulnerabilities belonging to those CVEs.
        sel_vuln = vulnerability_info.objects.filter(cve_info__in=cves,
                                                     is_in_db=True)

        software_sel = software_sel_form(request.POST)

        return render_to_response(
            "bug_finder.html",
            RequestContext(request, {
                "sel_vuln": sel_vuln,
                "software_sel": software_sel
            }))

    if "find" in request.POST:
        if not is_db_on():
            return HttpResponse(u"特征数据库未启动,请先启动特征数据库")

        soft = softwares.objects.get(
            software_id=int(request.POST.get("software")))
        # Only this lookup is guarded for DoesNotExist; the original wrapped
        # the whole branch in the ``try``.
        try:
            db = graph_dbs.objects.get(soft=soft)
        except graph_dbs.DoesNotExist:
            return HttpResponse("软件图形数据库未生成")

        # Check that the software's graph database is running.
        if not is_db_on(db.port):
            return HttpResponse("软件图形数据库未启动")

        # Connect to the software database.
        soft_db = JoernSteps()
        try:
            soft_db.setGraphDbURL("http://localhost:%d/db/data/" %
                                  db.port)
            soft_db.connectToDatabase()
        except Exception:
            # Narrowed from a bare ``except:``.
            return HttpResponse("连接软件数据库失败! port:%d" % db.port)

        # Connect to the database on the default port.
        character_db = JoernSteps()
        try:
            character_db.setGraphDbURL(
                "http://localhost:7474/db/data/")
            character_db.connectToDatabase()
        except Exception:
            return HttpResponse("连接特征数据库失败!")

        # Choose the worker from the selected algorithm.
        workers = {
            "CFG": func_similarity_cfgLevel_proc,
            "PDG": func_similarity_pdgLevel_proc,
        }
        worker = workers.get(request.POST.get("algorithm"))
        if worker is not None:
            th = Thread(target=worker,
                        args=(soft, soft_db, character_db,
                              request.POST.getlist("vuln_infos")))
            th.start()
        # NOTE(review): as in the original, an unknown algorithm starts no
        # thread but still gets the "started" response below.
        return HttpResponse("已启动线程进行计算,请等候!")
コード例 #26
0
        def produce_nodes_string():
            """Serialise the AST nodes below all FunctionDef nodes.

            Returns a tuple ``(all_nodes_string, ast_features)``:
            ``all_nodes_string`` joins one '¬'-terminated record per AST
            node that is not itself a FunctionDef root, and ``ast_features``
            is a list of 57 counters, one per position in the module-level
            ``global_node_types``.
            """
            global global_node_types

            def queryParent(j, nodeId):
                # Reconnects before every lookup, as the original did.
                j.connectToDatabase()
                return j.runGremlinQuery('g.v(' + str(nodeId) +
                                         ').parents()')

            def _identifier(props):
                # Hash of "type,code,functionId,childNum", with commas and
                # '¬' stripped from the code first.
                code = str(props['code']).replace(',', '')
                code = code.replace('¬', '')
                text = (props['type'] + "," + code + "," +
                        str(props['functionId']) + "," +
                        str(props['childNum']))
                return hash(tuple(text))

            def getStringForNode(node, nodes_and_parents):
                # One record: own identifier, type, code, functionId,
                # childNum and the identifier of the first parent.
                parent_identifier = _identifier(
                    nodes_and_parents[node][0].properties)
                props = node.properties
                node_identifier = _identifier(props)
                columns = [
                    node_identifier,
                    props['type'],
                    props['code'],
                    props['functionId'],
                    props['childNum'],
                    parent_identifier,
                ]
                return ",".join(str(column) for column in columns) + "¬"

            ast_features = [0] * 57

            j = JoernSteps()
            j.setGraphDbURL('http://localhost:7474/db/data/')
            j.connectToDatabase()
            root_nodes = j.runGremlinQuery(
                'queryNodeIndex("type:FunctionDef")')
            all_ast_nodes = j.runGremlinQuery(
                'queryNodeIndex("type:FunctionDef").astNodes()')
            # The result is not used below; the query is still issued, as in
            # the original.
            ast_parents = j.runGremlinQuery(
                'queryNodeIndex("type:FunctionDef").astNodes().parents()')

            # Parents are fetched for every node before any record is built.
            nodes_and_parents = {}
            for ast_node in all_ast_nodes:
                nodes_and_parents[ast_node] = queryParent(j, ast_node._id)

            all_nodes_string = "".join(
                getStringForNode(ast_node, nodes_and_parents)
                for ast_node in all_ast_nodes
                if ast_node not in root_nodes)

            # Count how often each known node type occurs.
            for ast_node in all_ast_nodes:
                for position, known_type in enumerate(global_node_types):
                    if known_type == ast_node.properties['type']:
                        ast_features[position] += 1

            return all_nodes_string, ast_features
コード例 #27
0
 def __init__(self):
     """Create ``self.JS`` and connect it to the database at ``NEO4J_URL``."""
     self.JS = client = JoernSteps()
     client.setGraphDbURL(NEO4J_URL)
     client.connectToDatabase()