Example #1
0
def createRawGraph():
    """Build the raw function call graph from radare2's ``aflj`` output.

    NetworkX graph structure produced here:

    * FUNCTION as node, keyed by the hex string of the function offset.
      Attributes: ``size``, ``calltype``, ``calls`` (list of
      ``[call address, API name]``), ``apicallcount``, ``strings`` (list of
      ``[string address, string]``), ``stringcount`` and ``functiontype``.
      ``apicallcount``, ``stringcount`` and ``functiontype`` are only
      initialised here (0, 0, ''), not updated.
    * FUNCTION REFERENCE as edge (function address -> target address) with
      attribute ``pos``, the hex string of the reference offset (``at``).

    Returns:
        tuple: ``(graphity, debugDict)`` where ``graphity`` is the
        ``nx.DiGraph`` and ``debugDict`` holds the counters
        ``xsectionsize``, ``functions``, ``refsFunctions``,
        ``refsGlobalVar``, ``refsUnrecognized``, ``apiTotal``,
        ``apiMisses`` and ``stringsReferencedTotal``. ``debugDict`` is also
        handed to ``stringScan``, which may add entries of its own.
    """
    graphity = nx.DiGraph()
    debugDict = {}

    # An empty reply from r2 is treated as "no functions found".
    functions = R2PY.cmd("aflj")
    functionList = json.loads(functions) if functions else []

    # Total code section size: element 2 of every section entry is summed.
    sectionsList = getCodeSections()
    debugDict['xsectionsize'] = sum(execSec[2] for execSec in sectionsList)

    # CREATING THE GRAPH

    debugDict['functions'] = len(functionList)

    # TODO add count of refs from A to B as weights to edges
    # TODO count calls to global vars, to indirect targets

    # First pass: create every function node with its full attribute set.
    # If an edge is added that includes a non-existent node, NetworkX adds
    # the node without the necessary attributes -- that is why we iterate
    # twice.
    for item in functionList:
        graphity.add_node(hex(item['offset']),
                          size=item['realsz'],
                          calltype=item['calltype'],
                          calls=[],
                          apicallcount=0,
                          strings=[],
                          stringcount=0,
                          functiontype='')

    refsGlobalVar = 0
    refsUnrecognized = 0
    refsFunc = 0

    # Result of getIat(); fetched on first use and reused afterwards instead
    # of being re-evaluated for every unresolved call reference.
    iat = None

    # Second pass: connect functions through their call references.
    for item in functionList:

        # TODO look into new values provided by aflj
        caller = hex(item['offset'])

        # Not every aflj entry carries a 'callrefs' list.
        for xref in item.get('callrefs', []):

            if xref['type'] != 'C':
                continue

            target = hex(xref['addr'])

            if target in graphity:
                # References of a function to itself are not added as edges.
                if item['offset'] != xref['addr']:
                    graphity.add_edge(caller, target, pos=hex(xref['at']))
                    refsFunc += 1
                continue

            if iat is None:
                iat = getIat()

            if target in iat:
                # Target is listed by getIat(); nothing is recorded for it
                # in this pass.
                continue

            if not isValidCode(target, sectionsList):
                # TODO do something: dangling call to address outside code
                # section (glob var, dynamic API loading)
                refsGlobalVar += 1
            else:
                print(
                    "FAIL: Call to code thats not a function, an import/symbol or otherwise recognized. Missed function perhaps. %s -> %s"
                    % (caller, target))
                refsUnrecognized += 1

    print('* %s Graph created with NetworkX ' % str(datetime.now()))
    debugDict['refsFunctions'] = refsFunc
    debugDict['refsGlobalVar'] = refsGlobalVar
    debugDict['refsUnrecognized'] = refsUnrecognized

    # FITTING GRAPH WITH API REFS

    apiRefs = crossRefScan()
    callNum = len(apiRefs)
    missesNum = 0

    # NOTE(review): `Graph.node` was removed in networkx 2.4 in favour of
    # `Graph.nodes`; it is kept below because the networkx version this file
    # targets is not visible here -- confirm before upgrading.
    for call in apiRefs:

        # get the address of the function that contains the call to a given
        # symbol
        funcAddress = gimmeRespectiveFunction(call)
        # TODO check if funcAddress is the real function address
        if funcAddress in graphity:
            # node(funcAddress) has attribute calls, which contains a list
            # of API calls
            api = gimmeDatApiName(apiRefs[call])
            graphity.node[funcAddress]['calls'].append([call, api])
        else:
            # detected API call reference does not resolve to a function
            # offset, insert handling for this here
            missesNum += 1

    # debug: print total API refs and functionless API refs, maybe indicator
    # for obfuscated code
    print(
        '* %s Graph extended with API calls, %d calls in total, %d dangling w/o function reference '
        % (str(datetime.now()), callNum, missesNum))
    debugDict['apiTotal'] = callNum
    debugDict['apiMisses'] = missesNum

    # FITTING GRAPH WITH STRING REFS

    allTheStrings = stringScan(debugDict)
    stringrefs = 0

    for aString in allTheStrings:

        stringAddr = aString[0]
        stringFunc = aString[1]
        stringData = aString[2]

        # add string to respective function node in graph
        if stringFunc in graphity:
            graphity.node[stringFunc]['strings'].append(
                [stringAddr, stringData])
            stringrefs += 1
        else:
            print("\n*** BIG FAIL *** String's function not in graph %s %s" %
                  (stringFunc, stringData))

    print('* %s Graph extended with string references ' %
          (str(datetime.now())))
    debugDict['stringsReferencedTotal'] = stringrefs

    return graphity, debugDict
Example #2
0
def createSeGraph():
    """Build the function call graph from radare2's ``aflj`` output.

    Every function reported by ``aflj`` becomes a node keyed by the hex
    string of its offset, with attributes ``size``, ``calltype``, ``calls``
    (list of ``[call address, API name]``), ``apicallcount`` (number of
    entries appended to ``calls``) and ``strings`` (list of
    ``[string address, string]``). Call references of type ``'C'`` whose
    target is a known function become edges carrying ``pos``, the hex
    string of the reference offset (``at``).

    All ``print`` calls take a single parenthesised expression, so the
    function emits the same output under Python 2 and also parses under
    Python 3.

    Returns:
        tuple: ``(graphity, debugDict)`` where ``graphity`` is the
        ``nx.DiGraph`` and ``debugDict`` holds the counters
        ``xsectionsize``, ``functions``, ``refsFunctions``,
        ``refsGlobalVar``, ``refsUnrecognized``, ``apiTotal``,
        ``apiMisses`` and ``stringsReferencedTotal``. ``debugDict`` is also
        handed to ``stringScan``, which may add entries of its own.
    """
    graphity = nx.DiGraph()
    debugDict = {}

    # An empty reply from r2 is treated as "no functions found".
    functions = R2PY.cmd("aflj")
    functionList = json.loads(functions) if functions else []

    # Total code section size: element 2 of every section entry is summed.
    sectionsList = getCodeSections()
    debugDict['xsectionsize'] = sum(execSec[2] for execSec in sectionsList)

    # CREATING THE GRAPH

    debugDict['functions'] = len(functionList)

    # First pass: create every function node with its full attribute set.
    # If an edge is added that includes a non-existent node, NetworkX adds
    # the node without the necessary attributes -- that is why we iterate
    # twice.
    for item in functionList:
        graphity.add_node(hex(item['offset']),
                          size=item['size'],
                          calltype=item['calltype'],
                          calls=[],
                          apicallcount=0,
                          strings=[])

    refsGlobalVar = 0
    refsUnrecognized = 0
    refsFunc = 0

    # Result of getIat(); fetched on first use and reused afterwards instead
    # of being re-evaluated for every unresolved call reference.
    iat = None

    # Second pass: connect functions through their call references.
    for item in functionList:

        caller = hex(item['offset'])

        # A function entry without a 'callrefs' key simply contributes no
        # edges instead of raising KeyError.
        for xref in item.get('callrefs', []):

            if xref['type'] != 'C':
                continue

            target = hex(xref['addr'])

            if target in graphity:
                graphity.add_edge(caller, target, pos=hex(xref['at']))
                refsFunc += 1
                continue

            if iat is None:
                iat = getIat()

            if target in iat:
                # Target is listed by getIat(); nothing is recorded for it
                # in this pass.
                continue

            if not isValidCode(target, sectionsList):
                print("DANGLING call to address outside code section, glob var, dynamic API loading %s -> %s" % (
                    caller, target))
                refsGlobalVar += 1
            else:
                print("FAIL: Call to code thats not a function, an import/symbol or otherwise recognized. Missed function perhaps. %s -> %s" % (
                    caller, target))
                refsUnrecognized += 1

    print('* %s Graph created with NetworkX ' % str(datetime.now()))
    debugDict['refsFunctions'] = refsFunc
    debugDict['refsGlobalVar'] = refsGlobalVar
    debugDict['refsUnrecognized'] = refsUnrecognized

    # FITTING GRAPH WITH API REFS

    #loadFlirts()
    apiRefs = crossRefScan()
    callNum = len(apiRefs)
    missesNum = 0

    # NOTE(review): `Graph.node` was removed in networkx 2.4 in favour of
    # `Graph.nodes`; it is kept below because the networkx version this file
    # targets is not visible here -- confirm before upgrading.
    for call in apiRefs:

        # get the address of the function that contains the call to a given
        # symbol; the raw r2 reply is used as the node key
        refAddressCmd = "?v $FB @ " + call
        funcAddress = R2PY.cmd(refAddressCmd)

        if funcAddress in graphity:
            # node(funcAddress) has attribute calls, which contains a list
            # of API calls
            api = gimmeDatApiName(apiRefs[call])
            nodeAttrs = graphity.node[funcAddress]
            nodeAttrs['calls'].append([call, api])
            nodeAttrs['apicallcount'] = nodeAttrs['apicallcount'] + 1
        else:
            # detected API call reference does not resolve to a function
            # offset, insert handling for this here
            print("DANGLING API CALL %s %s" % (call, apiRefs[call]))
            missesNum += 1

    # debug: print total API refs and functionless API refs, maybe indicator
    # for obfuscated code
    print('* %s Graph extended with API calls, %d calls in total, %d dangling w/o function reference ' % (
        str(datetime.now()), callNum, missesNum))
    debugDict['apiTotal'] = callNum
    debugDict['apiMisses'] = missesNum

    # FITTING GRAPH WITH STRING REFS

    allTheStrings = stringScan(debugDict)
    stringrefs = 0

    for aString in allTheStrings:

        stringAddr = aString[0]
        stringFunc = aString[1]
        stringData = aString[2]

        # add string to respective function node in graph
        if stringFunc in graphity:
            graphity.node[stringFunc]['strings'].append(
                [stringAddr, stringData])
            stringrefs += 1
        else:
            print("\nFAIL: String's function not in graph %s %s" % (stringFunc,
                                                                    stringData))

    print('* %s Graph extended with string references ' % (str(datetime.now())))
    debugDict['stringsReferencedTotal'] = stringrefs

    return graphity, debugDict