def createRawGraph():
    """Build the raw function call graph from radare2's function list.

    The functions reported by the r2 command ``aflj`` become the nodes of a
    ``networkx.DiGraph``; call references between them become edges.  The
    graph is then enriched with the API calls found by ``crossRefScan()`` and
    the string references found by ``stringScan()``.

    Graph layout:
      * node -- one per function, keyed by ``hex(offset)``.  Attributes:
        ``size`` (``realsz`` from aflj), ``calltype``, ``calls`` (list of
        ``[address, apiName]``), ``apicallcount``, ``strings`` (list of
        ``[address, string]``), ``stringcount``, ``functiontype``.
      * edge -- caller -> callee for every call reference of type ``'C'``
        whose target is a known function other than the caller itself.
        Attribute ``pos`` is the hex address of the referencing instruction.

    Returns:
        A ``(graphity, debugDict)`` tuple: the populated graph and a dict of
        statistics (``xsectionsize``, ``functions``, ``refsFunctions``,
        ``refsGlobalVar``, ``refsUnrecognized``, ``apiTotal``, ``apiMisses``,
        ``stringsReferencedTotal``, plus whatever ``stringScan`` adds).
    """
    graphity = nx.DiGraph()
    debugDict = {}

    functions = R2PY.cmd("aflj")
    functionList = json.loads(functions) if functions else []

    # Total size of the code sections; index 2 of each entry is summed
    # (presumably the section size -- confirm against getCodeSections()).
    sectionsList = getCodeSections()
    debugDict['xsectionsize'] = sum(execSec[2] for execSec in sectionsList)

    # CREATING THE GRAPH
    refsGlobalVar = 0
    refsUnrecognized = 0
    refsFunc = 0

    debugDict['functions'] = len(functionList)

    # TODO add count of refs from A to B as weights to edges
    # TODO count calls to global vars, to indirect targets

    # First pass: create every node with its full attribute set.  add_edge()
    # would silently create missing nodes WITHOUT these attributes, which is
    # why nodes and edges are built in two separate passes.
    for item in functionList:
        graphity.add_node(hex(item['offset']), size=item['realsz'],
                          calltype=item['calltype'], calls=[],
                          apicallcount=0, strings=[], stringcount=0,
                          functiontype='')

    # The import table does not change while the graph is built, so it is
    # fetched at most once, and only if some call target needs the lookup.
    iat = None

    # Second pass: connect functions through their call references.
    # TODO look into new values provided by aflj
    for item in functionList:
        caller = hex(item['offset'])
        # 'callrefs' may be absent (or null) for functions that call nothing.
        for xref in item.get('callrefs') or ():
            if xref['type'] != 'C':
                continue

            target = hex(xref['addr'])

            if target in graphity:
                # Recursive self-calls are deliberately not added as edges.
                if item['offset'] != xref['addr']:
                    graphity.add_edge(caller, target, pos=hex(xref['at']))
                    refsFunc = refsFunc + 1
                continue

            if iat is None:
                iat = getIat()
            if target in iat:
                # Import call, picked up later by the API reference scan.
                continue

            if not isValidCode(target, sectionsList):
                # TODO do something: dangling call to an address outside the
                # code sections (global var, dynamic API loading).
                refsGlobalVar = refsGlobalVar + 1
            else:
                print(
                    "FAIL: Call to code thats not a function, an import/symbol or otherwise recognized. Missed function perhaps. %s -> %s" % (
                        caller, target))
                refsUnrecognized = refsUnrecognized + 1

    print('* %s Graph created with NetworkX ' % str(datetime.now()))
    debugDict['refsFunctions'] = refsFunc
    debugDict['refsGlobalVar'] = refsGlobalVar
    debugDict['refsUnrecognized'] = refsUnrecognized

    # Newer NetworkX releases dropped the ``node`` attribute in favour of
    # ``nodes``; use whichever this installation provides.
    nodeAttrs = graphity.node if hasattr(graphity, 'node') else graphity.nodes

    # FITTING GRAPH WITH API REFS
    apiRefs = crossRefScan()
    callNum = len(apiRefs)
    missesNum = 0

    for call in apiRefs:
        # Address of the function that contains the call to a given symbol.
        # TODO check if funcAddress is the real function address
        funcAddress = gimmeRespectiveFunction(call)
        if funcAddress in graphity:
            api = gimmeDatApiName(apiRefs[call])
            nodeAttrs[funcAddress]['calls'].append([call, api])
        else:
            # API call reference does not resolve to a function offset;
            # insert handling for this here.
            missesNum = missesNum + 1

    # Total API refs vs. function-less API refs, maybe an indicator for
    # obfuscated code.
    print(
        '* %s Graph extended with API calls, %d calls in total, %d dangling w/o function reference ' % (
            str(datetime.now()), callNum, missesNum))
    debugDict['apiTotal'] = callNum
    debugDict['apiMisses'] = missesNum

    # FITTING GRAPH WITH STRING REFS
    allTheStrings = stringScan(debugDict)
    stringrefs = 0

    for aString in allTheStrings:
        stringAddr = aString[0]
        stringFunc = aString[1]
        stringData = aString[2]

        # Attach the string to the node of the function referencing it.
        if stringFunc in graphity:
            nodeAttrs[stringFunc]['strings'].append([stringAddr, stringData])
            stringrefs = stringrefs + 1
        else:
            print("\n*** BIG FAIL *** String's function not in graph %s %s" % (stringFunc, stringData))

    print('* %s Graph extended with string references ' % (str(datetime.now())))
    debugDict['stringsReferencedTotal'] = stringrefs

    return graphity, debugDict
def createSeGraph():
    """Build the function call graph used for the "Se" graph variant.

    Works like ``createRawGraph`` with these visible differences: node size
    comes from the ``size`` field of ``aflj``, recursive self-calls ARE added
    as edges, the containing function of an API call is resolved with the r2
    expression ``?v $FB @ <addr>``, ``apicallcount`` is maintained per node,
    and dangling references are reported on stdout.

    Graph layout:
      * node -- one per function, keyed by ``hex(offset)``.  Attributes:
        ``size``, ``calltype``, ``calls`` (list of ``[address, apiName]``),
        ``apicallcount``, ``strings`` (list of ``[address, string]``).
      * edge -- caller -> callee for every call reference of type ``'C'``
        whose target is a known function.  Attribute ``pos`` is the hex
        address of the referencing instruction.

    Returns:
        A ``(graphity, debugDict)`` tuple: the populated graph and a dict of
        statistics (``xsectionsize``, ``functions``, ``refsFunctions``,
        ``refsGlobalVar``, ``refsUnrecognized``, ``apiTotal``, ``apiMisses``,
        ``stringsReferencedTotal``, plus whatever ``stringScan`` adds).
    """
    graphity = nx.DiGraph()
    debugDict = {}

    functions = R2PY.cmd("aflj")
    functionList = json.loads(functions) if functions else []

    # Total size of the code sections; index 2 of each entry is summed
    # (presumably the section size -- confirm against getCodeSections()).
    sectionsList = getCodeSections()
    debugDict['xsectionsize'] = sum(execSec[2] for execSec in sectionsList)

    # CREATING THE GRAPH
    refsGlobalVar = 0
    refsUnrecognized = 0
    refsFunc = 0

    debugDict['functions'] = len(functionList)

    # First pass: create every node with its full attribute set.  add_edge()
    # would silently create missing nodes WITHOUT these attributes, which is
    # why nodes and edges are built in two separate passes.
    for item in functionList:
        graphity.add_node(hex(item['offset']), size=item['size'],
                          calltype=item['calltype'], calls=[],
                          apicallcount=0, strings=[])

    # The import table does not change while the graph is built, so it is
    # fetched at most once, and only if some call target needs the lookup.
    iat = None

    # Second pass: connect functions through their call references.
    for item in functionList:
        caller = hex(item['offset'])
        # 'callrefs' may be absent (or null) for functions that call nothing;
        # indexing it unconditionally raised KeyError for such functions.
        for xref in item.get('callrefs') or ():
            if xref['type'] != 'C':
                continue

            target = hex(xref['addr'])

            if target in graphity:
                graphity.add_edge(caller, target, pos=hex(xref['at']))
                refsFunc = refsFunc + 1
                continue

            if iat is None:
                iat = getIat()
            if target in iat:
                # Import call, picked up later by the API reference scan.
                continue

            if not isValidCode(target, sectionsList):
                print("DANGLING call to address outside code section, glob var, dynamic API loading %s -> %s" % (
                    caller, target))
                refsGlobalVar = refsGlobalVar + 1
            else:
                print("FAIL: Call to code thats not a function, an import/symbol or otherwise recognized. Missed function perhaps. %s -> %s" % (
                    caller, target))
                refsUnrecognized = refsUnrecognized + 1

    print('* %s Graph created with NetworkX ' % str(datetime.now()))
    debugDict['refsFunctions'] = refsFunc
    debugDict['refsGlobalVar'] = refsGlobalVar
    debugDict['refsUnrecognized'] = refsUnrecognized

    # Newer NetworkX releases dropped the ``node`` attribute in favour of
    # ``nodes``; use whichever this installation provides.
    nodeAttrs = graphity.node if hasattr(graphity, 'node') else graphity.nodes

    # FITTING GRAPH WITH API REFS
    apiRefs = crossRefScan()
    callNum = len(apiRefs)
    missesNum = 0

    for call in apiRefs:
        # Ask r2 for the start of the function containing the call site.
        # NOTE(review): the raw command output is used as the node key --
        # confirm it matches the hex() format of the node names exactly.
        refAddressCmd = "?v $FB @ " + call
        funcAddress = R2PY.cmd(refAddressCmd)

        if funcAddress in graphity:
            api = gimmeDatApiName(apiRefs[call])
            attrs = nodeAttrs[funcAddress]
            attrs['calls'].append([call, api])
            attrs['apicallcount'] = attrs['apicallcount'] + 1
        else:
            # API call reference does not resolve to a function offset;
            # insert handling for this here.
            print("DANGLING API CALL %s %s" % (call, apiRefs[call]))
            missesNum = missesNum + 1

    # Total API refs vs. function-less API refs, maybe an indicator for
    # obfuscated code.
    print('* %s Graph extended with API calls, %d calls in total, %d dangling w/o function reference ' % (
        str(datetime.now()), callNum, missesNum))
    debugDict['apiTotal'] = callNum
    debugDict['apiMisses'] = missesNum

    # FITTING GRAPH WITH STRING REFS
    allTheStrings = stringScan(debugDict)
    stringrefs = 0

    for aString in allTheStrings:
        stringAddr = aString[0]
        stringFunc = aString[1]
        stringData = aString[2]

        # Attach the string to the node of the function referencing it.
        if stringFunc in graphity:
            nodeAttrs[stringFunc]['strings'].append([stringAddr, stringData])
            stringrefs = stringrefs + 1
        else:
            print("\nFAIL: String's function not in graph %s %s" % (stringFunc, stringData))

    print('* %s Graph extended with string references ' % (str(datetime.now())))
    debugDict['stringsReferencedTotal'] = stringrefs

    return graphity, debugDict