def dumpGraphInfoCsv(graphity, debug, allAtts, csvfile):
    # Appends one line per sample: filename, filetype, filesize, md5, imphash,
    # compilationtime, addressep, sectionep, tlssections, originalfilename,
    # sectioncount, section names/sizes/entropies, functionstotal, refslocal,
    # refsglobalvar, refsunknown, apitotal, apimisses, stringsreferenced,
    # stringsdangling, stringsnoref
    final = []

    if os.path.isfile(csvfile):
        dumpfile = open(csvfile, 'a')
    else:
        try:
            dumpfile = open(csvfile, 'w')
            dumpfile.write("filename;filetype;filesize;md5;imphash;compilationtime;addressep;sectionep;tlssections;originalfilename;sectioncount;secname1;secname2;secname3;secname4;secname5;secname6;secsize1;secsize2;secsize3;secsize4;secsize5;secsize6;secent1;secent2;secent3;secent4;secent5;secent6;functionstotal;refslocal;refsglobalvar;refsunknown;apitotal;apimisses;stringsreferenced;stringsdangling;stringsnoref")
            dumpfile.write("\n")
        except OSError:
            print("ERROR couldn't create the csv dump file")
            return

    final.append(allAtts['filename'])
    final.append(allAtts['filetype'].replace(',', ''))
    final.append(str(allAtts['filesize']))
    final.append(allAtts['md5'])
    final.append(allAtts['imphash'])
    final.append(allAtts['compilationts'])
    final.append(hex(allAtts['addressep']))
    final.append(allAtts['sectionep'])
    final.append(str(allAtts['tlssections']))
    final.append(allAtts['originalfilename'])
    final.append(str(allAtts['sectioncount']))

    # First six section names, sizes and entropy values; sectioninfo holds
    # 12 names, then 12 sizes, then 12 entropies
    secStuff = allAtts['sectioninfo'][:6] + allAtts['sectioninfo'][12:18] + allAtts['sectioninfo'][24:30]
    final = final + secStuff

    final.append(debug['functions'])
    final.append(debug['refsFunctions'])
    final.append(debug['refsGlobalVar'])
    final.append(debug['refsUnrecognized'])
    final.append(debug['apiTotal'])
    final.append(debug['apiMisses'])
    final.append(debug['stringsReferencedTotal'])
    final.append(debug['stringsDanglingTotal'])
    final.append(debug['stringsNoRefTotal'])

    theline = ";".join(map(str, final)) + "\n"
    dumpfile.write(theline)
    dumpfile.close()
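# The dump above is semicolon-separated, so it can be loaded back with the
# stdlib csv module. A minimal sketch; loadGraphInfoCsv is illustrative and
# not part of the tool:
def loadGraphInfoCsv(csvfile):
    import csv
    with open(csvfile, newline='') as handle:
        # DictReader keys each row by the header line written on file creation
        return list(csv.DictReader(handle, delimiter=';'))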
def get_behaviors(filepath, dst_file, out_dir):
    global BENCH
    BENCH = {}
    behaviours = {}

    if check_pe_header(filepath):
        print('* %s Parsing %s ' % (str(datetime.now()), filepath))
        allAtts = getAllAttributes(filepath)
        graphity, debug = graphMagix(filepath, allAtts, True)  # args.deactivatecache

        # BEHAVIOR
        print('* %s Scanning for API patterns ' % str(datetime.now()))
        BENCH['behavior_start'] = time()
        allThePatterns = graphityFunc.funcDict

        for patty in allThePatterns:
            findings = patternScan(graphity, allThePatterns[patty])
            for hit in findings:
                if False not in hit['patterns'].values():
                    if patty in behaviours:
                        behaviours[patty].append(hit['patterns'])
                    else:
                        behaviours[patty] = [hit['patterns']]
        BENCH['behavior_end'] = time()

    ret_info = {}
    function_list = {}

    if behaviours:
        # Map every matched function address to the behavior that flagged it
        for behav in behaviours:
            info = behaviours[behav]
            for entry in info:
                for name in entry:
                    if not str(entry[name]) in function_list:
                        function_list[str(entry[name])] = behav

        # Dump pseudo code for each flagged function to a .c file
        base_file = dst_file.replace(".behav.json", "")
        for funct in function_list:
            R2PY.cmd("s." + funct)
            pseudo_code = R2PY.cmd("pdc")
            code_file = base_file + "." + function_list[funct] + "_" + funct + ".c"
            with open(code_file, "w") as out:
                for line in pseudo_code.split("\n"):
                    line = line.rstrip()
                    if line:
                        out.write(line + "\n")

        ret_info["Suspicious Behaviors"] = behaviours
        with open(dst_file, "w") as out:
            out.write(json.dumps(ret_info, sort_keys=True, indent=4))

        print('* %s Plotting routine starting ' % str(datetime.now()))
        BENCH['plotting_start'] = time()
        graphvizPlot(graphity, allAtts, function_list, out_dir)
        BENCH['plotting_end'] = time()
        print('* %s Plotting routine finished ' % str(datetime.now()))

    return ret_info
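# A minimal caller sketch for get_behaviors(); the paths are hypothetical and
# the ".behav.json" suffix mirrors the replace() convention used above:
def scan_sample(filepath, out_dir):
    dst_file = os.path.join(out_dir, os.path.basename(filepath) + ".behav.json")
    return get_behaviors(filepath, dst_file, out_dir)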
args = parser.parse_args()
# TODO check the path pythonically

# Batch processing options: csvdump, neodump, TBC
if args.input and os.path.isdir(args.input):

    for (dirpath, dirnames, filenames) in os.walk(args.input):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            if check_pe_header(filepath):
                print('* %s Parsing %s ' % (str(datetime.now()), filename))
                allAtts = getAllAttributes(filepath)
                graphity, debug = graphMagix(filepath, allAtts, args.deactivatecache)

                if args.csvdump:
                    # CSVDUMP
                    dumpGraphInfoCsv(graphity, debug, allAtts, args.csvdump)
                    print('* %s Dumping graph info to indicated csv file ' % str(datetime.now()))

                if args.neodump:
                    # TO NEO STUFF
                    toNeo(graphity, allAtts)
                    print('* %s Dumped to Neo4J ' % str(datetime.now()))

elif args.input and check_pe_header(args.input):

    # ATTRIBUTES: md5, sha1, filename, filetype, ssdeep, filesize, imphash, compilationts, addressep, sectionep,
)
parser.add_argument(
    "-c",
    "--csvdump",
    help="Dump info data to a given csv file, appends a line per sample")

args = parser.parse_args()

if args.input and check_pe_header(args.input):

    R2PY = r2pipe.open(args.input)

    # benchmarking :P
    bench = {}

    allAtts = getAllAttributes(args.input)

    print('* %s R2 started analysis ' % str(datetime.now()))
    bench['r2_start'] = time()

    R2PY.cmd("e scr.color = false")
    R2PY.cmd("e asm.bytes = false")
    R2PY.cmd("e asm.lines = false")
    R2PY.cmd("e asm.fcnlines = false")
    R2PY.cmd("e asm.xrefs = false")
    R2PY.cmd("e asm.lbytes = false")
    R2PY.cmd("e asm.indentspace = 0")
    R2PY.cmd("e anal.autoname = false")
    R2PY.cmd("e anal.jmptbl = true")
    R2PY.cmd("e anal.hasnext = true")
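# The block of R2PY.cmd("e ...") calls above could equally be driven from a
# table; a sketch with the same settings, purely a stylistic alternative:
R2_SETTINGS = {
    "scr.color": "false",
    "asm.bytes": "false",
    "asm.lines": "false",
    "asm.fcnlines": "false",
    "asm.xrefs": "false",
    "asm.lbytes": "false",
    "asm.indentspace": "0",
    "anal.autoname": "false",
    "anal.jmptbl": "true",
    "anal.hasnext": "true",
}

def configure_r2(r2):
    for key, value in R2_SETTINGS.items():
        r2.cmd("e %s = %s" % (key, value))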
def printGraphInfo(graphity, debug):
    # GENERAL INFO
    print(".\nGeneral graph info:")
    allAtts = getAllAttributes(sys.argv[1])
    print("SAMPLE " + allAtts['filename'])
    print("Type: " + allAtts['filetype'])
    print("Size: " + str(allAtts['filesize']))
    print("MD5: " + allAtts['md5'])
    print(nx.info(graphity))

    # GRAPH PARSING INFO
    print(".\nGraph measurement data:")
    print("%6d Total functions detected with 'aflj'" % debug['functions'])
    print("%6d Count of references to local functions" % debug['refsFunctions'])
    print("%6d Count of references to data section, global variables" % debug['refsGlobalVar'])
    print("%6d Count of references to unrecognized locations" % debug['refsUnrecognized'])
    print("%6d Total API refs found via symbol xref check" % debug['apiTotal'])
    print("%6d Count APIs w/o function xref" % debug['apiMisses'])
    print("%6d Total referenced Strings" % debug['stringsReferencedTotal'])
    print("%6d Count of dangling strings (w/o function reference)" % debug['stringsDanglingTotal'])
    print("%6d Count of strings w/o any reference" % debug['stringsNoRefTotal'])

    # PE DETAILS
    print(".\nPE details:")
    print("Imphash:\t\t" + allAtts['imphash'])
    print("Compilation time:\t" + allAtts['compilationts'])
    print("Entrypoint address:\t" + hex(allAtts['addressep']))
    print("Entrypoint section:\t" + allAtts['sectionep'])
    print("TLS section count:\t" + str(allAtts['tlssections']))
    print("Original filename:\t" + allAtts['originalfilename'])
    print("Section count:\t\t" + str(allAtts['sectioncount']))
    print("Section details:")
    # sectioninfo holds 12 names, then 12 sizes, then 12 entropy values
    i = 0
    while i < allAtts['sectioncount'] and i < 12:
        print("%8s %8d %s" % (allAtts['sectioninfo'][i],
                              allAtts['sectioninfo'][i + 12],
                              allAtts['sectioninfo'][i + 24]))
        i = i + 1
    # TODO resources list

    try:
        degrees = nx.out_degree_centrality(graphity)
    except:
        degrees = {}  # empty dict so degrees.values() stays valid below
    indegrees = graphity.in_degree()

    # SPAGHETTI CODE METRICS
    print(".\nFat node detection with out-degree centrality, count calls, count strings:")
    if degrees:
        sortit = sorted(degrees, key=degrees.get, reverse=True)
        for val in sortit[:20]:
            print("%s %.6f %d %d" % (val, degrees[val],
                                     len(graphity.node[val]['calls']),
                                     len(graphity.node[val]['strings'])))
        print('.')

    # OUT DEGREE CENTRALITY HISTOGRAM
    print("Histogram of out degree centrality:")
    nummy = np.array(list(degrees.values()))
    bins = [0, 0.0005, 0.001, 0.0015, 0.002, 0.004, 0.006, 0.008, 0.01, 0.02,
            0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.2, 0.3, 0.4, 0.5]
    hist, bin_edges = np.histogram(nummy, bins=bins)
    for be in bin_edges:
        print(be, end=' ')
    print("")
    for hi in hist:
        print(hi, end=' ')
    print("\n.")
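    # Both the CSV dump and the printout above index sectioninfo as three
    # parallel 12-slot arrays: names at [0:12], sizes at [12:24], entropies at
    # [24:36]. A small helper sketch making that layout explicit (an assumption
    # based on the indexing used in this file, not an existing helper):
    def iterSections(allAtts):
        count = min(allAtts['sectioncount'], 12)
        info = allAtts['sectioninfo']
        # yields one (name, size, entropy) tuple per section
        return zip(info[0:count], info[12:12 + count], info[24:24 + count])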
    # LOOSE NODE COUNT
    numInZero = 0
    for val in indegrees:
        if indegrees[val] == 0:
            numInZero = numInZero + 1
    nodeNum = graphity.number_of_nodes()
    if not nodeNum:
        nodeNum = 1
    print("Loose nodes %d of total %d, that's %f%%" % (
        numInZero, nodeNum, 100.0 * (float(numInZero) / float(nodeNum))))

    # RATIO OF API CALLS AND STRINGS WITHIN CODE SECTION
    # code section size, function count, total api, total string
    print(".\nExecSize FunctionCount ApiCount StringCount")
    print("%d %d %d %d" % (debug['xsectionsize'], debug['functions'],
                           debug['apiTotal'], debug['stringsReferencedTotal']))
    kilobytes = (float(debug['xsectionsize']) / 1000.0)
    if kilobytes > 0:
        print("Per-Kilobyte ratio")
        print(float(debug['functions']) / kilobytes,
              float(debug['apiTotal']) / kilobytes,
              float(debug['stringsReferencedTotal']) / kilobytes)

    # AVERAGE DEGREE CONNECTIVITY
    # average nearest neighbor degree of nodes with degree k
    print(".\nAverage degree connectivity per degree k:")
    avConn = nx.average_degree_connectivity(graphity)
    for connectivity in avConn:
        print("%3d %.6f" % (connectivity, avConn[connectivity]))
    print(".")

    # GETPROCADDRESS DETECTION: not a super useful metric, but interesting to
    # look at; differs from behavior detection because this counts every call
    allCalls = nx.get_node_attributes(graphity, 'calls')
    gpaCount = 0
    for function in allCalls:
        for call in allCalls[function]:
            if 'GetProcAddress' in call[1]:
                gpaCount = gpaCount + 1
    print("Found %d calls to GetProcAddress\n." % gpaCount)
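    # Note: graphity.in_degree() returns a dict in networkx 1.x, which is what
    # the loose-node loop above assumes. Under networkx >= 2.0 it returns a
    # view of (node, degree) pairs, so the equivalent count would be (sketch):
    def count_loose_nodes_nx2(graph):
        return sum(1 for _, degree in graph.in_degree() if degree == 0)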
def plotSeGraph(graphity):
    pydotMe = nx.drawing.nx_pydot.to_pydot(graphity)
    for node in pydotMe.get_nodes():
        finalString = ''
        if node.get('calls') != '[]' or node.get('strings') != '[]':
            # TODO the single ugliest piece of code I ever wrote; parsing the
            # stringified list attributes back out of pydot is fragile.
            # Promised fix in the future, priority -1... duh
            finalList = []
            for item in node.get('calls').split('[\''):
                if item.startswith('0x'):
                    stuff = item.split('\'')
                    finalList.append(str(stuff[0]) + ": [C] " + str(stuff[2]))
            try:
                for otherItem in node.get('strings').split('[\''):
                    if otherItem.startswith('0x'):
                        stuff = otherItem.split('\'')
                        finalList.append(str(stuff[0]) + ": [S] " + str(stuff[2]))
            except:
                print("Trouble with string " + str(stuff))

            finalList.sort()
            finalString = '\n'.join(finalList)

        if node.get('type') == 'Export':
            label = "Export " + node.get('alias')
            label = label + "\n" + finalString
            node.set_fillcolor('skyblue')
            node.set_style('filled,setlinewidth(3.0)')
            node.set_label(label)
        elif node.get('type') == 'Callback':
            label = "Callback " + "\n" + finalString
            node.set_fillcolor('darkolivegreen1')
            node.set_style('filled,setlinewidth(3.0)')
            node.set_label(label)
        elif finalString != '':
            nodeaddr = node.to_string().split()[0]  # dirty hack ^^
            finalString = nodeaddr + "\n" + finalString
            node.set_fillcolor('lightpink1')
            node.set_style('filled,setlinewidth(3.0)')
            node.set_label(finalString)

    allAtts = getAllAttributes(sys.argv[1])
    graphinfo = ("SAMPLE " + allAtts['filename'] +
                 "\nType: " + allAtts['filetype'] +
                 "\nSize: " + str(allAtts['filesize']) +
                 "\nMD5: " + allAtts['md5'] +
                 "\nImphash:\t\t" + allAtts['imphash'] +
                 "\nCompilation time:\t" + allAtts['compilationts'] +
                 "\nEntrypoint section:\t" + allAtts['sectionep'])

    titleNode = Node()
    titleNode.set_label(graphinfo)
    titleNode.set_shape('rectangle')
    titleNode.set_fillcolor('red')
    titleNode.set_style('filled')
    pydotMe.add_node(titleNode)

    graphname = os.path.basename(sys.argv[1]) + ".png"
    try:
        # TODO pydotplus sometimes throws an error (e.g. "Error: /tmp/tmp6XgKth:
        # syntax error in line 92 near '['"); look into the pdp code to see why
        pydotMe.write_png(os.path.join(os.path.abspath(os.path.dirname(__file__)), graphname))
    except Exception as e:
        print("ERROR drawing graph")
        print(str(e))