Esempio n. 1
0
def dumpGraphInfoCsv(graphity, debug, csvfile):
	"""Append one semicolon-separated line describing the current sample to csvfile.

	The sample attributes are obtained from getAllAttributes(sys.argv[1]); the
	graph measurement counters are read from the `debug` dict. When csvfile does
	not exist yet it is created and a header line is written first.

	Parameters:
		graphity -- the sample graph (not read by this function, kept for interface
		            compatibility)
		debug    -- dict holding the counters 'functions', 'refsFunctions',
		            'refsGlobalVar', 'refsUnrecognized', 'apiTotal', 'apiMisses',
		            'stringsReferencedTotal', 'stringsDanglingTotal', 'stringsNoRefTotal'
		csvfile  -- path of the csv file to append to

	If a not yet existing csvfile cannot be created, an error message is printed
	and the function returns without raising. I/O errors on an already existing
	file propagate to the caller.
	"""

	header = "filename;filetype;filesize;md5;imphash;compilationtime;addressep;sectionep;tlssections;originalfilename;sectioncount;secname1;secname2;secname3;secname4;secname5;secname6;secsize1;secsize2;secsize3;secsize4;secsize5;secsize6;secent1;secent2;secent3;secent4;secent5;secent6;functionstotal;refslocal;refsglobalvar;refsunknown;apitotal;apimisses;stringsreferenced;stringsdangling;stringsnoref"

	allAtts = getAllAttributes(sys.argv[1])

	# Build the complete row BEFORE touching the file, so that a missing
	# attribute can neither leak an open handle nor leave a header-only file.
	final = [
		allAtts['filename'],
		allAtts['filetype'].replace(',',''),
		str(allAtts['filesize']),
		allAtts['md5'],
		allAtts['imphash'],
		allAtts['compilationts'],
		hex(allAtts['addressep']),
		allAtts['sectionep'],
		str(allAtts['tlssections']),
		allAtts['originalfilename'],
		str(allAtts['sectioncount']),
	]

	# First six entries of three slices of sectioninfo, matching the header
	# columns secname1-6, secsize1-6 and secent1-6
	sectionInfo = allAtts['sectioninfo']
	final = final + sectionInfo[:6] + sectionInfo[12:18] + sectionInfo[24:30]

	final.extend([
		debug['functions'],
		debug['refsFunctions'],
		debug['refsGlobalVar'],
		debug['refsUnrecognized'],
		debug['apiTotal'],
		debug['apiMisses'],
		debug['stringsReferencedTotal'],
		debug['stringsDanglingTotal'],
		debug['stringsNoRefTotal'],
	])

	theline = ";".join(map(str, final)) + "\n"

	isNewFile = not os.path.isfile(csvfile)
	try:
		# Append mode creates the file when it is missing; the with-block
		# guarantees the handle is closed even if a write fails.
		with open(csvfile, 'a') as dumpfile:
			if isNewFile:
				dumpfile.write(header)
				dumpfile.write("\n")
			dumpfile.write(theline)
	except (IOError, OSError):
		if not isNewFile:
			# errors on an existing file were never swallowed, keep it that way
			raise
		print("ERROR couldn't create the csv dump file")
Esempio n. 2
0
def get_behaviors(filepath, dst_file, out_dir):
    """Scan a PE file for known API patterns, dump the findings and plot the graph.

    Steps, for a file accepted by check_pe_header():
      1. build the graph with getAllAttributes() / graphMagix(),
      2. run patternScan() for every entry of graphityFunc.funcDict and keep
         the hits whose 'patterns' dict contains no False value,
      3. for every function referenced by a hit, write the output of the r2
         "pdc" command to "<base>.<behavior>_<function>.c", where <base> is
         dst_file with ".behav.json" removed,
      4. write the findings as JSON to dst_file (only when something was found),
      5. call graphvizPlot() with the graph and the function -> behavior map.

    Timing marks are stored in the module-level BENCH dict, which is reset on
    every call.

    Parameters:
        filepath -- path of the sample to analyse
        dst_file -- path of the JSON report to write
        out_dir  -- passed through to graphvizPlot()

    Returns:
        dict -- {"Suspicious Behaviors": {behavior: [patterns, ...]}} when
        anything was found, otherwise an empty dict. An empty dict is also
        returned, without plotting, when filepath is not accepted by
        check_pe_header().
    """
    global BENCH
    BENCH = {}

    ret_info = {}

    # Without a valid PE there is no graph to scan or plot. Falling through
    # would hit graphvizPlot() with graphity/allAtts never assigned.
    if not check_pe_header(filepath):
        return ret_info

    print('* %s Parsing %s ' % (str(datetime.now()), filepath))
    allAtts = getAllAttributes(filepath)
    graphity, debug = graphMagix(filepath, allAtts,
                                 True)  # args.deactivatecache)

    # BEHAVIOR
    print('* %s Scanning for API patterns ' % str(datetime.now()))
    BENCH['behavior_start'] = time()
    allThePatterns = graphityFunc.funcDict

    behaviours = {}
    for patty in allThePatterns:
        for hit in patternScan(graphity, allThePatterns[patty]):
            # keep a hit only when none of its pattern values equals False
            if False not in hit['patterns'].values():
                behaviours.setdefault(patty, []).append(hit['patterns'])
    BENCH['behavior_end'] = time()

    # Maps str(pattern value) -> behavior name; the first behavior that
    # references a value wins.
    function_list = {}
    if behaviours:
        for behav in behaviours:
            for entry in behaviours[behav]:
                for name in entry:
                    function_list.setdefault(str(entry[name]), behav)

        base_file = dst_file.replace(".behav.json", "")
        for funct in function_list:
            # presumably "s." seeks r2 to the function and "pdc" returns its
            # pseudo code -- confirm against the r2 command reference
            R2PY.cmd("s." + funct)
            pseudo_code = R2PY.cmd("pdc")
            code_file = (base_file + "." + function_list[funct] + "_" +
                         funct + ".c")
            with open(code_file, "w") as out:
                # drop trailing whitespace and empty lines
                for line in pseudo_code.split("\n"):
                    line = line.rstrip()
                    if line:
                        out.write(line + "\n")

        ret_info["Suspicious Behaviors"] = behaviours
        with open(dst_file, "w") as out:
            out.write(json.dumps(ret_info, sort_keys=True, indent=4))

    print('* %s Plotting routine starting ' % str(datetime.now()))
    BENCH['plotting_start'] = time()
    graphvizPlot(graphity, allAtts, function_list, out_dir)
    BENCH['plotting_end'] = time()
    print('* %s Plotting routine finished ' % str(datetime.now()))

    return ret_info
Esempio n. 3
0
	args = parser.parse_args()
	# TODO check the path pythonically

	# Batch processing options: csvdump, neodump, TBC

	if args.input and os.path.isdir(args.input):

		for (dirpath, dirnames, filenames) in os.walk(args.input):
			for filename in filenames:
				filepath = os.path.join(dirpath, filename)

				if check_pe_header(filepath):

					print('* %s Parsing %s ' % (str(datetime.now()), filename))

					allAtts = getAllAttributes(filepath)
					graphity, debug = graphMagix(filepath, allAtts, args.deactivatecache)

					if args.csvdump:
						# CSVDUMP
						dumpGraphInfoCsv(graphity, debug, allAtts, args.csvdump)
						print('* %s Dumping graph info to indicated csv file ' % str(datetime.now()))

					if args.neodump:
						# TO NEO STUFF
						toNeo(graphity, allAtts)
						print('* %s Dumped to Neo4J ' % str(datetime.now()))

	elif args.input and check_pe_header(args.input):

		# ATTRIBUTES: md5, sha1, filename, filetype, ssdeep, filesize, imphash, compilationts, addressep, sectionep,
Esempio n. 4
0
    )
    parser.add_argument(
        "-c",
        "--csvdump",
        help="Dump info data to a given csv file, appends a line per sample")

    args = parser.parse_args()

    if args.input and check_pe_header(args.input):

        R2PY = r2pipe.open(args.input)

        # benchmarking :P
        bench = {}

        allAtts = getAllAttributes(args.input)

        print '* %s R2 started analysis ' % str(datetime.now())

        bench['r2_start'] = time()
        R2PY.cmd("e scr.color = false")
        R2PY.cmd("e asm.bytes = false")
        R2PY.cmd("e asm.lines = false")
        R2PY.cmd("e asm.fcnlines = false")
        R2PY.cmd("e asm.xrefs = false")
        R2PY.cmd("e asm.lbytes = false")
        R2PY.cmd("e asm.indentspace = 0")
        R2PY.cmd("e anal.autoname= false")

        R2PY.cmd("e anal.jmptbl = true")
        R2PY.cmd("e anal.hasnext = true")
Esempio n. 5
0
def printGraphInfo(graphity, debug):

    # GENERAL INFO
    print ".\nGeneral graph info:"
    allAtts = getAllAttributes(sys.argv[1])
    print "SAMPLE " + allAtts['filename']
    print "Type: " + allAtts['filetype']
    print "Size: " + str(allAtts['filesize'])
    print "MD5: " + allAtts['md5']
    print nx.info(graphity)

    # GRAPH PARSING INFO
    print ".\nGraph measurement data:"
    print "%6d Total functions detected with 'aflj'" % debug['functions']
    print "%6d Count of references to local functions" % debug['refsFunctions']
    print "%6d Count of references to data section, global variables" % debug[
        'refsGlobalVar']
    print "%6d Count of references to unrecognized locations" % debug[
        'refsUnrecognized']
    print "%6d Total API refs found via symbol xref check" % debug['apiTotal']
    print "%6d Count APIs w/o function xref" % debug['apiMisses']
    print "%6d Total referenced Strings" % debug['stringsReferencedTotal']
    print "%6d Count of dangling strings (w/o function reference)" % debug[
        'stringsDanglingTotal']
    print "%6d Count of strings w/o any reference" % debug['stringsNoRefTotal']

    # PE DETAILS

    print ".\nPE details:"
    print "Imphash:\t\t" + allAtts['imphash']
    print "Compilation time:\t" + allAtts['compilationts']
    print "Entrypoint address:\t" + hex(allAtts['addressep'])
    print "Entrypoint section:\t" + allAtts['sectionep']
    print "TLS section count:\t" + str(allAtts['tlssections'])
    print "Original filename:\t" + allAtts['originalfilename']
    print "Section count:\t\t" + str(allAtts['sectioncount'])
    print "Section details:"  #+ str(allAtts['sectioninfo'])

    i = 0
    while i < allAtts['sectioncount'] and i < 12:
        print "%8s %8d %s" % (allAtts['sectioninfo'][i],
                              allAtts['sectioninfo'][i + 12],
                              allAtts['sectioninfo'][i + 24])
        i = i + 1

    # TODO resources list

    try:
        degrees = nx.out_degree_centrality(graphity)
    except:
        degrees = 0

    indegrees = graphity.in_degree()

    # SPAGHETTI CODE METRICS
    print ".\nFat node detection with out-degree centrality, count calls, count strings:"
    if degrees:
        sortit = sorted(degrees, key=degrees.get, reverse=True)
        for val in sortit[:20]:
            print "%s %.6f %d %d" % (val, degrees[val],
                                     len(graphity.node[val]['calls']),
                                     len(graphity.node[val]['strings']))

    print '.'

    # OUT DEGREE CENTRALITY HISTOGRAM
    print "Histogram of out degree centrality:"
    nummy = np.array(degrees.values())
    bins = [
        0, 0.0005, 0.001, 0.0015, 0.002, 0.004, 0.006, 0.008, 0.01, 0.02, 0.03,
        0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.2, 0.3, 0.4, 0.5
    ]
    hist, bin_edges = np.histogram(nummy, bins=bins)
    for be in bin_edges:
        print be,
    print ""
    for hi in hist:
        print hi,
    print "\n."

    # LOOSE NODE COUNT
    numInZero = 0
    for val in indegrees:
        if indegrees[val] == 0:
            numInZero = numInZero + 1
    nodeNum = graphity.number_of_nodes()
    if not nodeNum:
        nodeNum = 1

    print "Loose nodes %d of total %d, thats %f%%" % (
        numInZero, nodeNum, 100.0 * (float(numInZero) / float(nodeNum)))

    # RATIO OF API CALLS AND STRINGS WITHING CODE SECTION
    print ".\nExecSize FunctionCount ApiCount StringCount"
    print "%d %d %d %d" % (
        debug['xsectionsize'], debug['functions'], debug['apiTotal'],
        debug['stringsReferencedTotal']
    )  # code section size, function count, total api, total string

    kilobytes = (float(debug['xsectionsize']) / 1000.0)
    if kilobytes > 0:
        print "Per-Kilobyte ratio"
        print float(debug['functions']) / kilobytes, float(
            debug['apiTotal']) / kilobytes, float(
                debug['stringsReferencedTotal']) / kilobytes

    # AVERAGE DEGREE CONNECTIVITY
    print ".\nAverage degree connectivity per degree k:"  #average nearest neighbor degree of nodes with degree k
    avConn = nx.average_degree_connectivity(graphity)
    for connectivity in avConn:
        print "%3d %.6f" % (connectivity, avConn[connectivity])

    print "."

    # GETPROCADDRESS DETECTION, not a suuuuper useful metric, but interesting to look at, different from beh. detection, cause count is total
    allCalls = nx.get_node_attributes(graphity, 'calls')
    gpaCount = 0

    for function in allCalls:
        for call in allCalls[function]:
            if 'GetProcAddress' in call[1]:
                gpaCount = gpaCount + 1

    print "Found %d calls to GetProcAddress\n." % gpaCount
Esempio n. 6
0
def plotSeGraph(graphity):
	"""Render the graph as a PNG file next to this script.

	The graph is converted to pydot. Every node that carries call or string
	entries gets a label listing them sorted, one per line, as
	"<address>: [C] <call>" or "<address>: [S] <string>". Nodes of type
	'Export' and 'Callback' are coloured and labelled specially, all other
	nodes with entries are coloured pink. A red title node with the sample
	attributes from getAllAttributes(sys.argv[1]) is added, and the result is
	written to "<basename of sys.argv[1]>.png" in the directory of this file.

	Parameters:
		graphity -- networkx graph to plot

	A failure while writing the PNG is printed and not raised.
	"""

	pydotMe = nx.drawing.nx_pydot.to_pydot(graphity)
	for node in pydotMe.get_nodes():

		# NOTE(review): a node without the attribute appears to yield None from
		# get(); treat that like the empty list literal instead of crashing on
		# .split below -- confirm against the pydot version in use
		callsAttr = node.get('calls') or '[]'
		stringsAttr = node.get('strings') or '[]'

		finalString = ''
		if callsAttr != '[]' or stringsAttr != '[]':

			# TODO THE single ugliest piece of code I ever wrote. Now I'll promise to fix this in the future, priority -1... duh
			# The attributes are the string form of nested lists; splitting on
			# "['" and then on "'" leaves the address at index 0 and the
			# call/string text at index 2.
			finalList = []
			for item in callsAttr.split('[\''):
				if item.startswith('0x'):
					stuff = item.split('\'')
					finalList.append(str(stuff[0]) + ": [C] " + str(stuff[2]))

			for otherItem in stringsAttr.split('[\''):
				if otherItem.startswith('0x'):
					stuff = otherItem.split('\'')
					try:
						finalList.append(str(stuff[0]) + ": [S] " + str(stuff[2]))
					except (IndexError, UnicodeError):
						# Report the malformed entry and carry on with the
						# remaining strings of this node.
						print("Trouble with string " + str(stuff))

			finalList.sort()
			finalString = '\n'.join(finalList)

		if node.get('type') == 'Export':
			label = "Export " + node.get('alias')
			label = label + "\n" + finalString
			node.set_fillcolor('skyblue')
			node.set_style('filled,setlinewidth(3.0)')
			node.set_label(label)

		elif node.get('type') == 'Callback':
			label = "Callback " + "\n" + finalString
			node.set_fillcolor('darkolivegreen1')
			node.set_style('filled,setlinewidth(3.0)')
			node.set_label(label)

		elif finalString != '':
			# first whitespace-separated token of the node's textual form is
			# used as the node address
			nodeaddr = node.to_string().split()[0]			# dirrty hack ^^
			finalString = nodeaddr + "\n" + finalString
			node.set_fillcolor('lightpink1')
			node.set_style('filled,setlinewidth(3.0)')
			node.set_label(finalString)


	allAtts = getAllAttributes(sys.argv[1])
	graphinfo = "SAMPLE " + allAtts['filename'] + "\nType: " + allAtts['filetype'] + "\nSize: " + str(allAtts['filesize']) + "\nMD5: " + allAtts['md5'] + "\nImphash:\t\t" + allAtts['imphash'] + "\nCompilation time:\t" + allAtts['compilationts'] + "\nEntrypoint section:\t" + allAtts['sectionep']

	titleNode = Node()
	titleNode.set_label(graphinfo)
	titleNode.set_shape('rectangle')
	titleNode.set_fillcolor('red')
	titleNode.set_style('filled')
	pydotMe.add_node(titleNode)

	graphname = os.path.basename(sys.argv[1]) + ".png"
	try:
		# TODO pydotplus throws an error sometimes (Error: /tmp/tmp6XgKth: syntax error in line 92 near '[') look into pdp code to see why
		pydotMe.write_png(os.path.join(os.path.abspath(os.path.dirname(__file__)), graphname))
	except Exception as e:
		print("ERROR drawing graph")
		print(str(e))