def execute(tfl):
    bbs = {}
    args = config.SUT % tfl
    runcmd = config.PINCMD + args.split(' ')
    try:
        os.unlink(config.BBOUT)
    except:
        pass
    retc = run(runcmd)
    # check if the loading address was changed
    # liboffsetprev = int(config.LIBOFFSETS[1], 0)
    if config.LIBNUM == 2:
        if config.BIT64 == False:
            liboffsetcur = int(libfd_mm[:10], 0)
        else:
            liboffsetcur = int(libfd_mm[:], 0)
        libfd_mm.seek(0)
        if liboffsetcur != int(config.LIBOFFSETS[1], 0):
            # print "Load address changed!"
            gau.die("load address changed..run again!")
    # open BB trace file to get BBs
    bbs = bbdict(config.BBOUT)
    if config.CLEANOUT == True:
        gau.delete_out_file(tfl)
    return (bbs, retc)
def execute(tfl):
    bbs = {}
    args = config.SUT % tfl
    ih = config.PINCMD.index("$")
    config.PINCMD[ih] = tfl
    runcmd = config.PINCMD + args.split(' ')
    try:
        os.unlink(config.BBOUT)
    except:
        pass
    retc = run(runcmd)
    # check if the loading address was changed
    # liboffsetprev = int(config.LIBOFFSETS[1], 0)
    if config.LIBNUM == 2:
        if config.BIT64 == False:
            liboffsetcur = int(libfd_mm[:10], 0)
        else:
            liboffsetcur = int(libfd_mm[:18], 0)
        libfd_mm.seek(0)
        if liboffsetcur != int(config.LIBOFFSETS[1], 0):
            # print "Load address changed!"
            print liboffsetcur, int(config.LIBOFFSETS[1], 0)
            gau.die("load address changed..run again!")
    # open BB trace file to get BBs
    bbs = bbdict(config.BBOUT)
    # print bbs
    for bb in bbs.keys():
        config.globalbb.add(bb)
        config.tempbb.add(bb)
    if config.CLEANOUT == True:
        gau.delete_out_file(tfl)
    config.PINCMD[ih] = "$"
    return (bbs, retc)
def check_env():
    ''' This function checks the relevant environment variables that must be set
    before we start our fuzzer. '''
    if os.getenv('PIN_ROOT') == None:
        gau.die("PIN_ROOT env is not set. Run export PIN_ROOT=path_to_pin_exe")
    fd1 = open("/proc/sys/kernel/randomize_va_space", 'r')
    b = fd1.read(1)
    fd1.close()
    if int(b) != 0:
        gau.die("ASLR is not disabled. Run: echo 0 | sudo tee /proc/sys/kernel/randomize_va_space")
def execute(tfl):
    bbs = {}
    args = config.SUT % tfl
    runcmd = config.BBCMD + args.split(' ')
    try:
        os.unlink(config.BBOUT)
    except:
        pass
    # print "[*] args : ", args
    # print "[*] tfl : ", tfl
    # print "[*] config.SUT : ", config.SUT
    # print "[*] execute : %s" % (" ".join(runcmd))
    retc = run(runcmd)
    # print "[*] Done with exit code %d" % (retc)
    # check if the loading address was changed
    # liboffsetprev = int(config.LIBOFFSETS[1], 0)
    if config.LIBNUM == 2:
        if config.BIT64 == False:
            liboffsetcur = int(libfd_mm[:10], 0)
        else:
            liboffsetcur = int(libfd_mm[:18], 0)
        libfd_mm.seek(0)
        if liboffsetcur != int(config.LIBOFFSETS[1], 0):
            # print "Load address changed!"
            gau.die("[-] Load address of %s changed to 0x%x and you gave 0x%x : change it in launch options"
                    % (config.LIBTOMONITOR, liboffsetcur, int(config.LIBOFFSETS[1], 0)))
    if config.CLEARSPECIALOUTPUT:
        gau.delete_special_out_file(config.SPECIALOUTPUT)
    # open BB trace file to get BBs
    bbs = bbdict(config.BBOUT)
    if config.CLEANOUT == True:
        gau.delete_out_file(tfl)
    return (bbs, retc)
def check_env():
    ''' This function checks the relevant environment variables that must be set
    before we start our fuzzer. '''
    if os.getenv('PIN_ROOT') == None:
        gau.die("PIN_ROOT env is not set. Run export PIN_ROOT=path_to_pin_exe")
    fd1 = open("/proc/sys/kernel/randomize_va_space", 'r')
    b = fd1.read(1)
    fd1.close()
    if int(b) != 0:
        gau.die("ASLR is not disabled. Run: echo 0 | sudo tee /proc/sys/kernel/randomize_va_space")
    fd = open("/proc/sys/kernel/yama/ptrace_scope", 'r')
    b = fd.read(1)
    fd.close()
    if int(b) != 0:
        gau.die("Pintool may not work. Run: echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope")
    if os.path.ismount(config.BASETMP) == False:
        tmp = raw_input("It seems that config.BASETMP is not mounted as a tmpfs filesystem. "
                        "Making it a tmpfs may speed up execution. "
                        "Press [Y/y] to mount it OR press [N/n] to continue.")
        if tmp.upper() == "Y":
            print "run: sudo mount -t tmpfs -o size=1024M tmpfs %s" % config.BASETMP
            raise SystemExit(1)
def get_taint(dirin, is_initial=0): ''' This function is used to get taintflow for each CMP instruction to find which offsets in the input are used at the instructions. It also gets the values used in the CMP.''' print "[*] starting taintflow calculation." files = os.listdir(dirin) #taintmap=dict()#this is a dictionary to keep taintmap of each input file. Key is the input file name and value is a tuple returned by read_taint, wherein 1st element is a set of all offsets used in cmp and 2nd elment is a dictionary with key a offset and value is a set of values at that offsetthat were found in CMP instructions. #mostcommon=dict()# this dictionary keeps offsets which are common across all the inputs with same value set. for fl in files: if fl in config.TAINTMAP: continue pfl = os.path.abspath(os.path.join(dirin, fl)) if is_initial == 1: tnow1 = datetime.now() rcode = execute2(pfl, fl, is_initial) if is_initial == 1: tnow2 = datetime.now() config.TIMEOUT = max(config.TIMEOUT, 2 * ((tnow2 - tnow1).total_seconds())) if rcode == 255: continue gau.die("pintool terminated with error 255 on input %s" % (pfl, )) config.TAINTMAP[fl] = read_taint(pfl) config.LEAMAP[fl] = read_lea() config.ANALYSIS_MAP[fl] = [{ 'INT8': offset_api.get_offset(1), 'INT16': offset_api.get_offset(2), 'INT32': offset_api.get_offset(4), 'INT64': offset_api.get_offset(8) }, offset_api.get_arrays(), offset_api.get_all_offsets(), offset_api.get_memchr_reward_arr(), offset_api.get_cmp_reward_arr() ] #, offset_api.get_malloc_reward_arr()] if config.MOSTCOMFLAG == False: print "computing MOSTCOM calculation..." for k1, v1 in config.TAINTMAP.iteritems(): for off1, vset1 in v1[1].iteritems(): tag = True if off1 > config.MAXOFFSET: config.TAINTMAP[k1][0].add(off1) #print "[==] ",k1,off1 continue for k2, v2 in config.TAINTMAP.iteritems(): if off1 not in v2[1]: config.TAINTMAP[k1][0].add(off1) #print k2,v2[1] tag = False break #print "passed..", off1 if len(set(vset1) & set(v2[1][off1]) ) == 0: #set(vset1) != set(v2[off1]) #print k1, k2, off1, set(vset1), set(v2[1][off1]) config.TAINTMAP[k1][0].add(off1) tag = False break #print "passed set", vset1 if tag == True: config.MOSTCOMMON[off1] = vset1[:] #print "[++]",config.MOSTCOMMON[off1] break # we just want to take one input and check if all the offsets in other inputs have commonality. else: print "computing MORECOM calculation..." for k1, v1 in config.TAINTMAP.iteritems(): for off1, vset1 in v1[1].iteritems(): tag = True for k2, v2 in config.TAINTMAP.iteritems(): if off1 not in v2[1]: config.TAINTMAP[k1][0].add(off1) #print k2,v2[1] tag = False break #print "passed..", off1 if len(set(vset1) ^ set(v2[1][off1])) > 3: #vset1 != v2[1][off1]: #print k2, vset1, v2[1][off1] config.TAINTMAP[k1][0].add(off1) tag = False break #print "passed set", vset1 if tag == True: config.MORECOMMON[off1] = vset1[:] #print config.MOSTCOMMON[off1] break # we just want to take one input and check if all the offsets in other inputs have commonality. print config.MOSTCOMMON, config.MORECOMMON #gw = raw_input("press enter") print "[*] taintflow finished."
def get_taint(dirin): ''' This function is used to get taintflow for each CMP instruction to find which offsets in the input are used at the instructions. It also gets the values used in the CMP.''' print "[*] starting taintflow calculation." files=os.listdir(dirin) #taintmap=dict()#this is a dictionary to keep taintmap of each input file. Key is the input file name and value is a tuple returned by read_taint, wherein 1st element is a set of all offsets used in cmp and 2nd elment is a dictionary with key a offset and value is a set of values at that offsetthat were found in CMP instructions. #mostcommon=dict()# this dictionary keeps offsets which are common across all the inputs with same value set. for fl in files: if fl in config.TAINTMAP: continue pfl=os.path.join(dirin,fl) rcode=execute2(pfl,fl) if rcode ==255: continue gau.die("pintool terminated with error 255 on input %s"%(pfl,)) config.TAINTMAP[fl]=read_taint(pfl) config.LEAMAP[fl]=read_lea() #print config.TAINTMAP[fl][1] #raw_input("press key..") if config.MOSTCOMFLAG==False: print "computing MOSTCOM calculation..." for k1,v1 in config.TAINTMAP.iteritems(): for off1,vset1 in v1[1].iteritems(): tag=True if off1 > config.MAXOFFSET: config.TAINTMAP[k1][0].add(off1) #print "[==] ",k1,off1 continue for k2,v2 in config.TAINTMAP.iteritems(): if off1 not in v2[1]: config.TAINTMAP[k1][0].add(off1) #print k2,v2[1] tag=False break #print "passed..", off1 if len(set(vset1) & set(v2[1][off1]))==0:#set(vset1) != set(v2[off1]) print k1, k2, off1, set(vset1), set(v2[1][off1]) config.TAINTMAP[k1][0].add(off1) tag=False break #print "passed set", vset1 if tag==True: config.MOSTCOMMON[off1]=vset1[:] #print "[++]",config.MOSTCOMMON[off1] break # we just want to take one input and check if all the offsets in other inputs have commonality. else: print "computing MORECOM calculation..." for k1,v1 in config.TAINTMAP.iteritems(): for off1,vset1 in v1[1].iteritems(): tag=True #if off1 > config.MAXOFFSET: #print k1,off1 # continue for k2,v2 in config.TAINTMAP.iteritems(): if off1 not in v2[1]: config.TAINTMAP[k1][0].add(off1) #print k2,v2[1] tag=False break #print "passed..", off1 if len(set(vset1) ^ set(v2[1][off1]))>3:#vset1 != v2[1][off1]: print k2, vset1, v2[1][off1] config.TAINTMAP[k1][0].add(off1) tag=False break #print "passed set", vset1 if tag==True: config.MORECOMMON[off1]=vset1[:] #print config.MOSTCOMMON[off1] break # we just want to take one input and check if all the offsets in other inputs have commonality. print "[*] taintflow finished."
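# The MOSTCOMMON pass in get_taint() above keeps an input offset only if every
# analysed input saw a CMP on that offset with an overlapping set of compared
# values.  Below is a minimal, self-contained sketch of that intersection idea;
# `taint_map`, the function name and the stored intersection are illustrative
# stand-ins, not the fuzzer's real config.TAINTMAP/MOSTCOMMON structures.
def sketch_most_common(taint_map):
    """taint_map: {input_name: {offset: [CMP values seen at that offset]}}"""
    most_common = {}
    if not taint_map:
        return most_common
    first = next(iter(taint_map.values()))
    for offset, values in first.items():
        shared = set(values)
        for per_input in taint_map.values():
            if offset not in per_input:
                shared = set()
                break
            shared &= set(per_input[offset])
        if shared:
            most_common[offset] = sorted(shared)
    return most_common

# Example: offset 0 is compared against 'P' in every input, so it is "most common":
#   sketch_most_common({"a.png": {0: ["P"], 4: ["x"]}, "b.png": {0: ["P", "Q"]}})
#   -> {0: ['P']}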
def change_bytes_from_cmp(self, original, fl, cmp):
    if len(self.currentTaintMap) == 0:
        return original
    if self.currentMutation == -1:
        gautils.die("[-] Mutation not created : impossible !")
        return original
    buffer = list(original)
    if cmp.cmpSize == -1:
        # print "change_bytes: cmp.cmpSize == -1"
        return original
    bytesChanged = 0
    mutationHistory = self.mutationHistory[fl][cmp.offsetsInInput[0]]
    if cmp.offsetsInInput[0] in self.mutationHistory[fl] and len(mutationHistory) == 1:
        mutationHistory.append(dict())
    mutationHistoryData = mutationHistory[1]
    for offset in cmp.offsetsInInput:
        if offset >= len(buffer):
            continue
        mutationHistoryData.update({offset: []})
        mutationHistoryDataCurrent = mutationHistoryData[offset]
        for i in range(0, cmp.cmpSize):
            currentOffset = int(offset + i)
            if currentOffset >= len(buffer):
                break
            if cmp.taintType == taintTypeEnum.UNKNOWN:
                # save the value
                mutationHistoryDataCurrent.append(buffer[currentOffset])
                buffer[currentOffset] = self.currentMutation[i]
                bytesChanged += 1
            elif cmp.taintType == taintTypeEnum.SINGLE_BYTE:
                # save the value
                mutationHistoryDataCurrent.append(buffer[currentOffset])
                buffer[currentOffset] = self.currentMutation[0]
                bytesChanged += 1
            elif cmp.taintType == taintTypeEnum.ARRAY:
                # save the value
                mutationHistoryDataCurrent.append(buffer[currentOffset])
                # TODO : improve strategy
                buffer[currentOffset] = self.currentMutation[i]
                bytesChanged += 1
    gautils.debug_print("[+] Mutation applied 0x%x to 0x%x" % (offset, offset + bytesChanged))
    return ''.join(buffer)
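# change_bytes_from_cmp() above overwrites the tainted offsets of a CMP with the
# current mutation bytes and records the bytes it replaced so they can be restored
# later.  Below is a stand-alone sketch of that overwrite-and-remember core; the
# function name and the returned (string, saved) pair are illustrative only, not
# the class's real API.
def apply_cmp_mutation_sketch(data, offsets, mutation):
    """data: input as a str; offsets: tainted start offsets; mutation: replacement bytes."""
    buf = list(data)
    saved = {}  # offset -> original byte, so the mutation can be rolled back later
    for base in offsets:
        for i, byte in enumerate(mutation):
            pos = base + i
            if pos >= len(buf):
                break
            saved[pos] = buf[pos]
            buf[pos] = byte
    return ''.join(buf), saved

# Example: replace two bytes at offset 4 with a value observed in a CMP:
#   mutated, saved = apply_cmp_mutation_sketch("ABCDEFGH", [4], "\x13\x37")
#   mutated == "ABCD\x13\x37GH" and saved == {4: 'E', 5: 'F'}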
def read_taint(fpath): ''' This function read cmp.out file and parses it to extract offsets and coresponding values and returns a tuple(alltaint, dict). dictionary: with key as offset and values as a set of hex values checked for that offset in the cmp instruction. Currently, we want to extract values s.t. one of the operands of CMP instruction is imm value for this set of values. ADDITION: we also read lea.out file to know offsets that were used in LEA instructions. There offsets are good candidates to fuzz with extreme values, like \xffffffff, \x80000000. ''' taintOff = dict( ) #dictionary to keep info about single tainted offsets and values. alltaintoff = set( ) #it keeps all the offsets (expluding the above case) that were used at a CMP instruction. fsize = os.path.getsize(fpath) offlimit = 0 #check if taint was generated, else exit if (os.path.getsize("cmp.out") == 0): gau.die("Empty cmp.out file! Perhaps taint analysis did not run...") cmpFD = open("cmp.out", "r") # each line of the cmp.out has the following format: #32 reg imm 0xb640fb9d {155} {155} {155} {155} {} {} {} {} 0xc0 0xff #g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 # we need a regexp to parse this string. if config.BIT64 == False: pat = re.compile( r"(\d+) ([a-z]+) ([a-z]+) (\w+) \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} (\w+) (\w+)", re.I) else: pat = re.compile( r"(\d+) ([a-z]+) ([a-z]+) (\w+) \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} (\w+) (\w+)", re.I) for ln in cmpFD: if offlimit > config.MAXFILELINE: break offlimit += 1 mat = pat.match(ln) try: # this is a check to see if CMP entry is complete. if config.BIT64 == False: rr = mat.group(14) else: rr = mat.group(22) except: continue if config.BIT64 == False: op1start = 5 op2start = 9 op1val = 13 op2val = 14 else: op1start = 5 op2start = 13 op1val = 21 op2val = 22 if config.ALLCMPOP == True: if mat.group(op1start) == '' and mat.group(op2start) != '': tempoff = get_non_empty(mat, op2start) #mat.group(9) if tempoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op1val), fsize) elif mat.group(op2start) == '' and mat.group(op1start) != '': tempoff = get_non_empty(mat, op1start) #mat.group(5) if tumpoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op2val), fsize) else: ofs, hexstr = (-1000, []) if ofs != -1000: if config.ALLBYTES == True or ( hexstr != '\xff\xff\xff\xff' and hexstr != '\x00'): #this is a special case if ofs not in taintOff: taintOff[ofs] = [ hexstr ] # we are going to change set to list for "last" offset checked. else: #if hexstr not in taintOff[ofs]: if config.ALLBYTES == True or isNonPrintable( hexstr) == False: taintOff[ofs].append(hexstr) else: alltaintoff.update(set(hexstr)) else: if mat.group(2) == 'imm': tempoff = get_non_empty(mat, op2start) #mat.group(9) if tempoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op1val), fsize) if ofs != -1000: if config.ALLBYTES == True or ( hexstr != '\xff\xff\xff\xff' and hexstr != '\x00'): #this is a special case if ofs not in taintOff: taintOff[ofs] = [ hexstr ] # we are going to change set to list for "last" offset checked. 
else: #if hexstr not in taintOff[ofs]: if config.ALLBYTES == True or isNonPrintable( hexstr) == False: taintOff[ofs].append(hexstr) else: #alltaintoff.update(set(offsets)) alltaintoff.update(set(hexstr)) elif mat.group(3) == 'imm': tempoff = get_non_empty(mat, op1start) #mat.group(5) if tempoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op2val), fsize) if ofs != -1000: if config.ALLBYTES == True or ( hexstr != '\xff\xff\xff\xff' and hexstr != '\x00'): #this is a special case if ofs not in taintOff: taintOff[ofs] = [ hexstr ] # we are going to change set to list for "last" offset checked. else: #if hexstr not in taintOff[ofs]: if config.ALLBYTES == True or isNonPrintable( hexstr) == False: taintOff[ofs].append(hexstr) else: alltaintoff.update(set(hexstr)) elif ((mat.group(2) == 'mem' and mat.group(3) == 'mem') or (mat.group(2) == 'reg' and mat.group(3) == 'reg')): #bylen=mat.group(1)/8 #if bylen == 1: #TOFIX: I am assuming that CMPS has second operand as constant and 1st operand is the byte from the input that we want to compare with 2nd operand. We need to handle the case when these operands are swapped. if mat.group(op1start) == '' and mat.group(op2start) != '': tempoff = get_non_empty(mat, op2start) #mat.group(9) if tempoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op1val), fsize) elif mat.group(op2start) == '' and mat.group(op1start) != '': tempoff = get_non_empty(mat, op1start) #mat.group(5) if tempoff == -1: continue ofs, hexstr = extract_offsetStr(tempoff, mat.group(op2val), fsize) else: ofs, hexstr = (-1000, []) if ofs != -1000: if config.ALLBYTES == True or ( hexstr != '\xff\xff\xff\xff' and hexstr != '\x00'): #this is a special case if ofs not in taintOff: taintOff[ofs] = [ hexstr ] # we are going to change set to list for "last" offset checked. else: #if hexstr not in taintOff[ofs]: if config.ALLBYTES == True or isNonPrintable( hexstr) == False: taintOff[ofs].append(hexstr) else: alltaintoff.update(set(hexstr)) else: tmpset = set() tmp1 = mat.group(op1start) if len(tmp1) > 0: tmpset.update(tmp1.split(',')) tmp2 = mat.group(op2start) if len(tmp2) > 0: tmpset.update(tmp2.split(',')) alltaintoff.update([int(o) for o in tmpset]) #alltaintoff.update(tmp1.split(','),tmp2.split(',')) #alltaintoff=set([int(o) for o in alltaintoff]) cmpFD.close() todel = set() for el in alltaintoff: if el > fsize - config.MINOFFSET: todel.add(el) for el in todel: alltaintoff.remove(el) #print '*',el alltaintoff.add(el - fsize) #alltaintoff.difference_update(taintOff) #print alltaintoff, taintOff return (alltaintoff, taintOff)
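# The comment inside read_taint() documents one cmp.out line.  The snippet below
# simply runs the 32-bit regular expression from that function over the documented
# example line to show what each capture group holds; the meaning noted for group 4
# (presumably the address of the CMP instruction, unused by read_taint) is an
# assumption, the rest follows from op1start/op2start/op1val/op2val in the code.
import re

CMP32_PAT = re.compile(
    r"(\d+) ([a-z]+) ([a-z]+) (\w+) \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\}"
    r" \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} (\w+) (\w+)", re.I)

def demo_cmp_line():
    line = "32 reg imm 0xb640fb9d {155} {155} {155} {155} {} {} {} {} 0xc0 0xff"
    m = CMP32_PAT.match(line)
    # g1: operand size in bits, g2/g3: operand kinds, g4: see note above,
    # g5-g8 / g9-g12: tainted input offsets for operand 1 / operand 2
    # (apparently one {...} set per operand byte), g13/g14: operand values.
    return {
        "size": m.group(1),         # '32'
        "op1_kind": m.group(2),     # 'reg'
        "op2_kind": m.group(3),     # 'imm'
        "op1_offsets": m.group(5),  # '155'
        "op1_value": m.group(13),   # '0xc0'
        "op2_value": m.group(14),   # '0xff'
    }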
def main(): banner() # first lets create the base directorty to keep all temporary data try: shutil.rmtree(config.BASETMP) except OSError: pass if os.path.isdir(config.BASETMP) == False: os.mkdir(config.BASETMP) check_env() ## parse the arguments ######### parser = argparse.ArgumentParser(description='VUzzer options') parser.add_argument('-s', '--sut', help='SUT commandline', required=True) parser.add_argument( '-i', '--inputd', help='seed input directory (relative path)', required=True) parser.add_argument( '-w', '--weight', help='path of the pickle file(s) for BB wieghts (separated by comma, in case there are two) ', required=True) parser.add_argument( '-n', '--name', help='Path of the pickle file(s) containing strings from CMP inst (separated by comma if there are two).', required=True) parser.add_argument( '-l', '--libnum', help='Nunber of binaries to monitor (only application or used libraries)', required=False, default=1) parser.add_argument('-o', '--offsets', help='base-address of application and library (if used), separated by comma', required=False, default='0x00000000') parser.add_argument( '-b', '--libname', help='library name to monitor', required=False, default='#') args = parser.parse_args() config.SUT = args.sut config.INITIALD = os.path.join(config.INITIALD, args.inputd) config.LIBNUM = int(args.libnum) config.LIBTOMONITOR = args.libname config.LIBPICKLE = [w for w in args.weight.split(',')] config.NAMESPICKLE = [n for n in args.name.split(',')] config.LIBOFFSETS = [o for o in args.offsets.split(',')] config.LIBS = args.libname # this is just to find the index of the placeholder in BBCMD list to replace it with the libname ih = config.BBCMD.index("LIBS=") config.BBCMD[ih] = "LIBS=%s" % args.libname gau.log_print( "[*] Checking tmps files" ) if config.CLEARSPECIALOUTPUT: gau.delete_special_out_file(config.SPECIALOUTPUT) if path.exists("vuzzerRun.log"): os.remove("vuzzerRun.log") gau.log_print( "[*] Checking directories" ) if config.USEPATTERNDETECTION == True: initPatterns() ################################### afl.clearDir() config.minLength = get_min_file(config.INITIALD) try: shutil.rmtree(config.KEEPD) except OSError: pass os.mkdir(config.KEEPD) try: os.mkdir("outd") except OSError: pass try: os.mkdir("outd/crashInputs") except OSError: gau.emptyDir("outd/crashInputs") crashHash = [] try: os.mkdir(config.SPECIAL) except OSError: gau.emptyDir(config.SPECIAL) try: os.mkdir(config.INTER) except OSError: gau.emptyDir(config.INTER) gau.log_print( "[*] Checking executable base address" ) ############################################################################# # let us get the base address of the main executable. ifiles = os.listdir(config.INITIALD) for fl in ifiles: tfl = os.path.join(config.INITIALD, fl) try: f = open(tfl, 'r') f.close() except: gau.die("[-] Can not open our own input %s !" % (tfl)) (ibbs, iretc) = execute(tfl) if iretc != 128: # 0 gau.die("[-] Can't run the target program '%s' !" 
% (os.path.basename(config.SUT.replace(" ","").replace("%s","")))) break # we just want to run the executable once to get its load address imgOffFd = open("imageOffset.txt", 'r') for ln in imgOffFd: if "Main:" in ln: lst = ln.split() break config.LIBOFFSETS[0] = lst[1][:] imgOffFd.close() if config.LIBTOMONITOR != '' and config.LIBTOMONITOR != '#': gau.log_print("[+] Lib %s is at 0x%x" % (config.LIBTOMONITOR, int(config.LIBOFFSETS[1], 0))) gau.log_print( "[*] Checking pickles" ) ############################################################################# # open names pickle files gau.prepareBBOffsets() # lets initialize the BBFORPRUNE list from thie cALLBB set. if len(config.cALLBB) > 0: config.BBFORPRUNE = list(config.cALLBB) else: gau.log_print("[*] cALLBB is not initialized. something is wrong!!\n") system.exit() if config.PTMODE: pt = simplept.simplept() else: pt = None gau.log_print("[*] Running vuzzer for '%s'" % (os.path.basename(config.SUT.replace(" ","").replace("%s","")))) if config.ERRORBBON == True: gbb, bbb = dry_run() else: gbb = 0 # gau.die("dry run over..") import timing # selftest() noprogress = 0 currentfit = 0 lastfit = 0 config.CRASHIN.clear() stat = open("stats.log", 'w') stat.write("**** Fuzzing started at: %s ****\n" % (datetime.now().isoformat('+'),)) stat.write("**** Initial BB for seed inputs: %d ****\n" % (gbb,)) stat.flush() os.fsync(stat.fileno()) stat.write( "Genaration\t MINfit\t MAXfit\t AVGfit MINlen\t Maxlen\t AVGlen\t #BB\t AppCov\t AllCov\n") stat.flush() os.fsync(stat.fileno()) starttime = time.clock() allnodes = set() alledges = set() try: shutil.rmtree(config.INPUTD) except OSError: pass shutil.copytree(config.INITIALD, config.INPUTD) # fisrt we get taint of the intial inputs get_taint(config.INITIALD, 1) # print "MOst common offsets and values:", config.MOSTCOMMON # print "Base address: %s"%config.LIBOFFSETS[0] # raw_input("Press enter to continue..") config.MOSTCOMFLAG = True crashhappend = False filest = os.listdir(config.INPUTD) filenum = len(filest) if filenum < config.POPSIZE: gau.create_files(config.POPSIZE - filenum) gau.log_print( '[*] Population at start is about %d files' % (len(os.listdir(config.INPUTD)))) efd = open(config.ERRORS, "w") gau.prepareBBOffsets() writecache = True genran = 0 bbslide = 40 # this is used to call run_error_BB() functions keepslide = 3 keepfilenum = config.BESTP config.SEENBB.clear() # initialize set of BB seen so far, which is 0 del config.SPECIALENTRY[:] todelete = set() # temp set to keep file names that will be deleted in the special folder oldPrintSize = 0 while True: # print "[**] Generation %d\n***********"%(genran,) del config.TEMPTRACE[:] del config.BBSEENVECTOR[:] # this is set when a config.SPECIAL gets at least one new input per generation. 
SPECIALCHANGED = False config.TMPBBINFO.clear() config.TMPBBINFO.update(config.PREVBBINFO) fitnes = dict() execs = 0 config.cPERGENBB.clear() config.GOTSTUCK = False if config.ERRORBBON == True: if genran > config.GENNUM/5: bbslide = max(bbslide, config.GENNUM/20) keepslide = max(keepslide, config.GENNUM/100) keepfilenum = keepfilenum/2 if 0 < genran < config.GENNUM/5 and genran % keepslide == 0: copy_files(config.INPUTD, config.KEEPD, keepfilenum) # lets find out some of the error handling BBs if genran > 40 and genran % bbslide == 0: stat.write("\n**** Error BB cal started ****\n") stat.flush() os.fsync(stat.fileno()) run_error_bb(pt) copy_files(config.KEEPD, config.INPUTD, len(os.listdir(config.KEEPD))*1/10) # copy_files(config.INITIALD,config.INPUTD,1) files = os.listdir(config.INPUTD) per_gen_fnum = 0 for fl in files: per_gen_fnum += 1 tfl = os.path.join(config.INPUTD, fl) iln = os.path.getsize(tfl) args = (config.SUT % tfl).split(' ') progname = os.path.basename(args[0]) (bbs, retc) = execute(tfl) filecount = len(os.listdir(config.INPUTD)) inputname = os.path.basename(args[1]) per_current_stat = (float(per_gen_fnum)/float(filecount)) * 100 per_show = (int(float(filecount) / 10.0)) if per_show == 0: per_show = 10 if per_gen_fnum % per_show == 0 or per_gen_fnum == filecount: logStr = "[%s] Gen %d : %d%% Executed %d of %d" % (time.strftime("%H:%M:%S"),genran,int(per_current_stat),per_gen_fnum, filecount) if oldPrintSize > 0: dif = oldPrintSize - len(logStr) if dif > 0: logStr += " "*dif oldPrintSize = len(logStr) gau.log_print( logStr ) else: gau.log_print( ' '*oldPrintSize ) sys.stdout.write("\033[F") logStr = "[%s] Gen %d : %d%% Executed %d of %d [%s]" % (time.strftime("%H:%M:%S"),genran,int(per_current_stat),per_gen_fnum, filecount,inputname) if oldPrintSize > 0: dif = oldPrintSize - len(logStr) if dif > 0: logStr += " "*dif oldPrintSize = len(logStr) gau.log_print( logStr ) sys.stdout.write("\033[F") if config.BBWEIGHT == True: fitnes[fl] = gau.fitnesCal2(bbs, fl, iln) else: fitnes[fl] = gau.fitnesNoWeight(bbs, fl, iln) # raw_input() execs += 1 # let us prune the inputs(if at all), whose trace is subset of the new input just got executed. 
SPECIALADDED = False if config.GOTSPECIAL == True: SPECIALCHANGED = True SPECIALADDED = True todelete.clear() form_bitvector2(bbs, fl, config.BBFORPRUNE, config.SPECIALBITVECTORS) shutil.copy(tfl, config.SPECIAL) config.SPECIALENTRY.append(fl) for sfl, bitv in config.SPECIALBITVECTORS.iteritems(): if sfl == fl: continue if (config.SPECIALBITVECTORS[fl] & bitv) == bitv: tpath = os.path.join(config.SPECIAL, sfl) os.remove(tpath) todelete.add(sfl) config.SPECIALENTRY.remove(sfl) if sfl in config.TAINTMAP: del config.TAINTMAP[sfl] for ele in todelete: del config.SPECIALBITVECTORS[ele] if retc < 0 and retc != -2: efd.write("%s: %d\n" % (tfl, retc)) efd.flush() os.fsync(efd) tmpHash = sha1OfFile(config.CRASHFILE) if tmpHash not in crashHash: crashHash.append(tmpHash) tnow = datetime.now().isoformat().replace(":", "-") nf = "%s-%s.%s" % (progname, tnow, gau.splitFilename(fl)[1]) npath = os.path.join("outd/crashInputs", nf) shutil.copyfile(tfl, npath) if SPECIALADDED == False: shutil.copy(tfl, config.SPECIAL) config.CRASHIN.add(fl) if config.STOPONCRASH == True: # efd.close() crashhappend = True break fitscore = [v for k, v in fitnes.items()] maxfit = max(fitscore) avefit = sum(fitscore)/len(fitscore) mnlen, mxlen, avlen = gau.getFileMinMax(config.INPUTD) gau.log_print( "[*] Done with all input in Gen %d" % (genran) ) gau.log_print( "[*] Calculating code coverage" ) appcov, allcov = gau.calculateCov() tnow = datetime.now().isoformat().replace(":", "-") # stat.write("\t%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %s\n"%(genran,min(fitscore),maxfit,avefit,mnlen,mxlen,avlen,len(config.cPERGENBB),appcov,allcov,tnow)) stat.write("\t%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %s\n" % (genran, min( fitscore), maxfit, avefit, mnlen, mxlen, avlen, len(config.SEENBB), appcov, allcov, tnow)) stat.flush() os.fsync(stat.fileno()) gau.log_print( "[*] Wrote to stat.log" ) seenBB = len(config.SEENBB) codeCovStr = "[+] BB Code coverage is %d" % (seenBB) if config.LASTTURNCODECOV != 0: difLastTurnStr = "(no new BB found)" codeCovDif = seenBB - config.LASTTURNCODECOV if codeCovDif > 0: difLastTurnStr = "\033[0;32m(+%d BB)\033[0m" % (codeCovDif) elif codeCovDif < 0: difLastTurnStr = "\033[0;31m(-%d BB)\033[0m" % (codeCovDif) perBBTotal = (float(seenBB) / float(len(config.ALLBB))) * 100 difGoodBB = seenBB - len(config.GOODBB) difGoodBBStr = "no" if difGoodBB > 0: difGoodBBStr = "+%d" % (difGoodBB) elif difGoodBB < 0: difGoodBBStr = "-%d" % (difGoodBB) codeCovStr += " %s (%d%% of all BB) (%s BB dif with good path)" % (difLastTurnStr, perBBTotal, difGoodBBStr) config.LASTTURNCODECOV = seenBB gau.log_print( "-"*len(codeCovStr) ) gau.log_print( codeCovStr ) gau.log_print( "-"*len(codeCovStr) ) if crashhappend == True: break # lets find out some of the error handling BBs # if genran >20 and genran%5==0: # run_error_bb(pt) genran += 1 config.CURRENTGEN += 1 # this part is to get initial fitness that will be used to determine if fuzzer got stuck. 
lastfit = currentfit # currentfit=maxfit currentfit = len(config.SEENBB) # lastfit-config.FITMARGIN < currentfit < lastfit+config.FITMARGIN: if currentfit == lastfit: noprogress += 1 else: noprogress = 0 if noprogress > 20: config.GOTSTUCK = True stat.write("Heavy mutate happens now..\n") noprogress = 0 if (genran >= config.GENNUM) and (config.STOPOVERGENNUM == True): break if len(os.listdir(config.SPECIAL)) > 0 and SPECIALCHANGED == True: if len(os.listdir(config.SPECIAL)) < config.NEWTAINTFILES: get_taint(config.SPECIAL) else: try: os.mkdir(config.TAINTTMP) except OSError: gau.emptyDir(config.TAINTTMP) if conditional_copy_files(config.SPECIAL, config.TAINTTMP, config.NEWTAINTFILES) == 0: get_taint(config.TAINTTMP) # print "MOst common offsets and values:", config.MOSTCOMMON # gg=raw_input("press any key to continue..") gau.log_print( "[*] Going for new generation creation" ) gau.createNextGeneration3(fitnes, genran) # raw_input("press any key...") efd.close() stat.close() libfd_mm.close() libfd.close() endtime = time.clock() gau.log_print( "[**] Totol time %f sec" % (endtime-starttime,) ) gau.log_print( "[**] Fuzzing done. Check %s to see if there were crashes" % ( config.ERRORS,))
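# The SPECIALBITVECTORS pruning inside the generation loop of main() above drops an
# older "special" input whenever its coverage bitvector is a subset of the newly
# added input's bitvector, i.e. (new & old) == old.  A minimal sketch of that test,
# with plain integers standing in for the real bitvectors (one bit per basic block
# in BBFORPRUNE); the names used here are illustrative.
def sketch_prune_subsumed(bitvectors, new_name):
    """bitvectors: {input_name: int bitmask of covered BBs}. Returns pruned names."""
    new_bv = bitvectors[new_name]
    pruned = [name for name, bv in bitvectors.items()
              if name != new_name and (new_bv & bv) == bv]
    for name in pruned:
        del bitvectors[name]
    return pruned

# Example: 'c' covers BBs {0,1}; 'a' covers {0} and is subsumed, 'b' covers {0,2} and stays:
#   bvs = {"a": 0b001, "b": 0b101, "c": 0b011}
#   sketch_prune_subsumed(bvs, "c")  -> ['a']   (and 'a' is removed from bvs)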
def get_taint(dirin, is_initial=0): ''' This function is used to get taintflow for each CMP instruction to find which offsets in the input are used at the instructions. It also gets the values used in the CMP.''' files = os.listdir(dirin) newTaint = [] for f in files: if f not in config.TAINTMAP: newTaint.append(f) gau.log_print("[*] Starting taintflow calculation for %d files" % (len(newTaint))) for fl in files: if fl in config.TAINTMAP: continue pfl = os.path.abspath(os.path.join(dirin, fl)) if is_initial == 1: tnow1 = datetime.now() gau.log_print( "[*] Launching taint analysis of %s" % ( os.path.basename(fl) ) ) rcode = execute2(pfl, fl, is_initial) if is_initial == 1: tnow2 = datetime.now() config.TIMEOUT = max( config.TIMEOUT, 2*((tnow2-tnow1).total_seconds())) if rcode == 255: gau.die("pintool terminated with error 255 on input %s" % (pfl,)) continue config.TAINTMAP[fl] = read_taint(pfl) config.LEAMAP[fl] = read_lea() gau.log_print("[*] Taintflow parsing done") if config.MOSTCOMFLAG == False: gau.log_print("[*] Computing MOSTCOMMON calculation") tmpTaintMap = config.TAINTMAP.copy() for file, values in tmpTaintMap.iteritems(): for cmp in values[1]: offset = cmp.offsetsInInput[0] foundInAll = True foundMap = list() for file2, values2 in tmpTaintMap.iteritems(): foundInThisFile = False for cmp2 in values2[1]: if offset in cmp2.offsetsInInput and cmp.taintValue == cmp2.taintValue: foundInThisFile = True foundMap.append(foundInThisFile) if foundInThisFile == False: break for file1 in foundMap: if file1 == False: foundInAll = False if foundInAll == True: config.MOSTCOMMON.append(cmp) # delete it from taint map config.TAINTMAP[file][1].remove(cmp) break else: gau.log_print("[*] Computing MORECOMMON calculation") tmpTaintMap = config.TAINTMAP.copy() for file, values in tmpTaintMap.iteritems(): for cmp in values[1]: offset = cmp.offsetsInInput[0] foundInAll = True foundMap = list() for file2, values2 in tmpTaintMap.iteritems(): foundInThisFile = False for cmp2 in values2[1]: if offset in cmp2.offsetsInInput: dif = cmp.taintValue - cmp2.taintValue if dif < 0: dif *= -1 if dif < 3: foundInThisFile = True foundMap.append(foundInThisFile) if foundInThisFile == False: break for file1 in foundMap: if file1 == False: foundInAll = False if foundInAll == True: config.MORECOMMON.append(cmp) # delete it from taint map config.TAINTMAP[file][1].remove(cmp) break #print '------------------------------------------------------------------------------' if len(config.MOSTCOMMON) > 0: gau.log_print("[*] MOSTCOMMON size is about %d cmps" % (len(config.MOSTCOMMON))) #for cmp in config.MOSTCOMMON: # print cmp if len(config.MORECOMMON) > 0: gau.log_print("[*] MORECOMMON size is about %d cmps" % (len(config.MORECOMMON))) #for cmp in config.MORECOMMON: # print cmp gau.log_print("[*] Taintflow finished")
def read_taint(fpath): ''' This function read cmp.out file and parses it to extract offsets and coresponding values and returns a tuple(alltaint, dict). dictionary: with key as offset and values as a set of hex values checked for that offset in the cmp instruction. Currently, we want to extract values s.t. one of the operands of CMP instruction is imm value for this set of values. ADDITION: we also read lea.out file to know offsets that were used in LEA instructions. There offsets are good candidates to fuzz with extreme values, like \xffffffff, \x80000000. ''' #print "--------------------------------------------" gau.log_print( "[*] Parsing taint analysis of %s" % ( os.path.basename(fpath) ) ) exploitableTaintedOffset = dict() allTaintedOffsets = dict() fsize = os.path.getsize(fpath) offlimit = 0 # check if taint was generated, else exit if (os.path.getsize("cmp.out") == 0): gau.die("[-] Empty cmp.out file! Perhaps taint analysis did not run") cmpFD = open("cmp.out", "r") # each line of the cmp.out has the following format: # 32 reg imm 0xb640fb9d {155} {155} {155} {155} {} {} {} {} 0xc0 0xff # g1 g2 g3 g4 g5 g6 g7 g8 g9 g10 g11 g12 g13 g14 # we need a regexp to parse this string. if config.BIT64 == False: pat = re.compile( r"(\d+) ([a-z]+) ([a-z]+) (\w+) \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} (\w+) (\w+)", re.I) else: pat = re.compile( r"(\d+) ([a-z]+) ([a-z]+) (\w+) \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} \{([0-9,]*)\} (\w+) (\w+)", re.I) cmpO.id = 0 goodToProcessList = list() allTaintedOffsetsList = list() if config.FILTERDUPLICATED: rawCmpLines = set() else: rawCmpLines = list() cmpoutLineCount = 0 # remove duplicates for line in cmpFD: cmpoutLineCount += 1 if config.FILTERDUPLICATED: if line not in rawCmpLines: rawCmpLines.add(line) else: rawCmpLines.append(line) cmpFD.close() #print "original cmp.out size : " + str(cmpoutLineCount) #print "cmp.out after size : " + str(len(rawCmpLines)) allCmps = list() for ln in rawCmpLines: if offlimit > config.MAXFILELINE: break offlimit += 1 mat = pat.match(ln) try: # this is a check to see if CMP entry is complete. if config.BIT64 == False: rr = mat.group(14) else: rr = mat.group(22) except: continue cmp = cmpO.cmpOperation(mat) # TODO : if cmp.offset == -1 # the cmp is not valid or will not be handle if len(cmp.offsetsInInput) == 0: continue # save each valid cmp allCmps.append(cmp) # detect any pattern in cmp.out if config.USEPATTERNDETECTION == True: gau.log_print("[*] Running pattern detection for %s" % ( os.path.basename(fpath) )) # search and apply patterns allCmps = detectPatterns(allCmps, fpath) for cmp in allCmps: # if the cmp is valid and good to be used with taint based changes if cmp.isGoodToTaintChanges == True: goodToProcessList.append(cmp) # if it's just a normal cmp else: allTaintedOffsetsList.append(cmp) gau.debug_print("[*] allTaintedOffsetsList size : %d" % (len(allTaintedOffsetsList))) gau.debug_print("[*] goodToProcessList size : %d" % (len(goodToProcessList))) return (allTaintedOffsetsList, goodToProcessList)
def getInputTaintedOffsets(self, matchCmpLine):
    """ Get the tainted offsets of the cmp and decide whether it is suitable for taint-based changes """
    # read tainted offsets
    foundStartOffsetOpe1 = self.getTaintedStartOffset(matchCmpLine, self.op1start)
    foundStartOffsetOpe2 = self.getTaintedStartOffset(matchCmpLine, self.op2start)
    if foundStartOffsetOpe1 != -1:
        self.offsetListOpe1 = self.getTaintedOffsets(matchCmpLine, foundStartOffsetOpe1, self.op1start)
    if foundStartOffsetOpe2 != -1:
        self.offsetListOpe2 = self.getTaintedOffsets(matchCmpLine, foundStartOffsetOpe2, self.op2start)
    taintOffsetList = list()
    if self.operand1 == 'imm' and self.operand2 == 'imm':
        gautils.die("[-] Immediate compared to immediate : impossible !")
    # check for a valid cmp with an immediate value that we can collect and apply to the input
    elif self.operand1 == 'imm':
        if config.ALLBYTES == True or (self.valueOperand1 != 0xffffffff and self.valueOperand1 != 0x00):
            # TODO : improve the taint selection
            if len(self.offsetListOpe2) < 5 and self.isNonPrintable(self.valueOperand1) == False:
                # define which operand we want
                self.isGoodToTaintChanges = True
                taintOffsetList = self.offsetListOpe2
                self.taintValue = self.valueOperand1
    elif self.operand2 == 'imm':
        if config.ALLBYTES == True or (self.valueOperand2 != 0xffffffff and self.valueOperand2 != 0x00):
            # TODO : improve the taint selection
            if len(self.offsetListOpe1) < 5 and self.isNonPrintable(self.valueOperand2) == False:
                self.isGoodToTaintChanges = True
                taintOffsetList = self.offsetListOpe1
                self.taintValue = self.valueOperand2
    elif ((self.operand1 == 'mem' and self.operand2 == 'mem') or
          (self.operand1 == 'reg' and self.operand2 == 'reg')):
        selectedValue = -1
        if foundStartOffsetOpe1 != -1 and len(self.offsetListOpe1) > 0:
            selectedValue = self.valueOperand1
            taintOffsetList = self.offsetListOpe1
        elif foundStartOffsetOpe2 != -1 and len(self.offsetListOpe2) > 0:
            selectedValue = self.valueOperand2
            taintOffsetList = self.offsetListOpe2
        if selectedValue != -1 and (config.ALLBYTES == True or
                                    (selectedValue != 0xffffffff and selectedValue != 0x00)):
            if self.isNonPrintable(selectedValue) == False:
                self.isGoodToTaintChanges = True
                self.taintValue = selectedValue
    # we don't handle this case in taint changes,
    # so it will end up in allTaintedOffsets
    if self.isGoodToTaintChanges == False:
        # NOTE : self.taintType will be set twice, but it is not used for non-taint changes
        if foundStartOffsetOpe1 != -1:
            self.getTaintedOffsetFromType(self.offsetListOpe1)
        if foundStartOffsetOpe2 != -1:
            self.getTaintedOffsetFromType(self.offsetListOpe2)
    # offsets tainted with an immediate value that will be applied to the input,
    # so they will end up in goodTaintedOffset
    else:
        if len(taintOffsetList) == 0:
            gautils.die("[-] Something went wrong while parsing '%s'" % self)
        self.getTaintedOffsetFromType(taintOffsetList)
def dry_run():
    ''' This function executes the initial test set to determine error-handling BBs
    in the SUT. Such BBs are given zero weight during actual fuzzing. '''
    print "[*] Starting dry run now..."
    tempbad = []
    dfiles = os.listdir(config.INITIALD)
    if len(dfiles) < 3:
        gau.die("not sufficient initial files")
    for fl in dfiles:
        tfl = os.path.join(config.INITIALD, fl)
        try:
            f = open(tfl, 'r')
            f.close()
        except:
            gau.die("can not open our own input %s!" % (tfl,))
        # ret = execute_without_analysis(tfl)
        (bbs, retc) = execute(tfl)
        if retc < 0:
            gau.die("looks like we already got a crash!!")
        config.GOODBB |= set(bbs.keys())
    print "[*] Finished good inputs (%d)" % (len(config.GOODBB),)
    # now let us run the SUT on probably invalid files. For that we need to create them first.
    print "[*] Starting bad inputs.."
    lp = 0
    badbb = set()
    while lp < 2:
        try:
            shutil.rmtree(config.INPUTD)
            shutil.rmtree(config.BUGD)
        except OSError:
            pass
        try:
            os.mkdir(config.INPUTD)
        except:
            gau.emptyDir(config.INPUTD)
        try:
            os.mkdir(config.BUGD)
        except:
            gau.emptyDir(config.BUGD)
        gau.create_files_dry(30)
        dfiles = os.listdir(config.INPUTD)
        for fl in dfiles:
            tfl = os.path.join(config.INPUTD, fl)
            (bbs, retc) = execute(tfl)
            if retc < 0:
                gau.die("looks like we already got a crash!!")
            tempbad.append(set(bbs.keys()) - config.GOODBB)
        tempcomn = set(tempbad[0])
        for di in tempbad:
            tempcomn.intersection_update(set(di))
        badbb.update(tempcomn)
        lp += 1
    config.ERRORBBALL = badbb.copy()
    print "[*] Finished common BB. Total such BBs: %d" % (len(badbb),)
    for ebb in config.ERRORBBALL:
        print "error bb: 0x%x" % (ebb,)
    time.sleep(5)
    if config.LIBNUM == 2:
        baseadr = config.LIBOFFSETS[1]
        for ele in tempcomn:
            if ele < baseadr:
                config.ERRORBBAPP.add(ele)
            else:
                config.ERRORBBLIB.add(ele - baseadr)
    del tempbad
    del badbb
    # del tempgood
    return len(config.GOODBB), len(config.ERRORBBALL)
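# dry_run() above labels as error-handling code the basic blocks that show up in the
# trace of every (probably invalid) generated input but never in any valid seed's
# trace; such blocks are later given zero weight.  A small sketch of that set
# arithmetic, with hypothetical trace sets standing in for the real execute() results:
def sketch_error_bbs(good_traces, bad_traces):
    """good_traces/bad_traces: lists of sets of BB addresses from valid/invalid inputs."""
    good_bbs = set().union(*good_traces) if good_traces else set()
    # BBs reached by a bad input that no good input ever reached
    per_bad = [trace - good_bbs for trace in bad_traces]
    # keep only the blocks common to every bad input (likely the rejection path)
    error_bbs = set.intersection(*per_bad) if per_bad else set()
    return good_bbs, error_bbs

# Example: BB 0x4005 appears only when malformed inputs are rejected:
#   good = [{0x4000, 0x4001}, {0x4000, 0x4002}]
#   bad  = [{0x4000, 0x4005}, {0x4000, 0x4005, 0x4006}]
#   sketch_error_bbs(good, bad)[1] == {0x4005}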
def main(): # first lets create the base directorty to keep all temporary data try: shutil.rmtree(config.BASETMP) except OSError: pass if os.path.isdir(config.BASETMP) == False: os.mkdir(config.BASETMP) check_env() ## parse the arguments ######### parser = argparse.ArgumentParser(description='VUzzer options') parser.add_argument( '-s', '--sut', help='SUT commandline with %s as placeholder for SUT input', required=True) parser.add_argument('-i', '--inputd', help='seed input directory (relative path)', required=True) parser.add_argument( '-w', '--weight', help= 'path of the pickle file(s) for BB wieghts (separated by comma, in case there are two) ', required=True) parser.add_argument( '-n', '--name', help= 'Path of the pickle file(s) containing strings from CMP inst (separated by comma if there are two).', required=True) parser.add_argument( '-l', '--libnum', help= 'Nunber of binaries to monitor (only application or used libraries)', required=False, default=1) parser.add_argument( '-o', '--offsets', help= 'base-address of application and library (if used), separated by comma', required=False, default='0x0000000000000000') parser.add_argument('-b', '--libname', help='library name to monitor', required=False, default='') args = parser.parse_args() config.SUT = args.sut config.INITIALD = os.path.join(config.INITIALD, args.inputd) config.LIBNUM = int(args.libnum) config.LIBTOMONITOR = args.libname config.LIBPICKLE = [w for w in args.weight.split(',')] config.NAMESPICKLE = [n for n in args.name.split(',')] config.LIBOFFSETS = [o for o in args.offsets.split(',')] config.LIBS = args.libname #ih=config.BBCMD.index("LIBS=") # this is just to find the index of the placeholder in BBCMD list to replace it with the libname ih = config.BBCMD.index( "#" ) # this is just to find the index of the placeholder in BBCMD list to replace it with the libname #config.BBCMD[ih]="LIBS=%s" % args.libname config.BBCMD[ih] = args.libname ################################### config.minLength = get_min_file(config.INITIALD) try: shutil.rmtree(config.KEEPD) except OSError: pass os.mkdir(config.KEEPD) try: os.mkdir("outd") except OSError: pass try: os.mkdir("outd/crashInputs") except OSError: gau.emptyDir("outd/crashInputs") crashHash = [] try: os.mkdir(config.SPECIAL) except OSError: gau.emptyDir(config.SPECIAL) try: os.mkdir(config.INTER) except OSError: gau.emptyDir(config.INTER) ############################################################################# #let us get the base address of the main executable. ifiles = os.listdir(config.INITIALD) for fl in ifiles: tfl = os.path.join(config.INITIALD, fl) try: f = open(tfl, 'r') f.close() except: gau.die("can not open our own input %s!" % (tfl, )) (ibbs, iretc) = execute(tfl) break # we just want to run the executable once to get its load address imgOffFd = open("imageOffset.txt", 'r') for ln in imgOffFd: if "Main:" in ln: lst = ln.split() break config.LIBOFFSETS[0] = lst[1][:] imgOffFd.close() ############################################################################# ###### open names pickle files gau.prepareBBOffsets() # lets initialize the BBFORPRUNE list from thie cALLBB set. if len(config.cALLBB) > 0: config.BBFORPRUNE = list(config.cALLBB) else: print "[*]: cALLBB is not initialized. 
something is wrong!!\n" system.exit() if config.PTMODE: pt = simplept.simplept() else: pt = None if config.ERRORBBON == True: gbb, bbb = dry_run() else: gbb = 0 # gau.die("dry run over..") import timing #selftest() noprogress = 0 currentfit = 0 lastfit = 0 config.CRASHIN.clear() stat = open("stats.log", 'w') stat.write("**** Fuzzing started at: %s ****\n" % (datetime.now().isoformat('+'), )) stat.write("**** Initial BB for seed inputs: %d ****\n" % (gbb, )) stat.flush() os.fsync(stat.fileno()) stat.write( "Genaration\t MINfit\t MAXfit\t AVGfit MINlen\t Maxlen\t AVGlen\t #BB\t AppCov\t AllCov\n" ) stat.flush() os.fsync(stat.fileno()) starttime = time.clock() allnodes = set() alledges = set() try: shutil.rmtree(config.INPUTD) except OSError: pass shutil.copytree(config.INITIALD, config.INPUTD) # fisrt we get taint of the intial inputs get_taint(config.INITIALD, 1) #print "MOst common offsets and values:", config.MOSTCOMMON #print "Base address: %s"%config.LIBOFFSETS[0] #raw_input("Press enter to continue..") config.MOSTCOMFLAG = True crashhappend = False filest = os.listdir(config.INPUTD) filenum = len(filest) if filenum < config.POPSIZE: gau.create_files(config.POPSIZE - filenum) if len(os.listdir(config.INPUTD)) != config.POPSIZE: gau.die("something went wrong. number of files is not right!") efd = open(config.ERRORS, "w") gau.prepareBBOffsets() writecache = True genran = 0 bbslide = 100 # this is used to call run_error_BB() functions. currently, i have decided to not call it thus a long wait keepslide = 3 keepfilenum = config.BESTP config.SEENBB.clear() #initialize set of BB seen so far, which is 0 del config.SPECIALENTRY[:] todelete = set( ) #temp set to keep file names that will be deleted in the special folder while True: #print "[**] Generation %d\n***********"%(genran,) del config.TEMPTRACE[:] del config.BBSEENVECTOR[:] SPECIALCHANGED = False # this is set when a config.SPECIAL gets at least one new input per generation. config.TMPBBINFO.clear() config.TMPBBINFO.update(config.PREVBBINFO) fitnes = dict() execs = 0 config.cPERGENBB.clear() config.GOTSTUCK = False if config.ERRORBBON == True: if genran > config.GENNUM / 5: bbslide = max(bbslide, config.GENNUM / 20) keepslide = max(keepslide, config.GENNUM / 100) keepfilenum = keepfilenum / 2 if 0 < genran < config.GENNUM / 5 and genran % keepslide == 0: copy_files(config.INPUTD, config.KEEPD, keepfilenum) #lets find out some of the error handling BBs if genran > 2000 and genran % bbslide == 0: # large number 2000 is to prevent not starting intermediate error BB cal. it is expensive and I am working on it. stat.write("\n**** Error BB cal started ****\n") stat.flush() os.fsync(stat.fileno()) run_error_bb(pt) copy_files(config.KEEPD, config.INPUTD, len(os.listdir(config.KEEPD)) * 1 / 10) #copy_files(config.INITIALD,config.INPUTD,1) files = os.listdir(config.INPUTD) per_gen_fnum = 0 for fl in files: per_gen_fnum += 1 tfl = os.path.join(config.INPUTD, fl) iln = os.path.getsize(tfl) args = (config.SUT % tfl).split(' ') progname = os.path.basename(args[0]) (bbs, retc) = execute(tfl) if per_gen_fnum % 10 == 0: print "[**] Gen: %d. Executed %d of %d.**" % ( genran, per_gen_fnum, config.POPSIZE) if config.BBWEIGHT == True: fitnes[fl] = gau.fitnesCal2(bbs, fl, iln) else: fitnes[fl] = gau.fitnesNoWeight(bbs, fl, iln) #raw_input() execs += 1 #let us prune the inputs(if at all), whose trace is subset of the new input just got executed. 
SPECIALADDED = False if config.GOTSPECIAL == True: SPECIALCHANGED = True SPECIALADDED = True todelete.clear() form_bitvector2(bbs, fl, config.BBFORPRUNE, config.SPECIALBITVECTORS) shutil.copy(tfl, config.SPECIAL) config.SPECIALENTRY.append(fl) for sfl, bitv in config.SPECIALBITVECTORS.iteritems(): if sfl == fl: continue if (config.SPECIALBITVECTORS[fl] & bitv) == bitv: tpath = os.path.join(config.SPECIAL, sfl) os.remove(tpath) todelete.add(sfl) config.SPECIALENTRY.remove(sfl) if sfl in config.TAINTMAP: del config.TAINTMAP[sfl] for ele in todelete: del config.SPECIALBITVECTORS[ele] if retc < 0 and retc != -2: #print "[*]Error code is %d"%(retc,) efd.write("%s: %d\n" % (tfl, retc)) efd.flush() os.fsync(efd) tmpHash = sha1OfFile(config.CRASHFILE) if tmpHash not in crashHash: crashHash.append(tmpHash) tnow = datetime.now().isoformat().replace(":", "-") nf = "%s-%s.%s" % (progname, tnow, gau.splitFilename(fl)[1]) npath = os.path.join("outd/crashInputs", nf) shutil.copyfile(tfl, npath) if SPECIALADDED == False: shutil.copy(tfl, config.SPECIAL) config.CRASHIN.add(fl) if config.STOPONCRASH == True: #efd.close() crashhappend = True break fitscore = [v for k, v in fitnes.items()] maxfit = max(fitscore) avefit = sum(fitscore) / len(fitscore) mnlen, mxlen, avlen = gau.getFileMinMax(config.INPUTD) print "[*] Done with all input in Gen, starting SPECIAL. \n" appcov, allcov = gau.calculateCov() tnow = datetime.now().isoformat().replace(":", "-") #stat.write("\t%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %s\n"%(genran,min(fitscore),maxfit,avefit,mnlen,mxlen,avlen,len(config.cPERGENBB),appcov,allcov,tnow)) stat.write("\t%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %s\n" % (genran, min(fitscore), maxfit, avefit, mnlen, mxlen, avlen, len(config.SEENBB), appcov, allcov, tnow)) stat.flush() os.fsync(stat.fileno()) print "[*] Wrote to stat.log\n" if crashhappend == True: break #lets find out some of the error handling BBs #if genran >20 and genran%5==0: # run_error_bb(pt) genran += 1 #this part is to get initial fitness that will be used to determine if fuzzer got stuck. lastfit = currentfit #currentfit=maxfit currentfit = len(config.SEENBB) if currentfit == lastfit: #lastfit-config.FITMARGIN < currentfit < lastfit+config.FITMARGIN: noprogress += 1 else: noprogress = 0 if noprogress > 20: config.GOTSTUCK = True stat.write("Heavy mutate happens now..\n") noprogress = 0 if (genran >= config.GENNUM) and (config.STOPOVERGENNUM == True): break if len(os.listdir(config.SPECIAL)) > 0 and SPECIALCHANGED == True: if len(os.listdir(config.SPECIAL)) < config.NEWTAINTFILES: get_taint(config.SPECIAL) else: try: os.mkdir(config.TAINTTMP) except OSError: gau.emptyDir(config.TAINTTMP) if conditional_copy_files(config.SPECIAL, config.TAINTTMP, config.NEWTAINTFILES) == 0: get_taint(config.TAINTTMP) #print "MOst common offsets and values:", config.MOSTCOMMON #gg=raw_input("press any key to continue..") print "[*] Going for new generation creation.\n" gau.createNextGeneration3(fitnes, genran) #raw_input("press any key...") efd.close() stat.close() libfd_mm.close() libfd.close() endtime = time.clock() print "[**] Totol time %f sec." % (endtime - starttime, ) print "[**] Fuzzing done. Check %s to see if there were crashes.." % ( config.ERRORS, )
def main(): # first lets create the base directorty to keep all temporary data try: shutil.rmtree(config.BASETMP) except OSError: pass if os.path.isdir(config.BASETMP) == False: os.mkdir(config.BASETMP) check_env() ## parse the arguments ######### parser = argparse.ArgumentParser(description='VUzzer options') parser.add_argument('-s', '--sut', help='SUT commandline', required=True) parser.add_argument('-i', '--inputd', help='seed input directory (relative path)', required=True) parser.add_argument( '-w', '--weight', help= 'path of the pickle file(s) for BB wieghts (separated by comma, in case there are two) ', required=True) parser.add_argument( '-n', '--name', help= 'Path of the pickle file(s) containing strings from CMP inst (separated by comma if there are two).', required=True) parser.add_argument( '-l', '--libnum', help= 'Nunber of binaries to monitor (only application or used libraries)', required=False, default=1) parser.add_argument( '-o', '--offsets', help= 'base-address of application and library (if used), separated by comma', required=False, default='0x00000000') parser.add_argument('-b', '--libname', help='library name to monitor', required=False, default='') args = parser.parse_args() config.SUT = args.sut config.INITIALD = os.path.join(config.INITIALD, args.inputd) config.LIBNUM = int(args.libnum) config.LIBTOMONITOR = args.libname config.LIBPICKLE = [w for w in args.weight.split(',')] config.NAMESPICKLE = [n for n in args.name.split(',')] config.LIBOFFSETS = [o for o in args.offsets.split(',')] ih = config.PINCMD.index("#") config.PINCMD[ih] = args.libname ################################### config.minLength = get_min_file(config.INITIALD) try: shutil.rmtree(config.KEEPD) except OSError: pass os.mkdir(config.KEEPD) try: os.mkdir("outd") except OSError: pass try: os.mkdir("outd/crashInputs") except OSError: gau.emptyDir("outd/crashInputs") try: os.mkdir("outd/hangs") except OSError: gau.emptyDir("outd/hangs") try: os.mkdir("outd/temp") except: gau.emptyDir("outd/temp") crashHash = [] try: os.mkdir(config.SPECIAL) except OSError: gau.emptyDir(config.SPECIAL) try: os.mkdir(config.INTER) except OSError: gau.emptyDir(config.INTER) ###### open names pickle files gau.prepareBBOffsets() if len(config.cALLBB) > 0: config.BBFORPRUNE = list(config.cALLBB) else: print "[*]: cALLBB is not initialized. 
something is wrong!!\n" system.exit() if config.PTMODE: pt = simplept.simplept() else: pt = None if config.ERRORBBON == True: gbb, bbb = dry_run() else: gbb = 0 # gau.die("dry run over..") import timing #selftest() noprogress = 0 currentfit = 0 lastfit = 0 config.CRASHIN.clear() stat = open("stats.log", 'w') stat.write("**** Fuzzing started at: %s ****\n" % (datetime.now().isoformat('+'), )) stat.write("**** Initial BB for seed inputs: %d ****\n" % (gbb, )) stat.flush() os.fsync(stat.fileno()) stat.write( "Genaration\t MINfit\t MAXfit\t AVGfit MINlen\t Maxlen\t AVGlen\t #BB\t AppCov\t AllCov\n" ) stat.flush() os.fsync(stat.fileno()) starttime = time.clock() allnodes = set() alledges = set() try: shutil.rmtree(config.INPUTD) #shutil.rmtree(config.BUGD) except OSError: pass shutil.copytree(config.INITIALD, config.INPUTD) # fisrt we get taint of the intial inputs get_taint(config.INITIALD, 1) print "MOst common offsets and values:", config.MOSTCOMMON #gg=raw_input("press enter to continue..") #gau_new.initialFuzz() config.MOSTCOMFLAG = True crashhappend = False filest = os.listdir(config.INPUTD) filenum = len(filest) if filenum < config.POPSIZE: gau.create_files(config.POPSIZE - filenum) if len(os.listdir(config.INPUTD)) != config.POPSIZE: gau.die("something went wrong. number of files is not right!") efd = open(config.ERRORS, "w") gau.prepareBBOffsets() writecache = True genran = 0 bbslide = 10 # this is used to call run_error_BB() functions keepslide = 3 keepfilenum = config.BESTP config.SEENBB.clear() del config.SPECIALENTRY[:] todelete = set() inputs_new_run = [] while True: print "[**] Generation %d\n***********" % (genran, ) #del config.SPECIALENTRY[:] del config.TEMPTRACE[:] del config.BBSEENVECTOR[:] #config.SEENBB.clear() SPECIALCHANGED = False # this is set when at least one new input is added to the config.SPECIAL folder. 
config.TMPBBINFO.clear() config.TMPBBINFO.update(config.PREVBBINFO) fitnes = dict() execs = 0 config.cPERGENBB.clear() config.GOTSTUCK = False if config.ERRORBBON == True: if genran > config.GENNUM / 5: bbslide = max(bbslide, config.GENNUM / 20) keepslide = max(keepslide, config.GENNUM / 100) keepfilenum = keepfilenum / 2 #config.cPERGENBB.clear() #config.GOTSTUCK=False #if 0< genran < config.GENNUM/5 and genran%keepslide == 0: # copy_files(config.INPUTD,config.KEEPD,keepfilenum) #lets find out some of the error handling BBs if genran > 20 and genran % bbslide == 0: stat.write("\n**** Error BB cal started ****\n") stat.flush() os.fsync(stat.fileno()) #run_error_bb(pt) #copy_files(config.KEEPD,config.INPUTD,len(os.listdir(config.KEEPD))*1/10) #copy_files(config.INITIALD,config.INPUTD,1) if genran == 1 or genran % 10 == 0: #f_log = open('out.txt', 'a') #print >> f_log, genran, inputs_new_run #f_log.close() gau.createBufferOverflowinputs(inputs_new_run) gau.createIntegerOverflowInputs(inputs_new_run) #gau.createMallocInputs(inputs_new_run) inputs_new_run = [] files = os.listdir(config.BUGD) for fl in files: tfl = os.path.join(config.BUGD, fl) iln = os.path.getsize(tfl) args = (config.SUT % tfl).split(' ') progname = os.path.basename(args[0]) #print '' #print 'Input file sha1:', sha1OfFile(tfl) #print 'Going to call:', ' '.join(args) retc = execute_without_analysis(tfl) if retc == None: npath = os.path.join("outd/hangs", fl) shutil.copyfile(tfl, npath) os.remove(tfl) continue #raw_input() execs += 1 #print "** %s: %d"%(fl,fitnes[fl]) if retc < 0 and retc != -2: shutil.copy(tfl, config.INPUTD) files = os.listdir(config.INPUTD) for fl in files: tfl = os.path.join(config.INPUTD, fl) iln = os.path.getsize(tfl) args = (config.SUT % tfl).split(' ') progname = os.path.basename(args[0]) #print '' #print 'Input file sha1:', sha1OfFile(tfl) #print 'Going to call:', ' '.join(args) (bbs, retc) = execute(tfl) if os.path.exists(os.path.join("outd/hangs", fl)): continue if config.BBWEIGHT == True: fitnes[fl] = gau.fitnesCal2(bbs, fl, iln) else: fitnes[fl] = gau.fitnesNoWeight(bbs, fl, iln) #raw_input() execs += 1 SPECIALADDED = False if config.GOTSPECIAL == True: #spinputs=os.listdir("outd/hangs") #if fl in spinputs: # break SPECIALADDED = True SPECIALCHANGED = True todelete.clear() form_bitvector2(bbs, fl, config.BBFORPRUNE, config.SPECIALBITVECTORS) shutil.copy(tfl, config.SPECIAL) config.SPECIALENTRY.append(fl) inputs_new_run.append(fl) for sfl, bitv in config.SPECIALBITVECTORS.iteritems(): if sfl == fl: continue if (config.SPECIALBITVECTORS[fl] & bitv) == bitv: tpath = os.path.join(config.SPECIAL, sfl) os.remove(tpath) todelete.add(sfl) config.SPECIALENTRY.remove(sfl) if sfl in config.TAINTMAP: del config.TAINTMAP[sfl] del config.ANALYSIS_MAP[sfl] if sfl in inputs_new_run: inputs_new_run.remove(sfl) for ele in todelete: del config.SPECIALBITVECTORS[ele] #print "** %s: %d"%(fl,fitnes[fl]) if retc < 0 and retc != -2: print "[*]Error code is %d" % (retc, ) tmpHash = sha1OfFile(config.CRASHFILE) efd.write("%s: %d\n" % (tfl, retc)) efd.flush() os.fsync(efd) config.err_in.append(fl) tnow = datetime.now().isoformat().replace(":", "-") nf = "%s-%s.%s" % (progname, tnow, fl) npath = os.path.join("outd/temp", nf) shutil.copyfile(tfl, npath) if tmpHash not in crashHash: crashHash.append(tmpHash) tnow = datetime.now().isoformat().replace(":", "-") nf = "%s-%s.%s" % (progname, tnow, gau.splitFilename(fl)[1]) npath = os.path.join("outd/crashInputs", nf) shutil.copyfile(tfl, npath) if SPECIALADDED == False: 
shutil.copy(tfl, config.SPECIAL) #config.SPECIALENTRY.append(fl) #SPECIALADDED=False config.CRASHIN.add(fl) if config.STOPONCRASH == True: #efd.close() crashhappend = True break fitscore = [v for k, v in fitnes.items()] maxfit = max(fitscore) avefit = sum(fitscore) / len(fitscore) mnlen, mxlen, avlen = gau.getFileMinMax(config.INPUTD) print "[*] Done with all input in Gen, starting SPECIAL. \n" #### copy special inputs in SPECIAL directory and update coverage info ### appcov, allcov = gau.calculateCov() tnow = datetime.now().isoformat().replace(":", "-") stat.write( "\t%d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %d\t %s\t %s\n" % (genran, min(fitscore), maxfit, avefit, mnlen, mxlen, avlen, len(config.SEENBB), appcov, allcov, config.NUMINPUTS, config.fname, tnow)) stat.flush() os.fsync(stat.fileno()) print "[*] Wrote to stat.log\n" if crashhappend == True: break genran += 1 #this part is to get initial fitness that will be used to determine if fuzzer got stuck. lastfit = currentfit #currentfit=maxfit currentfit = len(config.SEENBB) if currentfit == lastfit: #lastfit-config.FITMARGIN < currentfit < lastfit+config.FITMARGIN: noprogress += 1 else: noprogress = 0 if noprogress > 20: config.GOTSTUCK = True stat.write("Heavy mutate happens now..\n") noprogress = 0 if (genran >= config.GENNUM) and (config.STOPOVERGENNUM == True): break if len(os.listdir(config.SPECIAL)) > 0 and SPECIALCHANGED == True: if len(os.listdir(config.SPECIAL)) < config.NEWTAINTFILES: get_taint(config.SPECIAL) else: try: os.mkdir(config.TAINTTMP) except OSError: gau.emptyDir(config.TAINTTMP) if conditional_copy_files(config.SPECIAL, config.TAINTTMP, config.NEWTAINTFILES) == 0: get_taint(config.TAINTTMP) #print "MOst common offsets and values:", config.MOSTCOMMON #gg=raw_input("press any key to continue..") print "[*] Going for new generation creation.\n" gau.createNextGeneration3(fitnes, genran) #raw_input("press any key...") efd.close() stat.close() libfd_mm.close() libfd.close() endtime = time.clock() print "[**] Totol time %f sec." % (endtime - starttime, ) print "[**] Fuzzing done. Check %s to see if there were crashes.." % ( config.ERRORS, )
def create_new_child_from_old(self, name, parentInputFL, parentInput, conflicts, newMutation):
    ''' TODO '''
    mutatedInput = parentInput
    if parentInputFL not in self.mutationHistory:
        gautils.die("[-] Create a child (%s) from old one (%s), but old one is not in history : Impossible !"
                    % (name, parentInputFL))
    self.mutationHistory.update({name: dict()})
    # remove the old conflicting mutations
    # NOTE : if there is an index-out-of-range error here, there is a problem
    for c in conflicts:
        mutationHistoryData = self.mutationHistory[parentInputFL][c.offsetsInInput[0]][1]
        for offset in mutationHistoryData:
            i = 0
            for byte in mutationHistoryData[offset]:
                mutatedInput[offset + i] = byte
                i += 1
    # register the new mutation in the history
    self.mutationHistory[name].update({newMutation.offsetsInInput[0]: [newMutation]})
    # add the history dictionary
    self.mutationHistory[name][newMutation.offsetsInInput[0]].append(dict())
    mutationHistoryData = self.mutationHistory[name][newMutation.offsetsInInput[0]][1]
    # apply the new mutation
    for offset in newMutation.offsetsInInput:
        mutationHistoryData.update({offset: []})
        mutationHistoryDataCurrent = mutationHistoryData[offset]
        for i in range(0, newMutation.cmpSize):
            if int(offset + i) >= len(mutatedInput):
                break
            if newMutation.taintType == taintTypeEnum.UNKNOWN:
                # save the old value
                mutationHistoryDataCurrent.append(mutatedInput[(offset + i)])
                # apply the conflicted child-input mutation
                mutatedInput[(offset + i)] = self.currentMutation[i]
            elif newMutation.taintType == taintTypeEnum.SINGLE_BYTE:
                # save the old value
                mutationHistoryDataCurrent.append(mutatedInput[(offset + i)])
                # apply the conflicted child-input mutation
                mutatedInput[(offset + i)] = self.currentMutation[0]
            elif newMutation.taintType == taintTypeEnum.ARRAY:
                # save the old value
                mutationHistoryDataCurrent.append(mutatedInput[(offset + i)])
                # apply the conflicted child-input mutation
                # TODO : improve strategy
                mutatedInput[(offset + i)] = self.currentMutation[i]
    mutatedInput = gautils.apply_more_common_changes(mutatedInput)
    mutatedInput = gautils.apply_most_common_changes(mutatedInput)
    # build a string with the mutated input
    mutatedInput = ''.join(mutatedInput)
    return mutatedInput