def cmdline(self, argv):
    """Parse the pychains command line in `argv` and run the named program.

    The positional `prog` is passed to `self.run_python_file` together with
    an argument vector whose first entry is `prog` itself, followed by every
    trailing argument wrapped in `tstr`.  `-v/--verbose` selects the DEBUG
    logging level; otherwise WARNING is used.  The `-m` flag is accepted by
    the parser but is not consulted anywhere in this method.
    """
    cli = argparse.ArgumentParser(
        prog="pychains",
        description="Find valid inputs for the given program.",
    )
    # Declared in the same order as they must be registered with argparse.
    option_table = (
        (('-m',), dict(
            dest='module',
            action='store_true',
            help="prog is a module name, not a file name.",
        )),
        (('-v', '--verbose'), dict(
            dest='verbose',
            action='store_true',
            help="trace the execution of the bytecode.",
        )),
        (('prog',), dict(
            help="The program to run.",
        )),
        (('args',), dict(
            nargs=argparse.REMAINDER,
            help="Arguments to pass to the program.",
        )),
    )
    for names, settings in option_table:
        cli.add_argument(*names, **settings)

    opts = cli.parse_args(argv)

    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    # argv[0] of the target program is the program path itself.
    child_argv = [opts.prog]
    child_argv.extend(tstr(extra) for extra in opts.args)
    self.run_python_file(opts.prog, child_argv)
def __init__(self, myarg):
    """Store `myarg` on `self.my_arg`, wrapped in the taint type for its kind.

    str   -> tainted.tstr
    int   -> taintint.tint
    bytes -> taintbytes.tbytes

    For any other type `self.my_arg` is left unassigned.
    """
    if isinstance(myarg, str):
        wrapped = tainted.tstr(str(myarg))
    elif isinstance(myarg, int):
        wrapped = taintint.tint(int(myarg))
    elif isinstance(myarg, bytes):
        wrapped = taintbytes.tbytes(bytes(myarg))
    else:
        # Unsupported kind: nothing is stored.
        return
    self.my_arg = wrapped
def gen_invalid(valid_strs, arg):
    """Mutate known-valid inputs until the target program rejects them.

    Args:
        valid_strs: iterable of inputs that the target accepts.
        arg: path of the Python source file to load; its `main` callable is
            invoked with each mutated input.

    Returns:
        A set of `(mutated, source)` pairs, where `mutated` is the
        `taintedstr.tstr` that made `main` raise and `source` is the value
        it was mutated from.

    Tuning values are read from the module-level `current_config`
    (`min_mut_len`, `max_mut_attempts`).
    """
    # If the string is too short do not use mutators that shrink it further
    min_len = int(current_config["min_mut_len"])
    # Mutation attempts per generated string since mutations are cheap but
    # generation is expensive
    mut_attempts = int(current_config["max_mut_attempts"])
    _mod = imp.load_source('mymod', arg)
    rejected = set()
    # Mutation operations well suited for short strings
    smutops = [bitflip, byteflip, insert]
    # Mutation operations for longer strings
    lmutops = [trim, delete, swap]
    mutops = smutops + lmutops
    valid_str_lst = RandomizedList(list(valid_strs))
    # The rough amount of attempts for the whole set of strings
    mut_acc = mut_attempts * len(valid_str_lst)
    while mut_acc > 0:
        # Limit the amount of output elements
        if len(rejected) >= 2 * len(valid_str_lst):
            break
        a = valid_str_lst.get_random_element()
        # Mutate up to mut_attempts times; the budget per string is the
        # remaining budget spread over the strings not yet rejected.
        rounds = max(
            int(mut_acc / max(1, (len(valid_str_lst) - len(rejected)))), 1)
        for _ in range(rounds):
            mut_acc -= 1
            if len(str(a)) <= min_len:
                mutator = random.choice(smutops)
            else:
                mutator = random.choice(mutops)
            a1 = taintedstr.tstr(mutator(str(a)))
            try:
                _mod.main(a1)
            except KeyboardInterrupt:
                # A user interrupt is not a rejection by the target; let it
                # stop the run instead of being recorded as a result.
                raise
            except BaseException:
                # Everything else the target raises (including SystemExit)
                # counts as "input rejected", as before.
                print("Mutation result: ", repr(a1), "(", str(mutator), ")",
                      flush=True)
                rejected.add((a1, a))
                break
            else:
                # Still accepted: keep mutating the mutated value.
                a = a1
    return rejected
def get_comparison_len(self, traces):
    """Return how many of the last characters added had the same comparisons.

    Walks `traces` in order and classifies each entry with
    `self.parsing_state`.  The first entry whose state is Byte, EOF or Trim
    decides the answer (2, since one byte is two hexadecimal digits);
    entries classified as Unknown are skipped.

    Returns:
        2 for the first decisive entry, or -1 when `traces` is empty or
        every entry is Unknown.

    Raises:
        AssertionError: if `parsing_state` reports a state that is none of
            Byte, EOF, Trim or Unknown.
    """
    arg_prefix = self.my_arg
    if isinstance(arg_prefix, int):
        arg_prefix = tstr(arg_prefix)
    for h in traces or ():
        # parsing_state yields (idx, state, info); only the state matters.
        _, k, _ = self.parsing_state(h, arg_prefix)
        # `k == A or B` would always be truthy; test membership instead.
        if k in (EState.Byte, EState.EOF):
            return 2
        if k == EState.Trim:
            return 2
        if k == EState.Unknown:
            continue
        raise AssertionError('unexpected parsing state: %r' % (k,))
    return -1
def parsing_state(self, h, arg_prefix):
    """Classify trace entry `h` relative to the last byte of `arg_prefix`.

    The text after the final 'x' in `arg_prefix` is hex-decoded and its last
    element is taken as the most recently added byte.

    Returns a 3-tuple `(idx, state, info)`:
        (1, EState.Byte, h)   - `h.op` is in CmpSet and `h.op_A.x()` equals
                                the last byte's `x()`
        (1, EState.Trim, h)   - `h.op` is in CmpSet and the two differ
        (1, EState.EOF, h)    - `h.op_A.x()` equals `len()` of the last byte
        (-1, EState.Unknown, (h, last_byte)) - none of the above
    """
    # every iteration we add a hexadecimal (2 digits)
    if isinstance(arg_prefix, int):
        arg_prefix = tstr(arg_prefix)

    hex_digits = arg_prefix.rpartition('x')[2]
    decoded = taintbytes.tbytes(codecs.decode(hex_digits, 'hex'))
    newest = decoded[-1]

    is_comparison = h.op in CmpSet

    if is_comparison and h.op_A.x() == newest.x():
        # last added byte was wrong -> fix it
        return (1, EState.Byte, h)

    if is_comparison and h.op_A.x() != newest.x():
        # some early byte was wrong. trim to that position
        return (1, EState.Trim, h)

    if h.op_A.x() == len(newest):
        return (1, EState.EOF, h)

    return (-1, EState.Unknown, (h, newest))
char + "'. What's up with that?") return float(strValue) # p = Parser(expression, vars) def main(s): parse = Parser(s) # if s in ["a", "b", "c"]: # return print(parse.getValue()) def inputs(): import sys; if len(sys.argv) > 1: return open(sys.argv[1]).readlines() return ['1 + 2', '2 * 3 + 1', '(1-2)/3', '22.2 - 0.1', '(1)*(2)'] def skip_classes(): return [] if __name__ == "__main__": import taintedstr for i in inputs(): print(">>",i) print(main(taintedstr.tstr(i)))
from datatypes.taintedint import tint
from taintedstr import tstr


def func(t: str):
    """Print the string form of `t` followed by ' Hello'."""
    print((str(t) + ' Hello'))


# Small straight-line script exercising tint / tstr operations.
d = {3: 'Hello'}
i = 4
func('No')

# tint: byte conversion and membership test against the dict's keys.
b = tint(4)
b.to_bytes(2, 'big')
tint(2).in_(d.keys())

# Same membership test, this time starting from a plain int.
i = int(5)
tint(i).in_(d.keys())

# tstr: membership test against the same keys.
s = tstr('hello')
s.in_(d.keys())
# Destination of the final coverage report: stdout when fewer than three
# argv entries are present, otherwise a newly created "<argv[2]>.tmp" file.
# NOTE(review): when a file is opened here, nothing in this span closes it.
fout = sys.stdout if len(sys.argv) < 3 else open("%s.tmp" % sys.argv[2], 'w')
valid_n = 0  # inputs for which mod_obj.main() returned without raising
all_n = 0  # inputs attempted
cov = coverage.Coverage(source=['example'], branch=Branch)
# Each entry is unpacked below as an (input, time) pair.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
mylst = pickle.load(fin)
last_t = 0  # time value of the most recently processed entry
last_f = 0  # largest value cov.report() has returned so far
last_f_t = 0  # time value of the entry that produced last_f
last_f_i = 0  # index of the entry that produced last_f
unique = 0  # number of entries that increased the reported value
for j,(i, t) in enumerate(mylst):
    try:
        #print(j, repr(i), flush=True, end='')
        cov.start()
        all_n += 1
        v = mod_obj.main(taintedstr.tstr(i))
        cov.stop()
        valid_n += 1
    except:
        # An input that makes main() raise is simply not counted as valid.
        # NOTE(review): the bare except also swallows KeyboardInterrupt and
        # SystemExit, and cov.stop() is skipped on this path - confirm intended.
        pass
    last_t = t
    # The report text is sent to the null device; only the returned total is used.
    # NOTE(review): os.devnull is reopened on every iteration and never
    # explicitly closed.
    f = cov.report(file=open(os.devnull, 'w'))
    if f > last_f:
        # This input increased the reported total: remember where and when.
        last_f = f
        last_f_t = t
        last_f_i = j
        unique += 1
        print(j, "coverage: %.2f%%" % f, ' at ', '%.2f seconds' % t, repr(i), flush=True)
cov.save()
# Final report goes to the chosen destination; its return value is printed below.
c = cov.report(file=fout)
print("Valid: %d/%d with %f coverage at %f seconds" % (valid_n, all_n, c, last_t))
def __init__(self, myarg):
    """Keep the string form of `myarg`, wrapped in `tainted.tstr`, as `self.my_arg`."""
    text = str(myarg)
    self.my_arg = tainted.tstr(text)
def create_arg(s):
    """Wrap `s` in a `tainted.tstr` and return the wrapper."""
    wrapped = tainted.tstr(s)
    return wrapped
'23', '31', '[10, 201, 300]', '[4222 , 522 , 613, 7001 ]', '[14 , 25 , 36, 475 ]', '["axe" , "boy" , "cats", "digger"]', 'true', 'false', 'null', '"hello"', '{"hello":"world"}', '{"xx":9990, "yy":8888}', '[{"hello":"world"}, {"goodbye":"world"}]', ] return INPUTS def skip_classes(): return ['.*JSONStream.*'] def main(s): return from_json(s) if __name__ == '__main__': import taintedstr for i in inputs(): result = from_json(taintedstr.tstr(i)) print(repr(result))
@contextmanager
def opened_file(f, perm):
    """Yield a file object for path `f`, or a standard stream when `f` is falsy.

    Args:
        f: path to open, or a falsy value (None, '') to use a standard stream.
        perm: mode string passed to `open`.  When `f` is falsy, a mode
            containing 'r' selects `sys.stdin.buffer`; any other mode
            selects `sys.stdout.buffer`.

    A file opened from a path is closed when the context exits; the standard
    streams are left open.
    """
    if not f:
        # Reading must come from stdin: stdout's buffer is write-only.
        yield sys.stdin.buffer if 'r' in perm else sys.stdout.buffer
    else:
        with open(f, perm) as handle:
            yield handle


if __name__ == "__main__":
    # usage: <script> MODULE_FILE [TRACE_FILE [INPUT_FILE]]
    m_file = sys.argv[1]
    mod_obj = imp.new_module('example')
    mod_obj.__file__ = m_file
    # Close the source file as soon as it has been read.
    with open(m_file) as src:
        code = compile(src.read(), os.path.basename(m_file), 'exec')
    exec(code, mod_obj.__dict__)
    # Both paths are optional and independent: a missing trace file means
    # stdout, a missing input file means stdin.
    fn = sys.argv[2] if len(sys.argv) > 2 else None
    inp = sys.argv[3] if len(sys.argv) > 3 else None
    with opened_file(fn, 'wb') as trace_file:
        with opened_file(inp, 'rb') as myinput:
            # Infer grammar
            for j, (_i, t) in enumerate(records(myinput)):
                i = taintedstr.tstr(_i)
                print("trace:", j, repr(i), file=sys.stderr)
                # Separate name so the tracer does not shadow the record's `t`.
                with tracer.Tracer(i, trace_file) as tr:
                    tr._my_files = ['%s' % os.path.basename(m_file)]
                    tr._skip_classes = mod_obj.skip_classes() if hasattr(mod_obj, 'skip_classes') else []
                    mod_obj.main(i)
else: raise Exception(s, i) return i, expr def parse(s): i, e = parse_expr(s, 0) if s[i:]: raise Exception(s) return e def main(s): return parse(s) def inputs(): import sys if len(sys.argv) > 1: return open(sys.argv[1]).readlines() return ['((1+1)+1)', '1+1+(1+(1+1))', '1+(1+1)', '(1+1)', '(1)', '1'] if __name__ in ['__main__']: import taintedstr import sys if len(sys.argv) == 1: for i in inputs(): print(main(taintedstr.tstr(i))) else: print(main(taintedstr.tstr(sys.argv[1])))
import taintedstr
import string

# The standard `string.ascii_letters` and `string.digits` constants, each
# wrapped in a taintedstr.tstr and then passed through .untaint().
# NOTE(review): presumably this yields values carrying no taint information -
# confirm against taintedstr.
ascii_letters = taintedstr.tstr(string.ascii_letters).untaint()
digits = taintedstr.tstr(string.digits).untaint()
def solve(self, traces, i, seen):
    """Propose new candidate inputs from the comparison trace of the last run.

    Each trace entry is classified with `self.parsing_state`; the first entry
    that is not Unknown decides what is returned.

    Args:
        traces: sequence of trace entries; each exposes `op`, `op_A`, `op_B`.
        i: value included in the log record only.
        seen: forwarded to `get_previous_fixes` and
            `get_previous_seen_fragments`.

    Returns:
        A list of values produced by `self.create_prefix`; an empty list when
        `traces` is exhausted without a decisive entry.

    Raises:
        Exception: in the Byte case, when `get_corrections` returns nothing.
    """
    arg_prefix = self.my_arg
    # add the prefix to seen.
    sprefix = str(arg_prefix)
    # always two characters are added -> hexadecimal digits
    if isinstance(arg_prefix, int):
        arg_prefix = tstr(arg_prefix)
    last_char_added = arg_prefix[-2:]
    while traces:
        h, *ltrace = traces
        # NOTE(review): `o` is not read again in this function.
        o = h.op
        idx, k, info = self.parsing_state(h, arg_prefix)
        log((config.RandomSeed, i, idx, k, info, "is tainted", isinstance(h.op_A, tainted.tstr)), 1)
        if k == EState.Byte:
            # A byte comparison of the *last* byte.
            # This was a byte comparison. So collect all
            # comparisons made using this byte. until the
            # first comparison that was made otherwise.
            # Now, try to fix the last failure
            fixes = self.get_previous_fixes(h, sprefix, seen)
            fragments = self.get_previous_seen_fragments(seen)
            cmp_stack = self.comparisons_on_given_char(h, traces)
            # Now, try to fix the last failure
            # Only corrections that were neither seen nor tried before qualify.
            corr = self.get_corrections(
                cmp_stack,
                lambda i: i not in fragments and i not in fixes)
            if not corr:
                raise Exception('Exhausted attempts: %s' % fragments)
            # Drop the last two hex digits; each correction replaces them.
            prefix = sprefix[:-2]
            sols = []
            newBytes = [new_byte.my_arg for new_byte in corr]
            # Without weighted generation, duplicates are removed and the
            # candidates are put in sorted order.
            newBytes = newBytes if config.WeightedGeneration else sorted(
                set(newBytes))
            for new_byte in newBytes:
                arg = "%s%s" % (prefix, new_byte)
                sols.append(self.create_prefix(arg))
            return sols
        elif k == EState.Trim:
            # we need to (1) find where h.op_A._idx is within
            # sys_args, and trim sys_args to that location, and
            # add a new character.
            taints = last_char_added.x()
            # The two hex digits currently at that position.
            fix = sprefix[taints:taints + 2]
            fixes = self.get_previous_fixes(h, sprefix, seen)
            fragments = self.get_previous_seen_fragments(seen)
            opB = str(h.op_B)
            if not opB or fix == self.parseToHexByte(opB) or opB in fixes:
                # we add always two digits for one byte
                cmp_stack = self.comparisons_on_given_char(h, traces)
                corrections = self.get_corrections(
                    cmp_stack,
                    lambda i: i != fix and i not in fixes)
                # NOTE(review): random.choice raises IndexError if
                # `corrections` is empty; unlike the Byte case there is no
                # explicit check here.
                args = random.choice(corrections).my_arg
            else:
                # The compared-against operand is usable directly.
                args = self.parseToHexByte(opB)
            # Replace the two digits at `taints` and keep the rest.
            args = "%s%s%s" % (sprefix[:taints], args, sprefix[taints + 2:])
            # we already know the result for next character
            sols = [self.create_prefix(args)]
            return sols
        elif k == EState.EOF:
            # An empty comparison at the EOF
            sols = []
            # a new byte is always two characters (a hexadecimal number)
            for first_char in All_Characters:
                for second_char in All_Characters:
                    new_byte = first_char + second_char
                    arg = "%s%s" % (sprefix, new_byte)
                    sols.append(self.create_prefix(arg))
            return sols
        elif k == EState.Unknown:
            # Unknown what exactly happened. Strip the last and try again
            # try again.
            traces = ltrace
            continue
        else:
            assert False
    return []