def install_trace_hook(map_start_addr, map_size_bits=None, ngram_size=None):
    """
    Configure the `tracehook` module and register its global trace hook with `sys.settrace`.

    `map_start_addr` is handed straight to `tracehook.set_map_start`.

    When `map_size_bits` is None, the value from `get_map_size_bits_env()` is used, falling
    back to `DEFAULT_MAP_SIZE_BITS` if that is falsy.

    When `ngram_size` is None, it is read as an integer from the environment variable named
    by `NGRAM_SIZE_ENV_VAR`, defaulting to 0 if the variable is not set.
    """
    # explicit arguments always win; the environment is only consulted for omitted ones
    effective_map_size_bits = (
        (get_map_size_bits_env() or DEFAULT_MAP_SIZE_BITS)
        if map_size_bits is None
        else map_size_bits
    )
    effective_ngram_size = (
        int(os.environ.get(NGRAM_SIZE_ENV_VAR, 0))
        if ngram_size is None
        else ngram_size
    )

    tracehook.set_map_start(map_start_addr)
    tracehook.set_map_size_bits(effective_map_size_bits)
    tracehook.set_ngram_size(effective_ngram_size)

    # only start tracing once the hook has been fully configured
    sys.settrace(tracehook.global_trace_hook)
def test_single_repeated(
    map_size_bits,
    ngram_size,
    pattern,
    teststr0,
    maxvisits0,
    teststr1,
    visits1,
):
    """
    After matching `teststr0` against `pattern`, the most-visited map address is expected to
    have `maxvisits0` visits. After clearing the map and matching `teststr1`, that same
    address should then have `visits1` visits.
    """
    # only addresses where prev_loc == this_loc get reliably, repeatedly incremented, which
    # excludes the first visit at a loc. with ngrams enabled that uncounted region grows in
    # proportion to ngram_size, so both expectations are lowered by (ngram_size - 1).
    uncounted_visits = ngram_size - 1 if ngram_size else 0
    expected_max_visits = maxvisits0 - uncounted_visits
    expected_second_visits = visits1 - uncounted_visits

    map_len = 1 << map_size_bits
    with mmap.mmap(-1, map_len, flags=mmap.MAP_PRIVATE) as mem:
        first_byte = ctypes.c_byte.from_buffer(mem)
        try:
            tracehook.set_map_start(ctypes.addressof(first_byte))
            tracehook.set_map_size_bits(map_size_bits)
            tracehook.set_ngram_size(ngram_size)

            # first pass: find the busiest address and how often it was hit
            _reset_prev_loc()
            _reset_mem(mem, map_size_bits)
            pattern.match(teststr0)

            first_pass = mem.read()
            hottest_count = max(first_pass)
            # index() returns the first occurrence, i.e. the same address max() over
            # enumerate() would have selected on a tie
            hottest_index = first_pass.index(hottest_count)
            mem.seek(0)
            assert hottest_count == expected_max_visits

            # second pass: the same address is checked against the second expectation
            _reset_prev_loc()
            _reset_mem(mem, map_size_bits)
            pattern.match(teststr1)

            second_pass = mem.read()
            assert second_pass[hottest_index] == expected_second_visits
        finally:
            # drop the ctypes view before the `with` block closes the mmap
            # (presumably closing would fail while the buffer is still exported)
            del first_byte
def test_record_loc_masks_prev_loc(map_size_bits):
    """
    Recording an unmasked loc must leave `__afl_prev_loc` below the map size, so that a tool
    using it directly as a map location cannot address beyond the allocated map.
    """
    map_len = 1 << map_size_bits
    with mmap.mmap(-1, map_len, flags=mmap.MAP_PRIVATE) as mem:
        first_byte = ctypes.c_byte.from_buffer(mem)
        try:
            tracehook.set_map_start(ctypes.addressof(first_byte))
            tracehook.set_map_size_bits(map_size_bits)

            # simulate a tool using the c header interface to record a loc without
            # masking that loc to the map size
            _test_record_loc(0xdeadbeef)

            # fortunately `pythonapi` is lenient enough to let us peek at any of our
            # internal symbols
            prev_loc = ctypes.c_uint32.in_dll(ctypes.pythonapi, "__afl_prev_loc")

            assert prev_loc.value < map_len
        finally:
            # drop the ctypes view before the `with` block closes the mmap
            # (presumably closing would fail while the buffer is still exported)
            del first_byte
def test_line_trace_hook(
    map_size_bits,
    ngram_size,
    map_prepop,
    lineno_lasti_pairs,
    expected_nonzeros,
):
    """
    Feed `lineno_lasti_pairs` to `tracehook.line_trace_hook` as mocked "line" events and
    check the map contents produced by the final event.

    All but the last pair are replayed first. The map is then overwritten with the bytes
    built from `map_prepop`, the last pair is sent, and the whole map must equal the bytes
    built from `expected_nonzeros`.
    """

    def _send_line_event(lineno, lasti):
        # a spec'd frame carrying only the attributes varied by this test
        frame = mock.create_autospec(
            FrameType,
            instance=True,
            f_lineno=lineno,
            f_lasti=lasti,
        )
        tracehook.line_trace_hook(frame, "line", mock.Mock())

    map_len = 1 << map_size_bits
    with mmap.mmap(-1, map_len, flags=mmap.MAP_PRIVATE) as mem:
        first_byte = ctypes.c_byte.from_buffer(mem)
        try:
            tracehook.set_map_start(ctypes.addressof(first_byte))
            tracehook.set_map_size_bits(map_size_bits)
            tracehook.set_ngram_size(ngram_size)

            for warmup_lineno, warmup_lasti in lineno_lasti_pairs[:-1]:
                _send_line_event(warmup_lineno, warmup_lasti)

            # the effect of the earlier calls can't usefully be asserted, since each depends
            # on the arguments of the call *before* it. overwrite the map here so that only
            # the next call's effect is visible to the assertion below.
            mem.write(_get_populated_map_bytes(map_size_bits, map_prepop))
            mem.seek(0)

            final_lineno, final_lasti = lineno_lasti_pairs[-1]
            _send_line_event(final_lineno, final_lasti)

            observed_map = mem.read()
            expected_map = _get_populated_map_bytes(map_size_bits, expected_nonzeros)
            assert observed_map == expected_map
        finally:
            # drop the ctypes view before the `with` block closes the mmap
            # (presumably closing would fail while the buffer is still exported)
            del first_byte
def test_same_visits(
    map_size_bits,
    ngram_size,
    pattern,
    teststr0,
    teststr1,
):
    """
    Matching `teststr0` and `teststr1` against `pattern` should, for one reason or another,
    leave byte-for-byte identical maps.
    """
    map_len = 1 << map_size_bits
    with mmap.mmap(-1, map_len, flags=mmap.MAP_PRIVATE) as mem:
        first_byte = ctypes.c_byte.from_buffer(mem)
        try:
            tracehook.set_map_start(ctypes.addressof(first_byte))
            tracehook.set_map_size_bits(map_size_bits)
            tracehook.set_ngram_size(ngram_size)

            # run both subjects through the same reset / match / snapshot sequence
            snapshots = []
            for subject in (teststr0, teststr1):
                _reset_prev_loc()
                _reset_mem(mem, map_size_bits)

                pattern.match(subject)

                snapshots.append(mem.read())
                mem.seek(0)

            visits0, visits1 = snapshots
            assert visits0 == visits1
        finally:
            # drop the ctypes view before the `with` block closes the mmap
            # (presumably closing would fail while the buffer is still exported)
            del first_byte
def test_more_visits(
    map_size_bits,
    ngram_size,
    pattern,
    teststr_fewer,
    teststr_greater,
):
    """
    Matching `teststr_greater` against `pattern` should record more visits overall (summed
    across the whole map) than matching `teststr_fewer`. Results could be sensitive to small
    upstream changes.
    """
    map_len = 1 << map_size_bits
    with mmap.mmap(-1, map_len, flags=mmap.MAP_PRIVATE) as mem:
        first_byte = ctypes.c_byte.from_buffer(mem)
        try:
            tracehook.set_map_start(ctypes.addressof(first_byte))
            tracehook.set_map_size_bits(map_size_bits)
            tracehook.set_ngram_size(ngram_size)

            def _total_visits(subject):
                # start from a clean slate, run the match, then add up every map byte
                _reset_prev_loc()
                _reset_mem(mem, map_size_bits)
                pattern.match(subject)
                return sum(mem.read())

            visits_fewer = _total_visits(teststr_fewer)
            mem.seek(0)
            visits_greater = _total_visits(teststr_greater)

            assert visits_fewer < visits_greater
        finally:
            # drop the ctypes view before the `with` block closes the mmap
            # (presumably closing would fail while the buffer is still exported)
            del first_byte
from cpytraceafl import fuzz_from_here, DEFAULT_MAP_SIZE_BITS, get_map_size_bits_env # must ensure the tracehook module gets imported *before* any instrumented native modules, # so that the __afl_area_ptr and __afl_prev_loc global symbols have been loaded from cpytraceafl.tracehook import set_map_start import sysv_ipc # if we're going to "warm up" the code under test in a way that executes native instrumented # code *before* we do the fork & start tracing, we need to provide a dummy memory area for # __afl_area_ptr to point to. here, use some fresh sysv shared memory because it's what we # have to hand. map_size_bits = get_map_size_bits_env() or DEFAULT_MAP_SIZE_BITS dummy_sm = sysv_ipc.SharedMemory(None, size=1 << map_size_bits, flags=sysv_ipc.IPC_CREX) set_map_start(dummy_sm.address) import PIL # we only want to exercise the PCX code for now: unregister all other plugins so our input # doesn't get recognized as those formats. not getting recognized as a PCX should just lead to # a single boring path that doesn't distract the fuzzing process. PIL._plugins[:] = ["PcxImagePlugin"] from PIL import Image import codecs from io import BytesIO import sys # warm up code under test, ensure lazy imports are performed and internal caches are populated. Image.open( BytesIO( codecs.decode(