class SlaveProcess:
    """Fuzzing worker that owns one qemu handle and talks to the master.

    The worker receives messages over ``connection``, runs payloads through
    ``self.q`` and forwards crashes and validated new inputs to the master.
    """

    def __init__(self, slave_id, config, connection, auto_reload=False):
        """Create the qemu handle, statistics, stage logic and bitmap storage.

        ``auto_reload`` is accepted for callers but is not used here.
        """
        self.config = config
        self.slave_id = slave_id
        debug = config.argument_values['debug']
        self.q = qemu(self.slave_id, self.config, debug_mode=debug)
        self.statistics = SlaveStatistics(self.slave_id, self.config)
        self.logic = FuzzingStateLogic(self, self.config)
        self.conn = connection
        shm_size = self.config.config_values['BITMAP_SHM_SIZE']
        self.bitmap_storage = BitmapStorage(self.config, shm_size, "master")

    def handle_import(self, msg):
        """Process an imported payload, then report ready to the master."""
        import_meta = {"state": {"name": "import"}, "id": 0}
        self.logic.process_node(msg["task"]["payload"], import_meta)
        self.conn.send_ready()

    def handle_busy(self):
        """Idle for one busy cycle, then report ready to the master."""
        busy_timeout = 1
        kickstart = False

        if not kickstart:
            log_slave("No ready work items, waiting...", self.slave_id)
            time.sleep(busy_timeout)
        else:
            # spend busy cycle by feeding random strings?
            log_slave("No ready work items, attempting random..", self.slave_id)
            start_time = time.time()
            while (time.time() - start_time) < busy_timeout:
                import_meta = {"state": {"name": "import"}, "id": 0}
                random_payload = rand.bytes(rand.int(32))
                self.logic.process_node(random_payload, import_meta)

        self.conn.send_ready()

    def handle_node(self, msg):
        """Run the stage logic on a queued node and report the outcome."""
        meta_data = QueueNode.get_metadata(msg["task"]["nid"])
        node_id = meta_data["id"]
        payload = QueueNode.get_payload(meta_data["info"]["exit_reason"], node_id)

        results, new_payload = self.logic.process_node(payload, meta_data)
        if new_payload:
            # An alternative payload is only logged as good if it still
            # triggers the bits/bytes recorded for the node.
            stage_name = meta_data["state"]["name"]
            default_info = {"method": "validate_bits", "parent": node_id}
            if self.validate_bits(new_payload, meta_data, default_info):
                log_slave("Stage %s found alternative payload for node %d"
                          % (stage_name, node_id), self.slave_id)
            else:
                log_slave("Provided alternative payload found invalid - bug in stage %s?"
                          % stage_name, self.slave_id)

        self.conn.send_node_done(node_id, results, new_payload)

    def loop(self):
        """Start qemu and serve master messages until the connection drops.

        Returns silently when qemu fails to start or the connection is reset;
        raises ``ValueError`` on a message of unknown type.
        """
        if not self.q.start():
            return
        log_slave("Started qemu", self.slave_id)

        while True:
            try:
                msg = self.conn.recv()
            except ConnectionResetError:
                log_slave("Lost connection to master. Shutting down.", self.slave_id)
                return

            kind = msg["type"]
            if kind == MSG_RUN_NODE:
                self.handle_node(msg)
            elif kind == MSG_IMPORT:
                self.handle_import(msg)
            elif kind == MSG_BUSY:
                self.handle_busy()
            else:
                raise ValueError("Unknown message type {}".format(msg))

    def quick_validate(self, data, old_res, quiet=False):
        """Re-run ``data`` once and compare its bitmap array with ``old_res``.

        Validation happens in persistent mode: faster, but problematic for
        very funky targets. Returns True when both arrays are equal.
        """
        self.statistics.event_exec()
        # Snapshot the reference array before the re-execution.
        expected = old_res.copy_to_array()

        observed = self.__execute(data).apply_lut().copy_to_array()
        if observed == expected:
            return True

        if not quiet:
            log_slave("Input validation failed! Target is funky?..", self.slave_id)
        return False

    def funky_validate(self, data, old_res):
        """Validate ``data`` several times and accept it on an 80% majority.

        Rejected inputs are stored via ``store_funky`` when the ``v``
        argument is set.
        """
        validations = 8
        confirmations = sum(
            1 for _ in range(validations)
            if self.quick_validate(data, old_res, quiet=True)
        )

        if confirmations >= 0.8*validations:
            return True

        log_slave("Funky input received %d/%d confirmations. Rejecting.."
                  % (confirmations, validations), self.slave_id)
        if self.config.argument_values['v']:
            self.store_funky(data)
        return False

    def store_funky(self, data):
        """Write a funky input into ``<work_dir>/funky/`` for later analysis."""
        global num_funky
        num_funky += 1

        funky_folder = self.config.argument_values['work_dir'] + "/funky/"
        file_name = "input_%02d_%05d" % (self.slave_id, num_funky)
        atomic_write(funky_folder + file_name, data)

    def validate_bits(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes and new bits."""
        new_bitmap, _ = self.execute(data, default_info)
        if new_bitmap is None:
            # handle non-det inputs
            return False

        expected = old_node["new_bytes"].copy()
        expected.update(old_node["new_bits"])
        return GlobalBitmap.all_new_bits_still_set(expected, new_bitmap)

    def validate_bytes(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes."""
        new_bitmap, _ = self.execute(data, default_info)
        if new_bitmap is None:
            # handle non-det inputs
            return False

        expected = old_node["new_bytes"].copy()
        return GlobalBitmap.all_new_bits_still_set(expected, new_bitmap)

    def execute_redqueen(self, data):
        """Count a redqueen execution and run ``data`` in redqueen mode."""
        self.statistics.event_exec_redqueen()
        return self.q.execute_in_redqueen_mode(data)

    def __send_to_master(self, data, execution_res, info):
        """Stamp ``info`` with time/exit reason/performance and send the input."""
        info.update({
            "time": time.time(),
            "exit_reason": execution_res.exit_reason,
            "performance": execution_res.performance,
        })
        if self.conn is None:
            return
        self.conn.send_new_input(data, execution_res.copy_to_array(), info)

    def trace_payload(self, data, info):
        """Execute ``data`` in trace mode and store the lz4-compressed trace.

        Returns the execution result, or None when anything in the tracing
        step raised (the failure is logged and the payload skipped).
        """
        work_dir = self.config.argument_values['work_dir']
        trace_file_in = work_dir + "/redqueen_workdir_%d/pt_trace_results.txt" % self.slave_id
        trace_folder = work_dir + "/traces/"
        trace_file_out = trace_folder + "payload_%05d" % info['id']

        log_slave("Tracing payload_%05d.." % info['id'], self.slave_id)

        try:
            self.q.set_payload(data)
            exec_res = self.q.execute_in_trace_mode(timeout_detection=False)

            with open(trace_file_in, 'rb') as f_in, \
                    lz4.LZ4FrameFile(trace_file_out + ".lz4", 'wb',
                                     compression_level=lz4.COMPRESSIONLEVEL_MINHC) as f_out:
                shutil.copyfileobj(f_in, f_out)

            if not exec_res.is_regular():
                self.statistics.event_reload()
                self.q.reload()
        except Exception as e:
            log_slave("Failed to produce trace %s: %s (skipping..)"
                      % (trace_file_out, e), self.slave_id)
            return None

        return exec_res

    def __execute(self, data, retry=0):
        """Run ``data`` in qemu, restarting qemu on SHM/socket errors.

        The error is re-raised once ``retry`` exceeds 2 or when the qemu
        restart itself fails.
        """
        attempt = retry
        while True:
            try:
                self.q.set_payload(data)
                return self.q.send_payload()
            except (ValueError, BrokenPipeError):
                if attempt > 2:
                    # TODO if it reliably kills qemu, perhaps log to master for harvesting..
                    print_fail("Slave %d aborting due to repeated SHM/socket error. Check logs."
                               % self.slave_id)
                    log_slave("Aborting due to repeated SHM/socket error. Payload: %s"
                              % repr(data), self.slave_id)
                    raise
                print_warning("SHM/socket error on Slave %d (retry %d)"
                              % (self.slave_id, attempt))
                log_slave("SHM/socket error, trying to restart qemu...", self.slave_id)
                self.statistics.event_reload()
                if not self.q.restart():
                    raise
            attempt += 1

    def execute(self, data, info):
        """Run ``data`` and forward crashes and validated new inputs.

        Returns ``(exec_res, is_new_input)``.
        """
        self.statistics.event_exec()
        exec_res = self.__execute(data)

        is_new_input = self.bitmap_storage.should_send_to_master(exec_res)
        crash = exec_res.is_crash()
        stable = False

        # store crashes and any validated new behavior
        # do not validate timeouts and crashes at this point as they tend to
        # be nondeterministic
        if not is_new_input:
            if crash:
                log_slave("Crashing input found (%s), but not new (discarding)"
                          % (exec_res.exit_reason), self.slave_id)
        else:
            if not crash:
                assert exec_res.is_lut_applied()
                if self.config.argument_values["funky"]:
                    stable = self.funky_validate(data, exec_res)
                else:
                    stable = self.quick_validate(data, exec_res)

                if not stable:
                    # TODO: auto-throttle persistent runs based on funky rate?
                    self.statistics.event_funky()
            if crash or stable:
                self.__send_to_master(data, exec_res, info)

        # restart Qemu on crash
        if crash:
            self.statistics.event_reload()
            self.q.reload()

        return exec_res, is_new_input
class SlaveProcess:
    """Fuzzing worker that owns one qemu handle and serves tasks from the master.

    Tasks arrive over ``connection``; payloads are executed through ``self.q``
    and crashes or validated new inputs are sent back to the master.
    """

    # When True, __execute runs every payload through
    # check_fuckyness_and_store_trace (two executions plus trace dumps on a
    # hash mismatch) instead of a single send_payload().
    STORE_FUNKY_TRACES = False

    def __init__(self, slave_id, config, connection, auto_reload=False):
        """Create and start qemu, then wire up statistics, logic and storage.

        ``auto_reload`` is accepted for callers but is not used here.
        """
        self.config = config
        self.slave_id = slave_id
        self.q = qemu(self.slave_id, self.config)
        self.q.start(verbose=False)
        print("started qemu")
        self.statistics = SlaveStatistics(self.slave_id, self.config)
        self.logic = FuzzingStateLogic(self, self.config)
        self.conn = connection
        self.bitmap_storage = BitmapStorage(
            self.config, self.config.config_values['BITMAP_SHM_SIZE'], "master")
        configure_log_prefix("%.2d" % slave_id)

    def handle_server_msg(self, msg):
        """Dispatch one master message by its ``type``.

        Raises:
            ValueError: the message type is not known.
        """
        if msg["type"] == MSG_NEW_TASK:
            return self.handle_task(msg)
        if msg["type"] == MSG_QUEUE_STATUS:
            return self.handle_queue_status(msg)
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError("unknown message type {}".format(msg))

    def handle_task(self, msg):
        """Process an ``import`` or ``node`` task and report it as performed.

        Raises:
            ValueError: the task type is neither ``import`` nor ``node``.
        """
        task = msg["task"]
        if task["type"] == "import":
            meta_data = {"state": {"name": "import"}}
            payload = task["payload"]
        elif task["type"] == "node":
            meta_data = QueueNode.get_metadata(task["nid"])
            payload = QueueNode.get_payload(meta_data["info"]["exit_reason"],
                                            meta_data["id"])
        else:
            # Previously fell through and failed on an unbound meta_data.
            raise ValueError("unknown task type {}".format(task["type"]))

        print("slave %d got task %d %s" % (
            self.slave_id,
            meta_data.get("node", {}).get("id", -1),
            repr(meta_data)))
        self.statistics.event_task(task)
        results, new_payload = self.logic.process(payload, meta_data)

        node_id = None
        if new_payload != payload:
            # NOTE(review): import tasks carry no "id"/"new_bytes" metadata, so
            # this path presumably only triggers for node tasks - confirm.
            default_info = {
                "method": "validate_bits",
                "parent": meta_data["id"],
            }
            if self.validate_bits(new_payload, meta_data, default_info):
                print("VALIDATE BITS OK")
            else:
                print(
                    "VALIDATE BITS FAILED BUG IN TRANSFORMATION!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
                )
        if results:
            node_id = meta_data["id"]
        self.conn.send_task_performed(node_id, results, new_payload)

    def handle_queue_status(self, msg):
        """Ignore queue status messages."""
        pass

    def loop(self):
        """Receive and dispatch master messages forever."""
        while True:
            msg = self.conn.recv()
            self.handle_server_msg(msg)

    def validate(self, data, old_array):
        """Re-run ``data`` and compare its bitmap array with ``old_array``.

        Returns:
            ``(True, new_bitmap)`` when the arrays are equal, otherwise
            ``(False, None)`` after printing every differing index.
        """
        self.q.set_payload(data)
        self.statistics.event_exec()
        new_bitmap = self.q.send_payload().apply_lut()
        new_array = new_bitmap.copy_to_array()
        if new_array == old_array:
            print("Validate OK")
            return True, new_bitmap

        for i in range(new_bitmap.bitmap_size):
            if old_array[i] != new_array[i]:
                safe_print("found fucky bit %d (%d vs %d)" %
                           (i, old_array[i], new_array[i]))
        print(
            "VALIDATE FAILED, Not returning a bitmap!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
        )
        return False, None

    def validate_bits(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes and new bits."""
        new_bitmap, _ = self.execute_with_bitmap(data, default_info)
        # handle non-det inputs
        if new_bitmap is None:
            return False
        old_bits = old_node["new_bytes"].copy()
        old_bits.update(old_node["new_bits"])
        return GlobalBitmap.all_new_bits_still_set(old_bits, new_bitmap)

    def validate_bytes(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes."""
        new_bitmap, _ = self.execute_with_bitmap(data, default_info)
        # handle non-det inputs
        if new_bitmap is None:
            return False
        old_bits = old_node["new_bytes"].copy()
        return GlobalBitmap.all_new_bits_still_set(old_bits, new_bitmap)

    def execute_redqueen(self, data):
        """Count a redqueen execution and run ``data`` in redqueen mode."""
        self.statistics.event_exec_redqueen()
        return self.q.execute_in_redqueen_mode(data, debug_mode=False)

    def execute_with_bitmap(self, data, info):
        """Run ``data`` and return ``(bitmap, is_new_input)``."""
        return self.__execute(data, info)

    def execute(self, data, info):
        """Run ``data`` and return only whether it was a new input."""
        _, new_input = self.__execute(data, info)
        return new_input

    def __send_to_master(self, data, execution_res, info):
        """Stamp ``info`` with time/exit reason/performance and send the input."""
        info["time"] = time.time()
        info["exit_reason"] = execution_res.exit_reason
        info["performance"] = execution_res.performance
        if self.conn is not None:
            self.conn.send_new_input(data, execution_res.copy_to_array(), info)

    def check_fuckyness_and_store_trace(self, data):
        """Execute the current payload twice and dump traces if hashes differ.

        The trace dump file is read after each execution; on a mismatch the
        input and both traces are written to a fresh folder under
        ``<work_dir>/traces``. Returns the second execution result.
        """
        global num_fucky
        dump_path = (self.config.argument_values['work_dir'] +
                     "/pt_trace_dump_%d" % self.slave_id)

        exec_res = self.q.send_payload()
        first_hash = exec_res.hash()  # do not shadow the builtin hash()
        trace1 = read_binary_file(dump_path)

        exec_res = self.q.send_payload()
        if first_hash != exec_res.hash():
            safe_print(
                "found fucky bits, dumping!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
            num_fucky += 1
            trace_folder = (self.config.argument_values['work_dir'] +
                            "/traces/fucky_%d_%d" % (num_fucky, self.slave_id))
            os.makedirs(trace_folder)
            atomic_write(trace_folder + "/input", data)
            atomic_write(trace_folder + "/trace_a", trace1)
            trace2 = read_binary_file(dump_path)
            atomic_write(trace_folder + "/trace_b", trace2)
        return exec_res

    def __execute(self, data, info):
        """Run ``data``, validate new non-crashing inputs and report findings.

        Returns ``(exec_res, is_new_input)``; ``exec_res`` is None when a new
        input failed validation.
        """
        self.statistics.event_exec()
        self.q.set_payload(data)
        if self.STORE_FUNKY_TRACES:
            exec_res = self.check_fuckyness_and_store_trace(data)
        else:
            exec_res = self.q.send_payload()

        is_new_input = self.bitmap_storage.should_send_to_master(exec_res)
        # we do not want to validate timeouts and crashes as they tend to be
        # nondeterministic
        crash = self.execution_exited_abnormally()
        valid = False  # explicit default; only set by validate() below
        if is_new_input:
            if not crash:
                assert exec_res.is_lut_applied()
                bitmap_array = exec_res.copy_to_array()
                valid, exec_res = self.validate(data, bitmap_array)
            if crash or valid:
                self.__send_to_master(data, exec_res, info)
        return exec_res, is_new_input

    def execution_exited_abnormally(self):
        """Return a truthy value if qemu flags a crash, timeout or kasan."""
        return self.q.crashed or self.q.timeout or self.q.kasan

    def __restart_vm(self):
        """Placeholder for a VM restart; currently always reports success.

        TODO: Fixme - the former body (soft reload up to 32 times, then a full
        qemu restart under a reload semaphore) was unreachable behind this
        early return and used attributes this class never sets (``self.comm``,
        ``self.soft_reload_counter``), so it has been dropped.
        """
        return True
class SlaveProcess:
    """Fuzzing worker that owns one qemu handle and talks to the master.

    This revision carries several debugging shortcuts. They used to be
    hard-coded (an unconditional ``return``, ``if True:``/``if False:``);
    they are now class-level switches whose defaults keep the previous
    behaviour.
    """

    # True: validate() accepts every payload without comparing bitmaps.
    DEBUG_ACCEPT_ALL_PAYLOADS = True
    # True: a failed validation logs every differing bitmap index.
    DEBUG_LOG_FUNKY_BITS = True
    # True: __execute compares two executions and may dump their traces.
    DEBUG_COMPARE_TRACES = False
    # True: crashes that are not new are still sent to the master.
    DEBUG_FORWARD_KNOWN_CRASHES = True

    def __init__(self, slave_id, config, connection, auto_reload=False):
        """Create the qemu handle, statistics, stage logic and bitmap storage.

        ``auto_reload`` is accepted for callers but is not used here.
        """
        self.config = config
        self.slave_id = slave_id
        self.q = qemu(self.slave_id, self.config)
        self.statistics = SlaveStatistics(self.slave_id, self.config)
        self.logic = FuzzingStateLogic(self, self.config)
        self.conn = connection
        self.bitmap_storage = BitmapStorage(
            self.config, self.config.config_values['BITMAP_SHM_SIZE'], "master")

    def handle_import(self, msg):
        """Process an imported payload, then report ready to the master."""
        meta_data = {"state": {"name": "import"}, "id": 0}
        payload = msg["task"]["payload"]
        self.logic.process_node(payload, meta_data)
        self.conn.send_ready()

    def handle_busy(self):
        """Idle for one busy cycle, then report ready to the master."""
        busy_timeout = 1
        kickstart = False

        if kickstart:
            # spend busy cycle by feeding random strings?
            log_slave("No ready work items, attempting random..", self.slave_id)
            start_time = time.time()
            while (time.time() - start_time) < busy_timeout:
                meta_data = {"state": {"name": "import"}, "id": 0}
                payload = rand.bytes(rand.int(32))
                self.logic.process_node(payload, meta_data)
        else:
            log_slave("No ready work items, waiting...", self.slave_id)
            time.sleep(busy_timeout)
        self.conn.send_ready()

    def handle_node(self, msg):
        """Run the stage logic on a queued node and report the outcome."""
        meta_data = QueueNode.get_metadata(msg["task"]["nid"])
        payload = QueueNode.get_payload(meta_data["info"]["exit_reason"],
                                        meta_data["id"])

        results, new_payload = self.logic.process_node(payload, meta_data)
        if new_payload:
            default_info = {"method": "validate_bits", "parent": meta_data["id"]}
            if self.validate_bits(new_payload, meta_data, default_info):
                log_slave("Stage %s found alternative payload for node %d"
                          % (meta_data["state"]["name"], meta_data["id"]),
                          self.slave_id)
            else:
                log_slave("Provided alternative payload found invalid - bug in stage %s?"
                          % meta_data["state"]["name"], self.slave_id)
        self.conn.send_node_done(meta_data["id"], results, new_payload)

    def loop(self):
        """Start qemu (two attempts) and serve master messages.

        Returns silently when qemu fails to start twice or the connection is
        reset; raises ``ValueError`` on a message of unknown type.
        """
        # try twice here
        if not self.q.start() and not self.q.start():
            return

        log_slave("Started qemu", self.slave_id)
        while True:
            try:
                msg = self.conn.recv()
            except ConnectionResetError:
                log_slave("Lost connection to master. Shutting down.", self.slave_id)
                return

            if msg["type"] == MSG_RUN_NODE:
                self.handle_node(msg)
            elif msg["type"] == MSG_IMPORT:
                self.handle_import(msg)
            elif msg["type"] == MSG_BUSY:
                self.handle_busy()
            else:
                raise ValueError("Unknown message type {}".format(msg))

    def validate(self, data, old_array):
        """Re-run ``data`` and compare its bitmap array with ``old_array``.

        With ``DEBUG_ACCEPT_ALL_PAYLOADS`` set (the default) the comparison is
        skipped and every payload counts as valid.

        Returns:
            ``(True, new_bitmap)`` on success, ``(False, None)`` otherwise.
        """
        self.q.set_payload(data)
        self.statistics.event_exec()
        new_bitmap = self.q.send_payload().apply_lut()
        new_array = new_bitmap.copy_to_array()

        # debugging_code, every payload is valid
        if self.DEBUG_ACCEPT_ALL_PAYLOADS:
            return True, new_bitmap

        if new_array == old_array:
            return True, new_bitmap

        log_slave("Validation failed, ignoring this input", self.slave_id)
        if self.DEBUG_LOG_FUNKY_BITS:
            for i in range(new_bitmap.bitmap_size):
                if old_array[i] != new_array[i]:
                    log_slave("Funky bit in validation bitmap: %d (%d vs %d)"
                              % (i, old_array[i], new_array[i]), self.slave_id)
        return False, None

    def validate_bits(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes and new bits."""
        new_bitmap, _ = self.execute(data, default_info)
        # handle non-det inputs
        if new_bitmap is None:
            return False
        old_bits = old_node["new_bytes"].copy()
        old_bits.update(old_node["new_bits"])
        return GlobalBitmap.all_new_bits_still_set(old_bits, new_bitmap)

    def validate_bytes(self, data, old_node, default_info):
        """Check that ``data`` still sets the node's new bytes."""
        new_bitmap, _ = self.execute(data, default_info)
        # handle non-det inputs
        if new_bitmap is None:
            return False
        old_bits = old_node["new_bytes"].copy()
        return GlobalBitmap.all_new_bits_still_set(old_bits, new_bitmap)

    def execute_redqueen(self, data):
        """Count a redqueen execution and run ``data`` in redqueen mode."""
        self.statistics.event_exec_redqueen()
        return self.q.execute_in_redqueen_mode(data)

    def __execute(self, data, retry=0):
        """Run ``data`` in qemu, restarting qemu on ``BrokenPipeError``.

        The error is re-raised once ``retry`` exceeds 2.
        """
        try:
            self.q.set_payload(data)
            # activate detailed comparison of execution traces?
            if self.DEBUG_COMPARE_TRACES:
                return self.check_funkyness_and_store_trace(data)
            return self.q.send_payload()
        except BrokenPipeError:
            if retry > 2:
                # TODO if it reliably kills qemu, perhaps log to master for harvesting..
                log_slave("Fatal: Repeated BrokenPipeError on input: %s"
                          % repr(data), self.slave_id)
                raise
            log_slave("BrokenPipeError, trying to restart qemu...", self.slave_id)
            self.q.shutdown()
            time.sleep(1)
            self.q.start()
            return self.__execute(data, retry=retry+1)

    def __send_to_master(self, data, execution_res, info):
        """Stamp ``info`` with time/exit reason/performance and send the input."""
        info["time"] = time.time()
        info["exit_reason"] = execution_res.exit_reason
        info["performance"] = execution_res.performance
        if self.conn is not None:
            self.conn.send_new_input(data, execution_res.copy_to_array(), info)

    def check_funkyness_and_store_trace(self, data):
        """Execute the current payload twice and dump traces if hashes differ.

        The trace dump file is read after each execution; on a mismatch the
        input and both traces are written to a fresh folder under
        ``<work_dir>/traces``. Returns the second execution result.
        """
        global num_funky
        dump_path = (self.config.argument_values['work_dir'] +
                     "/pt_trace_dump_%d" % self.slave_id)

        exec_res = self.q.send_payload()
        first_hash = exec_res.hash()  # do not shadow the builtin hash()
        trace1 = read_binary_file(dump_path)

        exec_res = self.q.send_payload()
        if first_hash != exec_res.hash():
            print_warning("Validation identified funky bits, dumping!")
            num_funky += 1
            trace_folder = (self.config.argument_values['work_dir'] +
                            "/traces/funky_%d_%d" % (num_funky, self.slave_id))
            os.makedirs(trace_folder)
            atomic_write(trace_folder + "/input", data)
            atomic_write(trace_folder + "/trace_a", trace1)
            trace2 = read_binary_file(dump_path)
            atomic_write(trace_folder + "/trace_b", trace2)
        return exec_res

    def execute(self, data, info):
        """Run ``data`` and forward crashes and validated new inputs.

        Returns ``(exec_res, is_new_input)``; ``exec_res`` is None when a new
        input failed validation.
        """
        self.statistics.event_exec()
        exec_res = self.__execute(data)

        is_new_input = self.bitmap_storage.should_send_to_master(exec_res)
        crash = self.execution_exited_abnormally()
        valid = False  # explicit default; only set by validate() below

        # store crashes and any validated new behavior
        # do not validate timeouts and crashes at this point as they tend to
        # be nondeterministic
        if is_new_input:
            if not crash:
                assert exec_res.is_lut_applied()
                bitmap_array = exec_res.copy_to_array()
                valid, exec_res = self.validate(data, bitmap_array)
                if not valid:
                    self.statistics.event_funky()
                    log_slave("Input validation failed, throttling N/A", self.slave_id)
                    # TODO: the funky event is already over at this point and
                    # the error may indeed not be deterministic. How about we
                    # store some $num funky payloads for more focused
                    # exploration rather than discarding them?
            if crash or valid:
                self.__send_to_master(data, exec_res, info)
        elif crash:
            if self.DEBUG_FORWARD_KNOWN_CRASHES:
                # debugging_code
                self.__send_to_master(data, exec_res, info)
            else:
                log_slave("Crashing input found (%s), but not new (discarding)"
                          % (exec_res.exit_reason), self.slave_id)

        # restart Qemu on crash
        if crash:
            self.statistics.event_reload()
            self.q.restart()

        return exec_res, is_new_input

    def execution_exited_abnormally(self):
        """Return a truthy value if qemu flags a crash, timeout or kasan."""
        return self.q.crashed or self.q.timeout or self.q.kasan