def __restart_vm(self):
    """Restart the target VM, preferring a cheap soft-reload over a full
    qemu respawn.

    Fixed: the original had a stray `return True` immediately after the
    `def` header, which made the entire restart logic unreachable.

    Returns:
        False if slave termination was requested (checked both before and
        after the restart), True on a successful restart.
    """
    if self.comm.slave_termination.value:
        return False
    # Serialize VM reloads across slaves.
    self.comm.reload_semaphore.acquire()
    try:
        # Force a full (hard) restart every 32 soft reloads by taking the
        # exception path below -- presumably to work around accumulating
        # leaks in qemu (see the sibling implementation's comment).
        if self.soft_reload_counter >= 32:
            self.soft_reload_counter = 0
            raise Exception("...")
        self.q.soft_reload()
        self.soft_reload_counter += 1
    except Exception:
        log_slave("restart failed %s" % traceback.format_exc(), self.slave_id)
        # Hard restart: tear down the qemu instance and respawn it until
        # it comes up successfully.
        while True:
            self.q.__del__()
            self.q = qemu(self.slave_id, self.config)
            if self.q.start():
                break
            else:
                time.sleep(0.5)
                log_slave("Fail Reload", self.slave_id)
    self.comm.reload_semaphore.release()
    self.q.set_tick_timeout_treshold(self.stage_tick_treshold * self.timeout_tick_factor)
    if self.comm.slave_termination.value:
        return False
    return True
def __restart_vm(self):
    # Restart the target VM: try a cheap in-VM soft reload first and fall
    # back to a full qemu teardown/respawn if that fails.
    # Returns False when slave termination has been requested (checked both
    # before and after the restart), True otherwise.
    if self.comm.slave_termination.value:
        return False
    # Serialize VM reloads across slaves.
    self.comm.reload_semaphore.acquire()
    try:
        #raise Exception("!")
        # QEMU is full of memory leaks...fixing it that way...
        if self.soft_reload_counter >= 32:
            # Every 32 soft reloads, force a hard restart by raising into
            # the except path below.
            self.soft_reload_counter = 0
            raise Exception("...")
        self.q.soft_reload()
        self.soft_reload_counter += 1
    except:
        # Hard restart: destroy the qemu instance and respawn it until it
        # starts successfully.
        while True:
            self.q.__del__()
            self.q = qemu(self.slave_id, self.config)
            if self.q.start():
                break
            else:
                time.sleep(0.5)
                log_slave("Fail Reload", self.slave_id)
    self.comm.reload_semaphore.release()
    self.q.set_tick_timeout_treshold(self.stage_tick_treshold * self.timeout_tick_factor)
    if self.comm.slave_termination.value:
        return False
    return True
def __perform_redqueen(self, payload, metadata):
    """Run the Redqueen coloring + mutation stages on `payload`.

    First checks whether the payload can be safely extended (appending
    junk does not change the bitmap hash), then colors the input and
    feeds the colored variants into the Redqueen info gatherer to derive
    and apply mutation proposals.
    """
    self.stage_update_label("redq_coloring")

    orig_hash = self.__get_bitmap_hash_robust(payload)
    extension = bytes([207, 117, 130, 107, 183, 200, 143, 154])
    appended_hash = self.__get_bitmap_hash_robust(payload + extension)

    # If appending junk bytes leaves the bitmap hash unchanged, the input
    # can be extended to give Redqueen more room to work with.
    if orig_hash and orig_hash == appended_hash:
        log_slave("Redqueen: input can be extended", self.slave.slave_id)
        payload_array = bytearray(payload + extension)
    else:
        payload_array = bytearray(payload)

    colored_alternatives = self.__perform_coloring(payload_array)
    if colored_alternatives:
        payload_array = colored_alternatives[0]
        # Fixed: the original used `print(...)` as the assert message,
        # which evaluates to None and loses the diagnostic entirely.
        assert isinstance(colored_alternatives[0], bytearray), \
            "!! ColoredAlternatives: %s %s" % (
                repr(colored_alternatives[0]), type(colored_alternatives[0]))
    else:
        log_redq("Input is not stable, skipping..")
        return

    rq_info = RedqueenInfoGatherer()
    rq_info.make_paths(RedqueenWorkdir(self.slave.slave_id, self.config))
    rq_info.verbose = False
    # Gather comparison info from every colored variant that executes.
    for pld in colored_alternatives:
        if self.execute_redqueen(pld):
            rq_info.get_info(pld)

    rq_info.get_proposals()
    self.stage_update_label("redq_mutate")
    rq_info.run_mutate_redqueen(payload_array, self.execute)
def __init__(self, comm, id, reload=False):
    # Slave thread constructor. Wires up IPC objects, spawns the qemu
    # wrapper and pre-acquires the semaphores/locks used for handshaking.
    # NOTE(review): `reload` is accepted but not used here -- possibly
    # consumed by a subclass or kept for interface compatibility; confirm.
    threading.Thread.__init__(self)
    self.comm = comm
    self.slave_id = id
    self.config = FuzzerConfiguration()
    self.q = qemu(id, self.comm.files[2], self.comm.qemu_socket_prefix, config=self.config)
    self.model = Model(self)
    self.comm.register_model(self.slave_id, self.model)
    self.state = SlaveState.WAITING
    # Both semaphores start acquired: a later release() signals that a
    # payload / index response is available.
    self.payload_sem = threading.BoundedSemaphore(value=1)
    self.payload_sem.acquire()
    self.idx_sem = threading.BoundedSemaphore(value=1)
    self.idx_sem.acquire()
    self._stop_event = threading.Event()
    self.bitmap_size = self.config.config_values['BITMAP_SHM_SIZE']
    self.bitmap_filename = self.comm.files[2] + str(self.slave_id)
    self.comm.slave_locks_bitmap[self.slave_id].acquire()
    self._qemu_ready = False
    # Optional reproduce mode: replay a single payload from file.
    self.reproduce = self.config.argument_values['reproduce']
    self.globalmodel = None
    if self.reproduce:
        self.globalmodel = GlobalModel(self.config)
    # Grab the lock during initialization
    if self.slave_id < len(self.comm.concolic_locks):
        self.comm.concolic_locks[self.slave_id].acquire()
        log_slave("concolic locked", self.slave_id)
def handle_node(self, msg):
    """Process one queue node task from the master and report results."""
    node_id = msg["task"]["nid"]
    meta_data = QueueNode.get_metadata(node_id)
    payload = QueueNode.get_payload(meta_data["info"]["exit_reason"], meta_data["id"])

    results, new_payload = self.logic.process_node(payload, meta_data)

    if new_payload:
        # Re-validate any alternative payload proposed by the stage logic.
        default_info = {
            "method": "validate_bits",
            "parent": meta_data["id"],
            "IoControlCode": meta_data["info"]["IoControlCode"],
        }
        validated = self.validate_bits(new_payload, meta_data, default_info)
        if validated:
            log_slave(
                "Stage %s found alternative payload for node %d"
                % (meta_data["state"]["name"], meta_data["id"]),
                self.slave_id)
        else:
            log_slave(
                "Provided alternative payload found invalid - bug in stage %s?"
                % meta_data["state"]["name"],
                self.slave_id)

    if self.exec_count > EXEC_LIMIT:
        # Fuzzing next queue
        self.exec_count = 0
        self.conn.send_next_queue()

    self.conn.send_node_done(meta_data["id"], results, new_payload)
def trace_payload(self, data, info):
    # Execute `data` in PT trace mode and store the resulting trace,
    # lz4-compressed, under traces/payload_<id>.lz4.
    # Returns the execution result, or None if tracing failed.
    trace_file_in = self.config.argument_values[
        'work_dir'] + "/redqueen_workdir_%d/pt_trace_results.txt" % self.slave_id
    trace_folder = self.config.argument_values['work_dir'] + "/traces/"
    trace_file_out = trace_folder + "payload_%05d" % info['id']
    log_slave("Tracing payload_%05d.." % info['id'], self.slave_id)
    try:
        self.q.set_payload(data)
        exec_res = self.q.execute_in_trace_mode(timeout_detection=False)
        # Compress the raw trace into the traces/ folder.
        with open(trace_file_in, 'rb') as f_in:
            with lz4.LZ4FrameFile(
                    trace_file_out + ".lz4", 'wb',
                    compression_level=lz4.COMPRESSIONLEVEL_MINHC) as f_out:
                shutil.copyfileobj(f_in, f_out)
        # Non-regular exits (crash/timeout) leave the VM dirty; reload it.
        if not exec_res.is_regular():
            self.statistics.event_reload()
            self.q.reload()
    except Exception as e:
        log_slave(
            "Failed to produce trace %s: %s (skipping..)" %
            (trace_file_out, e), self.slave_id)
        return None
    return exec_res
def fetch_payload(self):
    # Block until the master hands us a payload (via payload_sem), then
    # return it. Returns None when the slave is being stopped. In
    # reproduce mode, returns the file contents instead.
    if self.stopped():
        return None
    if not self.vm_ready:
        # First call: announce readiness to the master exactly once.
        self.vm_ready = True
        send_msg(KAFL_TAG_START, self.q.qemu_id, self.comm.to_master_queue,
                 source=self.slave_id)
    if self.reproduce and self.reproduce != "":
        with open(self.reproduce, 'rb') as infile:
            return infile.read()
    # Poll the semaphore so a stop request is noticed within ~0.1s.
    # The while/else fires only when the loop exits without break,
    # i.e. when stopped() became true before a payload arrived.
    while not self.stopped():
        if self.payload_sem.acquire(timeout=0.1):
            break
    else:
        return None
    payload = self.payload
    # print(len(payload))
    assert (self.state != SlaveState.WAITING)
    self.start_time = time.time()
    log_slave("fetch_payload", self.slave_id)
    return payload
def slave_loader(slave_id):
    """Process entry point for a fuzzer slave.

    Sets CPU affinity, installs a SIGTERM handler that tears down qemu,
    then runs the slave's main loop until it exits or raises.
    """
    # Renamed the handler's first parameter from `signal` to `signum` to
    # stop shadowing the `signal` module.
    def sigterm_handler(signum, frame):
        # Best-effort qemu teardown before exiting on SIGTERM.
        if slave_process.q:
            slave_process.q.async_exit()
        sys.exit(0)

    log_slave("PID: " + str(os.getpid()), slave_id)
    # sys.stdout = open("slave_%d.out"%slave_id, "w")

    config = FuzzerConfiguration()
    # Fixed: compare against None instead of truthiness so an explicit
    # affinity of CPU 0 (falsy) is honored rather than ignored.
    if config.argument_values["cpu_affinity"] is not None:
        psutil.Process().cpu_affinity([config.argument_values["cpu_affinity"]])
    else:
        psutil.Process().cpu_affinity([slave_id])

    connection = ClientConnection(slave_id, config)
    slave_process = SlaveProcess(slave_id, config, connection)
    signal.signal(signal.SIGTERM, sigterm_handler)
    os.setpgrp()
    try:
        slave_process.loop()
    except:
        # Ensure qemu is shut down before propagating the error.
        if slave_process.q:
            slave_process.q.async_exit()
        raise
    log_slave("Exit.", slave_id)
def init_stage_info(self, metadata, verbose=False):
    """Reset per-stage bookkeeping for a new stage on the given node."""
    stage = metadata["state"]["name"]
    nid = metadata["id"]

    self.stage_info["stage"] = stage
    self.stage_info["parent"] = nid
    self.stage_info["method"] = "uncategorized"
    self.stage_info_start_time = time.time()
    self.stage_info_execs = 0
    self.attention_secs_start = metadata.get("attention_secs", 0)
    self.attention_execs_start = metadata.get("attention_execs", 0)
    self.performance = metadata.get("performance", 0)

    # Zero out the per-method timers for this stage.
    self.initial_time = 0
    self.havoc_time = 0
    self.grimoire_time = 0
    self.grimoire_inference_time = 0
    self.redqueen_time = 0

    self.slave.statistics.event_stage(stage, nid)

    msg = f"Launching {stage} stage on node {nid}"
    # Import nodes carry no favorite/speed metadata.
    if stage == "import":
        qinfo = ""
    else:
        fav_bits = len(metadata["fav_bits"])
        speed = metadata["fav_factor"]
        qinfo = f" (fav={fav_bits}, speed={speed})"
    log_slave(msg + qinfo, self.slave.slave_id)
    if verbose:
        print(f"[Slave {self.slave.slave_id}] {msg}{qinfo}")
def execute(self, data, info):
    # Execute one payload, validate interesting results, and forward
    # crashes / stable new behavior to the master.
    # Returns (exec_res, is_new_input).
    self.statistics.event_exec()
    exec_res = self.__execute(data)
    is_new_input = self.bitmap_storage.should_send_to_master(exec_res)
    crash = exec_res.is_crash()
    stable = False
    # store crashes and any validated new behavior
    # do not validate timeouts and crashes at this point as they tend to be nondeterministic
    if is_new_input:
        if not crash:
            assert exec_res.is_lut_applied()
            # funky mode: stochastic re-validation; otherwise a single
            # quick re-run must reproduce the bitmap.
            if self.config.argument_values["funky"]:
                stable = self.funky_validate(data, exec_res)
            else:
                stable = self.quick_validate(data, exec_res)
            if not stable:
                # TODO: auto-throttle persistent runs based on funky rate?
                self.statistics.event_funky()
        if crash or stable:
            self.__send_to_master(data, exec_res, info)
    else:
        if crash:
            log_slave("Crashing input found (%s), but not new (discarding)" % (exec_res.exit_reason), self.slave_id)
    # restart Qemu on crash
    if crash:
        self.statistics.event_reload()
        self.q.reload()
    return exec_res, is_new_input
def __respond_bitmap_req(self, response):
    """Hand a bitmap-request payload to the worker and wake it up."""
    # if self.state != SlaveState.WAITING:
    #     print("Error: slave is not waiting for input")
    #     return
    self.payload = response.data
    assert (self.state == SlaveState.WAITING)
    self.state = SlaveState.PROC_BITMAP
    log_slave(f"release payload in __respond_bitmap_req", self.slave_id)
    # Releasing the semaphore unblocks fetch_payload().
    self.payload_sem.release()
def slave_loader(comm, slave_id):
    """Process entry point for a slave: build the process object and run
    its loop until killed."""
    log_slave("PID: " + str(os.getpid()), slave_id)
    slave_process = SlaveProcess(comm, slave_id)
    try:
        slave_process.loop()
    except KeyboardInterrupt:
        # Propagate the shutdown request to all slaves via shared state.
        comm.slave_termination.value = True
    log_slave("Killed!", slave_id)
def lock_concolic_thread(self):
    """Suspend qemu and (re)acquire this slave's concolic lock."""
    # Slaves beyond the configured number of concolic locks skip this.
    if self.slave_id >= len(self.comm.concolic_locks):
        return
    log_slave(f"try locking {self.slave_id}", self.slave_id)
    self.q.suspend()
    # Poll with a short timeout so a stop request is noticed promptly.
    while not self.stopped():
        acquired = self.comm.concolic_locks[self.slave_id].acquire(timeout=0.1)
        if acquired:
            break
    self.q.resume()
    log_slave("concolic locked", self.slave_id)
def __respond_bitmap_req(self, response):
    """Execute the requested payload and send its bitmap to the master,
    restarting the VM on failure until the execution succeeds."""
    self.q.set_payload(response.data)
    done = False
    while not done:
        try:
            bitmap = self.q.send_payload()
            done = True
        except:
            log_slave("__respond_bitmap_req failed...\n%s"%(traceback.format_exc()), self.slave_id)
            self.__restart_vm()
    send_msg(KAFL_TAG_REQ_BITMAP, bitmap, self.comm.to_master_from_slave_queue,
             source=self.slave_id)
def handle_havoc(self, payload, metadata):
    # Run the configured havoc-type stages (RQ dict, grimoire, radamsa,
    # AFL havoc, splicing) on one payload and accumulate per-stage timing.
    grimoire_time = 0
    havoc_time = 0
    splice_time = 0
    radamsa_time = 0
    # AFL havoc and splicing always run; the rest are opt-in via config.
    havoc_afl = True
    havoc_splice = True
    havoc_radamsa = self.config.argument_values['radamsa']
    havoc_grimoire = self.config.argument_values["grimoire"]
    havoc_redqueen = self.config.argument_values['redqueen']

    # NOTE(review): single-iteration loop -- presumably a leftover knob
    # for repeating the havoc round; confirm before removing.
    for i in range(1):
        # NOTE(review): assigned but unused below; kept in case the
        # attribute read has side effects.
        initial_findings = self.stage_info_findings
        # Dict based on RQ learned tokens
        # TODO: AFL only has deterministic dict stage for manual dictionary.
        # However RQ dict and auto-dict actually grow over time. Perhaps
        # create multiple dicts over time and store progress in metadata?
        if havoc_redqueen:
            self.__perform_rq_dict(payload, metadata)

        if havoc_grimoire:
            grimoire_start_time = time.time()
            self.__perform_grimoire(payload, metadata)
            grimoire_time += time.time() - grimoire_start_time

        if havoc_radamsa:
            radamsa_start_time = time.time()
            self.__perform_radamsa(payload, metadata)
            radamsa_time += time.time() - radamsa_start_time

        if havoc_afl:
            havoc_start_time = time.time()
            self.__perform_havoc(payload, metadata, use_splicing=False)
            havoc_time += time.time() - havoc_start_time

        if havoc_splice:
            splice_start_time = time.time()
            self.__perform_havoc(payload, metadata, use_splicing=True)
            splice_time += time.time() - splice_start_time

    # TODO: keep time/exec stats for each stage/method
    # - update slave_stats on stage/method done
    # - send timing/exec diff to Master for central node update
    self.havoc_time += havoc_time
    #self.splice_time += splice_time
    self.grimoire_time += grimoire_time
    #self.radamsa_time += radamsa_time

    log_slave(
        "HAVOC times: afl: %d, splice: %d, grim: %d, rdmsa: %d" %
        (havoc_time, splice_time, grimoire_time, radamsa_time),
        self.slave.slave_id)
def check_covered_bytes(self, bitmap):
    """Count covered bytes in the local and global bitmaps and log both."""
    # Local bitmap: a byte != 0xff counts as covered.
    # Global bitmap: any non-zero byte counts as covered.
    covered_local = 0
    covered_global = 0
    for idx in range(self.bitmap_size):
        if bitmap[idx] != 255:
            covered_local += 1
        if self.global_bitmap[idx] != 0:
            covered_global += 1
    log_slave(
        'bitmap covers %d bytes; global bitmap covers %d bytes' %
        (covered_local, covered_global), self.slave_id)
def handle_import(self, payload, metadata, retry=0):
    """Execute an imported seed payload and warn if it adds no coverage."""
    _, is_new = self.execute(payload, label="import")

    # Inform user if seed yields no new coverage. This may happen if -ip0 is
    # wrong or the harness is buggy.
    #
    # TODO: We also seem to have some corner case where PT feedback does not
    # work and the seed has to be provided multiple times to actually
    # (eventually) be recognized correctly..
    if not is_new:
        print("Imported payload produced no new coverage, skipping..")
        # Fixed: removed the stray backtick that leaked into the log message.
        log_slave("Imported payload produced no new coverage, skipping..",
                  self.slave.slave_id)
def quick_validate(self, data, old_res, quiet=False):
    """Re-execute `data` in persistent mode and check the bitmap matches.

    Faster than a full reload but unreliable ("funky") on some targets.
    Returns True when the new bitmap equals `old_res`'s bitmap.
    """
    # Validate in persistent mode. Faster but problematic for very funky targets
    self.statistics.event_exec()
    reference = old_res.copy_to_array()
    rerun = self.__execute(data).apply_lut()
    if rerun.copy_to_array() == reference:
        return True
    if not quiet:
        log_slave("Input validation failed! Target is funky?..", self.slave_id)
    return False
def loop(self):
    """Main slave loop: receive messages from the master and handle them
    until the connection drops."""
    if not self.q.start():
        return
    log_slave("Started qemu", self.slave_id)
    while True:
        try:
            msg = self.conn.recv()
        except ConnectionResetError:
            log_slave("Lost connection to master. Shutting down.", self.slave_id)
            return False
        self.handle_msg(msg)
def slave_loader(slave_id):
    """Process entry point for a fuzzer slave.

    Pins the process to a CPU, builds the connection/process objects and
    runs the slave loop, notifying the master on Ctrl-C.
    """
    log_slave("PID: " + str(os.getpid()), slave_id)
    # sys.stdout = open("slave_%d.out"%slave_id, "w")

    config = FuzzerConfiguration()
    # Fixed: compare against None instead of truthiness so an explicit
    # affinity of CPU 0 (falsy) is honored rather than ignored.
    if config.argument_values["cpu_affinity"] is not None:
        psutil.Process().cpu_affinity([config.argument_values["cpu_affinity"]])
    else:
        psutil.Process().cpu_affinity([slave_id])

    connection = ClientConnection(slave_id, config)
    slave_process = SlaveProcess(slave_id, config, connection)
    try:
        slave_process.loop()
    except KeyboardInterrupt:
        # Let the master know this slave is going away.
        slave_process.conn.send_terminated()
    log_slave("Killed!", slave_id)
def handle_busy(self):
    """Spend one busy cycle when the master has no ready work, then
    signal readiness again."""
    busy_timeout = 1
    # Disabled experiment: feed random inputs instead of sleeping.
    kickstart = False
    if kickstart:  # spend busy cycle by feeding random strings?
        log_slave("No ready work items, attempting random..", self.slave_id)
        start_time = time.time()
        meta_data = {"state": {"name": "import"}, "id": 0}
        while (time.time() - start_time) < busy_timeout:
            payload = rand.bytes(rand.int(32))
            self.logic.process_node(payload, meta_data)
    else:
        log_slave("No ready work items, waiting...", self.slave_id)
        time.sleep(busy_timeout)
    self.conn.send_ready()
def funky_validate(self, data, old_res):
    """Stochastically validate a result on a funky target.

    Re-runs the input 8 times and accepts it if at least 80% of the runs
    reproduce the original bitmap; otherwise stores it as funky.
    """
    validations = 8
    confirmations = sum(
        1 for _ in range(validations)
        if self.quick_validate(data, old_res, quiet=True))

    if confirmations >= 0.8 * validations:
        return True

    log_slave("Funky input received %d/%d confirmations. Rejecting.." %
              (confirmations, validations), self.slave_id)
    self.store_funky(data)
    return False
def req_dma_idx(self, key, size, cnt):
    # Request a DMA index for (key, size, cnt): answered locally by the
    # global model in reproduce mode, otherwise asked from the model
    # server over the queue. Falls back to 0 on a 5s timeout.
    if self.globalmodel:
        return self.globalmodel.get_dma_idx(key, size, cnt)
    else:
        send_msg(DRIFUZZ_REQ_DMA_IDX, (key, size, cnt), \
            self.comm.to_modelserver_queue, source=self.slave_id)
        # response = recv_tagged_msg(self.comm.to_slave_queues[self.slave_id], DRIFUZZ_REQ_READ_IDX)
        # print("requesting")
        # idx_sem is released by the responder thread once self.idx is set.
        if self.idx_sem.acquire(timeout=5):
            # print("requested")
            return self.idx
        else:
            log_slave('Req dma index: timeout', self.slave_id)
            # self.stop()
            return 0
def __execute(self, data, retry=0):
    """Deliver `data` to qemu; on SHM/pipe errors restart qemu and retry
    up to three times before giving up."""
    attempt = retry
    while True:
        try:
            self.q.set_payload(data)
            return self.q.send_payload()
        except (ValueError, BrokenPipeError):
            if attempt > 2:
                # TODO if it reliably kills qemu, perhaps log to master for harvesting..
                log_slave(
                    "Fatal: Repeated BrokenPipeError on input: %s" %
                    repr(data), self.slave_id)
                raise
            log_slave("SHM/pipe error, trying to restart qemu...", self.slave_id)
            if not self.q.restart():
                raise
            attempt += 1
def __execute(self, data, retry=0):
    """Deliver `data` to qemu; on SHM/socket errors restart qemu and
    retry up to three times, logging each retry."""
    attempt = retry
    while True:
        try:
            self.q.set_payload(data)
            return self.q.send_payload()
        except (ValueError, BrokenPipeError):
            if attempt > 2:
                # TODO if it reliably kills qemu, perhaps log to master for harvesting..
                print_fail("Slave %d aborting due to repeated SHM/socket error. Check logs." % self.slave_id)
                log_slave("Aborting due to repeated SHM/socket error. Payload: %s" % repr(data), self.slave_id)
                raise
            print_warning("SHM/socket error on Slave %d (retry %d)" % (self.slave_id, attempt))
            log_slave("SHM/socket error, trying to restart qemu...", self.slave_id)
            self.statistics.event_reload()
            if not self.q.restart():
                raise
            attempt += 1
def validate(self, data, old_array):
    """Re-run `data` and confirm its bitmap equals `old_array`.

    Returns (True, new_bitmap) on match, (False, None) otherwise.
    """
    self.q.set_payload(data)
    self.statistics.event_exec()
    new_bitmap = self.q.send_payload().apply_lut()
    new_array = new_bitmap.copy_to_array()

    if new_array == old_array:
        return True, new_bitmap

    log_slave("Validation failed, ignoring this input", self.slave_id)
    if False:  # activate detailed logging of funky bitmaps
        for idx in range(new_bitmap.bitmap_size):
            if old_array[idx] != new_array[idx]:
                log_slave(
                    "Funky bit in validation bitmap: %d (%d vs %d)" %
                    (idx, old_array[idx], new_array[idx]), self.slave_id)
    return False, None
def __respond_job_req(self, response, imported=False):
    # Copy the next payload out of the master's shared memory and wake the
    # worker. `imported` selects the PROC_IMPORT state instead of PROC_TASK.
    # if self.state != SlaveState.WAITING:
    #     print("Error: slave is not waiting for input")
    #     return
    self.affected_bytes = response.data
    shm_fs = self.comm.get_master_payload_shm(self.slave_id)
    shm_fs.seek(0)
    # SHM layout: 4-byte little-endian length prefix, then payload bytes.
    payload_len = struct.unpack('<I', shm_fs.read(4))[0]
    # print('payload len:', payload_len)
    self.payload = shm_fs.read(payload_len)
    # print(self.state)
    assert (self.state == SlaveState.WAITING)
    if imported:
        self.state = SlaveState.PROC_IMPORT
    else:
        self.state = SlaveState.PROC_TASK
    log_slave(f"release payload in __respond_job_req", self.slave_id)
    # Releasing the semaphore unblocks fetch_payload().
    self.payload_sem.release()
def __respond_verification(self, response):
    # Verify one payload from master SHM: optionally trim it, let the
    # HashFixer repair hash checks, execute it, and report the result to
    # the mapserver.
    # NOTE(review): reconstructed from whitespace-mangled source -- the
    # exact nesting of the while/if blocks should be confirmed against
    # upstream (kAFL/Redqueen) history.
    jobs = response.data[0]
    methods = response.data[1]
    results = []
    # NOTE(review): `i` is never incremented; the while loop below runs
    # exactly once (unconditional break) -- a single payload slot.
    i = 0
    self.comm.slave_locks_A[self.slave_id].acquire()
    while True:
        payload, payload_shm_size = self.q.copy_master_payload(self.comm.get_master_payload_shm(self.slave_id), i, self.comm.get_master_payload_shm_size())
        # First 4 bytes of the SHM payload are its content length.
        payload_content_len_init = struct.unpack("I", payload[0:4])[0]
        payload_content_len = perform_trim(payload_content_len_init, self.q.send_payload, self.q.modify_payload_size, self.error_handler)
        if payload_content_len_init != payload_content_len:
            log_slave("TRIM: " + "{0:.2f}".format(((payload_content_len*1.0)/(payload_content_len_init*1.0))*100.0) + "% (" + str(payload_content_len) + "/" + str(payload_content_len_init) + ")", self.slave_id)
        patches = jobs[0]
        if len(patches) > 0:
            log_slave("Got payload to fix with size: %d and patches %s"%( payload_content_len, patches), self.slave_id )
        if len(patches):
            log_redq("Slave "+str(self.slave_id)+" Orig Payload: " + repr(payload[4:4+payload_content_len]))
            # Try to repair hash/checksum comparisons in the payload.
            hash = HashFixer(self.q, self.redqueen_state)
            new_payload = hash.try_fix_data(payload[4:4+payload_content_len])
            if new_payload:
                log_redq("Slave "+str(self.slave_id)+"Fixed Payload: " + repr("".join(map(chr,new_payload))))
                payload = payload[:4]+"".join(map(chr,new_payload))
                self.q.set_payload(new_payload)
        # Time the (patch-free) execution to derive a performance metric.
        start_time = time.time()
        bitmap = self.q.send_payload(apply_patches=False)
        performance = time.time() - start_time
        log_slave("performance: " + str(1.0/performance) + " -> " + str(performance), self.slave_id)
        break
    if not bitmap:
        log_slave("SHM ERROR....", self.slave_id)
    new_bits = self.q.copy_bitmap(self.comm.get_bitmap_shm(self.slave_id), i, self.comm.get_bitmap_shm_size(), bitmap, payload, payload_shm_size, effector_mode_hash=None, apply_patches = False)
    if new_bits:
        self.q.copy_mapserver_payload(self.comm.get_mapserver_payload_shm(self.slave_id), i, self.comm.get_mapserver_payload_shm_size())
    results.append(FuzzingResult(i, self.q.crashed,
                                 self.q.timeout, self.q.kasan, jobs[i], self.slave_id, performance,
                                 methods[i], mmh3.hash64(bitmap),
                                 reloaded=(self.q.timeout or self.q.crashed or self.q.kasan),
                                 new_bits=new_bits, qid=self.slave_id))
    self.comm.slave_locks_B[self.slave_id].release()
    send_msg(KAFL_TAG_RESULT, results, self.comm.to_mapserver_queue, source=self.slave_id)
def loop(self):
    """Main slave loop: dispatch messages from the master by type until
    the connection drops."""
    if not self.q.start():
        return
    log_slave("Started qemu", self.slave_id)

    while True:
        try:
            msg = self.conn.recv()
        except ConnectionResetError:
            log_slave("Lost connection to master. Shutting down.", self.slave_id)
            return

        mtype = msg["type"]
        if mtype == MSG_RUN_NODE:
            self.handle_node(msg)
        elif mtype == MSG_IMPORT:
            self.handle_import(msg)
        elif mtype == MSG_BUSY:
            self.handle_busy()
        else:
            raise ValueError("Unknown message type {}".format(msg))
def interprocess_proto_handler(self):
    """Receive one message from the master queue and dispatch it by tag."""
    response = recv_msg(self.comm.to_slave_queues[self.slave_id])
    tag = response.tag

    if tag == KAFL_TAG_JOB:
        self.__respond_job_req(response)
        # Ask for the next job right away.
        send_msg(KAFL_TAG_REQ, self.q.qemu_id, self.comm.to_master_queue,
                 source=self.slave_id)
    elif tag == KAFL_TAG_REQ_BITMAP:
        self.__respond_bitmap_req(response)
    elif tag == KAFL_TAG_REQ_SAMPLING:
        self.__respond_sampling_req(response)
    elif tag == KAFL_TAG_REQ_BENCHMARK:
        self.__respond_benchmark_req(response)
    else:
        # Unknown tags are logged, not fatal.
        log_slave("Received TAG: " + str(response.tag), self.slave_id)