import os
import shutil

import setproctitle
from inotify import adapters
from inotify.constants import IN_CLOSE_WRITE

# md5sum is assumed to be a project-local helper returning a file's MD5 digest


def s2e_concolic_testcase_watcher(seedbox, testcases):
    setproctitle.setproctitle('S2E test case watcher')
    processed = set()
    count = 0
    i = adapters.InotifyTree(testcases, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        if event is None:
            continue
        (_, _, path, filename) = event
        full_name = os.path.join(path, filename)
        # skip test cases whose content has been seen before
        md5 = md5sum(full_name)
        if md5 in processed:
            continue
        processed.add(md5)
        count += 1
        # copy the new test case into the AFL seedbox under an AFL-style name
        dst_file = 'id:{:06d},{}'.format(count, filename)
        dst = '{}/{}'.format(seedbox, dst_file)
        shutil.copyfile(full_name, dst)
        print('[{}] -> [{}]'.format(filename, dst_file))

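# A minimal sketch of how a watcher like this might be launched, assuming the
# seedbox and test case directories already exist; the paths and the Process
# wiring below are illustrative, not taken from the original source.
from multiprocessing import Process

watcher = Process(target=s2e_concolic_testcase_watcher,
                  args=('/fuzz/seedbox', '/fuzz/s2e/testcases'))
watcher.daemon = True
watcher.start()
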
import sys
from os import path

import six
from inotify import adapters, constants


def wait_for_dir(watched_path):
    watched_path = six.binary_type(watched_path.encode('utf-8'))
    i = adapters.InotifyTree(watched_path, mask=constants.IN_CLOSE_WRITE)
    for event in i.event_gen():
        if event is not None:
            filename = path.basename(event[3])
            dir_name = event[2]
            # ignore editor lock/backup files (emacs '.#' locks, flycheck
            # temporaries, '~' backups)
            if filename.startswith(b'.#') or filename.startswith(
                    b'flycheck') or filename.endswith(b'~'):
                continue
            # ignore writes into the test cache directory
            elif dir_name.endswith(b'.cache/v/cache'):
                continue
            else:
                print(event)
                sys.exit(0)

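# Hypothetical usage: block until a real file write lands anywhere under
# ./results (editor temp files are filtered out), then exit; the directory
# name is an assumption for illustration.
wait_for_dir('./results')
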
import os
from os import devnull

from setproctitle import setproctitle
from inotify import adapters
from inotify.constants import IN_CREATE, IN_ISDIR


def s2e_expdata_watcher(argv):
    ''' replace S2E expdata files with /dev/null symlinks '''
    setproctitle('S2E expdata handler')
    watch_dir = '{}/expdata'.format(argv['out_s2e'])
    tmp_dir = argv['out_s2e']
    i = adapters.InotifyTree(watch_dir, mask=IN_CREATE)
    for event in i.event_gen():
        if event is None:
            continue
        (header, _, path, filename) = event
        if header.mask & IN_ISDIR:
            continue
        # create the symlink under a temporary name, then rename it over the
        # newly created file; rename is atomic on the same filesystem, so the
        # file is never observed half-replaced
        tmplink = '{}/{}_tmp'.format(tmp_dir, filename)
        os.symlink(devnull, tmplink)
        os.rename(tmplink, os.path.join(path, filename))

import os
import shutil
from os.path import basename
from multiprocessing import Process, active_children

import psycopg2
from setproctitle import setproctitle
from inotify import adapters
from inotify.constants import IN_CLOSE_WRITE

# md5sum is assumed to be a project-local helper returning a file's MD5 digest


def s2e_concolic_testcase_watcher(argv):
    setproctitle('S2E test case watcher')
    watch_dir = '{}/testcases'.format(argv['out_s2e'])
    db_config = argv['db_config']
    seedbox = argv['seedbox']
    processed = set()
    count = 0

    def process_input(path, filename, count):
        setproctitle('S2E test case watcher: processing [{}]'.format(filename))
        # 1. update the interested table with the testcase info
        src = os.path.join(path, filename)
        try:
            with open(src, 'rb') as f_src:
                src_content = f_src.read()
            md5 = md5sum(src)
        except Exception as excp:
            print(getattr(excp, 'message', repr(excp)))
            return
        conn = psycopg2.connect(host=db_config['host'],
                                port=db_config['port'],
                                database=db_config['database'],
                                user=db_config['user'],
                                password=db_config['password'])
        cur = conn.cursor()
        id_file = 0
        try:
            query = ('insert into file (hash, file) '
                     'values (%s, %s) on conflict do nothing returning id;')
            cur.execute(query, (md5, psycopg2.Binary(src_content)))
            conn.commit()
            id_file = cur.fetchone()
        except KeyboardInterrupt:
            raise
        except psycopg2.IntegrityError as excp:
            print(getattr(excp, 'message', repr(excp)))
            conn.rollback()
        except Exception as excp:
            print(getattr(excp, 'message', repr(excp)))
            conn.rollback()
        # only update the interested table if the testcase was generated by
        # concolic mode
        if not filename.startswith('testcase'):
            # l_fn = ['s2e', 'input', idx_input, idx_interested,
            #         idx_basic_block, md5_short]
            l_fn = filename.split('_')
            if cur.description is None or id_file is None:
                # the insert returned nothing, so look the file id up by hash
                query = 'select id from file where hash = %s;'
                cur.execute(query, (md5, ))
                id_file = cur.fetchone()
            if id_file is not None:
                query = ('update interested set '
                         'idfile = %s, '
                         'uri = %s, '
                         'update_time=now(), '
                         'status = 3 '
                         'where id = %s returning id;')
                cur.execute(query, (id_file[0], src, l_fn[3]))
                if cur.rowcount != 1:
                    conn.rollback()
                else:
                    conn.commit()
        conn.close()
        # 2. copy the file from the S2E test case output directory to the
        # AFL seedbox
        if not filename.startswith('testcase'):
            dst_file = 'id:{:06d},{},{},{},{}'.format(count, l_fn[2], l_fn[3],
                                                      l_fn[4], l_fn[5])
        else:
            dst_file = 'id:{:06d},{}'.format(count, filename)
        dst = '{}/{}'.format(seedbox, dst_file)
        print('[W][S2E]: [{}] -> [{}]'.format(basename(src), basename(dst)))
        shutil.copyfile(src, dst)

    i = adapters.InotifyTree(watch_dir, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        # join any finished child processes
        active_children()
        if event is None:
            continue
        (_, _, path, filename) = event
        md5 = md5sum(os.path.join(path, filename))
        if md5 in processed:
            continue
        processed.add(md5)
        count += 1
        p_input = Process(target=process_input, args=[path, filename, count])
        p_input.start()

import os
import time
from os.path import basename, dirname
from multiprocessing import Process, Queue, active_children

import psycopg2
from psycopg2.extensions import AsIs
from setproctitle import setproctitle
from inotify import adapters
from inotify.constants import IN_CLOSE_WRITE

# md5sum and DynamicAnalyzer are assumed to be project-local helpers


def afl_watcher(argv):
    setproctitle('AFL test case watcher')

    def process_input(path, filename):
        setproctitle('analyzing [{}:{}]'.format(basename(dirname(path)),
                                                filename))
        db_config = argv['db_config']
        project_id = argv['project_id']
        qemu = argv['qemu']
        basedir = argv['basedir']
        analyzer = DynamicAnalyzer(db_config=db_config,
                                   qemu=qemu,
                                   basedir=basedir,
                                   project_id=project_id)
        analyzer.analyze_dynamic(os.path.join(path, filename))

    def process_stats(path, filename):
        setproctitle('Processing fuzzer_stats for node {}'.format(
            basename(path)))
        db_config = argv['db_config']
        project_id = argv['project_id']
        conn = psycopg2.connect(host=db_config['host'],
                                port=db_config['port'],
                                database=db_config['database'],
                                user=db_config['user'],
                                password=db_config['password'])
        cur = conn.cursor()
        # fuzzer_stats is a list of 'key : value' lines
        with open(os.path.join(path, filename)) as f_stats:
            content = f_stats.readlines()
        stats_dict = dict(
            map(str.strip, line.split(':', 1)) for line in content)
        stats_dict['idproject'] = project_id
        try:
            columns = list(stats_dict.keys())
            values = [stats_dict[column] for column in columns]
            exclude_columns = ['EXCLUDED.' + column for column in columns]
            query = ('insert into afl_stats '
                     '(%s) values %s '
                     'on conflict (idproject, afl_banner) DO UPDATE SET '
                     '(%s) = (%s) '
                     'returning id;')
            cur.execute(
                query,
                (AsIs(', '.join(columns)), tuple(values),
                 AsIs(', '.join(columns)), AsIs(', '.join(exclude_columns))))
            conn.commit()
            id_file = cur.fetchone()
        except psycopg2.IntegrityError as excp:
            print(getattr(excp, 'message', repr(excp)))
            conn.rollback()
        except Exception as excp:
            print(getattr(excp, 'message', repr(excp)))
            conn.rollback()
        conn.close()

    def worker(queue):
        setproctitle('AFL dynamic analyze dispatcher')
        processes = []
        MAX_PROCESSES = 20
        while True:
            try:
                # throttle: wait until fewer than MAX_PROCESSES children
                # are alive
                while active_children():
                    processes[:] = [p for p in processes if p.is_alive()]
                    if len(processes) < MAX_PROCESSES:
                        break
                    time.sleep(0.1)
                path, filename = queue.get()
                p_input = Process(target=process_input, args=[path, filename])
                p_input.start()
                processes.append(p_input)
            except KeyboardInterrupt:
                raise
            except Exception as excp:
                print(getattr(excp, 'message', repr(excp)))
                continue

    watch_dir = argv['out_afl']
    max_testcase_size = argv.get('max_testcase_size', 1024 * 1024 * 50)
    queue = Queue()
    processed = set()
    tracker = set()
    # use a distinct name for the Process so it does not shadow worker()
    p_worker = Process(target=worker, args=[queue])
    p_worker.start()
    i = adapters.InotifyTree(watch_dir, mask=IN_CLOSE_WRITE)
    for event in i.event_gen():
        if event is None:
            continue
        (_, _, path, filename) = event
        # filter #1, most often: the event is not inside a queue directory
        dir_base = basename(path)
        if not dir_base == 'queue':
            # possibly it is the fuzzer statistics file
            if filename == 'fuzzer_stats':
                p_stats = Process(target=process_stats, args=[path, filename])
                p_stats.start()
            continue
        # filter #2, there is a subdirectory inside queue
        if not filename.startswith('id:'):
            continue
        # filter #3, do not analyze the seedbox
        node = basename(dirname(path))
        if node == 'seedbox':
            continue
        # filter #4, for *most* of the test cases AFL creates the
        # IN_CLOSE_WRITE event fires twice; work around this by only handling
        # every second event for a given file
        if not (path, filename) in tracker:
            tracker.add((path, filename))
            continue
        tracker.remove((path, filename))
        current = []
        # XXX the tracker set keeps growing (some events never fire twice),
        # so once it exceeds 100 records drain it and try to push the entries
        # into the queue for processing
        if len(tracker) > 100:
            while tracker:
                current.append(tracker.pop())
        # always include the file from the current event
        current.append((path, filename))
        for c_path, c_filename in current:
            # filter #5, different nodes can generate test cases with the
            # same hash
            md5 = md5sum(os.path.join(c_path, c_filename))
            if md5 in processed:
                continue
            processed.add(md5)
            f_size = os.stat(os.path.join(c_path, c_filename)).st_size
            if f_size > max_testcase_size:
                print('TEST CASE FILE SIZE TOO LARGE FOR FILE: ')
                print('{}:{} ({}) NOT SYNC INTO DATABASE'.format(
                    basename(dirname(c_path)), c_filename, f_size))
                continue
            queue.put((c_path, c_filename))
            print('[W][AFL][{}][{: >8}]: [{}] {}'.format(
                len(processed), basename(dirname(c_path)), c_filename, md5))
        # join any finished child processes
        active_children()

def run(self, data, store, signal, context, **kwargs):
    """ The main run method of the NotifyTriggerTask task.

    Args:
        data (MultiTaskData): The data object that has been passed from the
            predecessor task.
        store (DataStoreDocument): The persistent data store object that
            allows the task to store data for access across the current
            workflow run.
        signal (TaskSignal): The signal object for tasks. It wraps the
            construction and sending of signals into easy to use methods.
        context (TaskContext): The context in which the tasks runs.

    Raises:
        LightflowFilesystemPathError: If the specified path is not absolute.
    """
    params = self.params.eval(data, store)

    # build the notification mask
    on_file_create = constants.IN_CREATE if params.on_file_create else 0x00000000
    on_file_close = constants.IN_CLOSE_WRITE if params.on_file_close else 0x00000000
    on_file_delete = constants.IN_DELETE if params.on_file_delete else 0x00000000
    on_file_move = constants.IN_MOVE if params.on_file_move else 0x00000000
    mask = (on_file_create | on_file_close | on_file_delete | on_file_move)

    if not os.path.isabs(params.path):
        raise LightflowFilesystemPathError(
            'The specified path is not an absolute path')

    if params.recursive:
        notify = adapters.InotifyTree(params.path.encode('utf-8'))
    else:
        notify = adapters.Inotify()
        notify.add_watch(params.path.encode('utf-8'))

    # set up the exclusion regex
    if isinstance(params.exclude_mask, str):
        regex = re.compile(params.exclude_mask)
    else:
        regex = None

    # if requested, pre-fill the file list with existing files
    files = []
    if params.use_existing:
        for (dir_path, dir_names, filenames) in os.walk(params.path):
            files.extend([os.path.join(dir_path, filename)
                          for filename in filenames])
            if not params.recursive:
                break

        if regex is not None:
            files = [file for file in files if regex.search(file) is None]

        if params.flush_existing and len(files) > 0:
            if self._callback is not None:
                self._callback(files, data, store, signal, context)
            del files[:]

    polling_event_number = 0
    try:
        for event in notify.event_gen():
            if params.event_trigger_time is not None:
                time.sleep(params.event_trigger_time)

            # check the stop signal every stop_polling_rate events
            polling_event_number += 1
            if polling_event_number > params.stop_polling_rate:
                polling_event_number = 0
                if signal.is_stopped:
                    break

            # in case of an event, check whether it matches the mask and
            # call a dag
            if event is not None:
                (header, type_names, watch_path, filename) = event

                if (not header.mask & constants.IN_ISDIR) and \
                        (header.mask & mask):
                    new_file = os.path.join(watch_path.decode('utf-8'),
                                            filename.decode('utf-8'))

                    add_file = not params.skip_duplicate or \
                        (params.skip_duplicate and new_file not in files)

                    if add_file and regex is not None:
                        add_file = regex.search(new_file) is None

                    if add_file:
                        files.append(new_file)

            # as soon as enough files have been aggregated, call the sub dag
            if len(files) >= params.aggregate:
                chunks = len(files) // params.aggregate
                for i in range(0, chunks):
                    if self._callback is not None:
                        self._callback(files[0:params.aggregate], data, store,
                                       signal, context)
                    del files[0:params.aggregate]
    finally:
        if not params.recursive:
            notify.remove_watch(params.path.encode('utf-8'))

    return Action(data)

import os
import time
from multiprocessing import Process, active_children
from queue import Full

import psutil
from setproctitle import setproctitle
from inotify import adapters
from inotify.constants import IN_CREATE

# check_dir is assumed to be a project-local helper that ensures the
# directory exists


def bblog_watcher(self, bb_exec_q):
    '''
    watcher for the log files that contain the basic blocks covered by each
    S2E execution
    '''
    setproctitle('S2E basic block coverage log handler')
    paths_bblog = '{}/output_s2e/BBLog'.format(self._basedir)
    check_dir(paths_bblog)
    processed = set()

    def tail_with_pid(full_path, pid):
        ''' simple tail implementation '''
        interval = 1.0
        f_log = open(full_path)
        while True:
            try:
                where = f_log.tell()
                lines = f_log.readlines()
                if not lines:
                    time.sleep(interval)
                    f_log.seek(where)
                else:
                    yield lines
                # break the loop if the process no longer exists
                if not psutil.pid_exists(pid):
                    break
            except IOError:
                yield ''
            except KeyboardInterrupt:
                raise

    def process_log(path, filename):
        ''' process the basic block coverage log from an S2E execution '''
        full_path = os.path.join(path, filename)
        # the log file name is expected to start with the S2E process id
        try:
            pid = int(filename.split('_')[0])
        except ValueError:
            print('extracting pid from ' + filename + ' failed.')
            return
        for lines in tail_with_pid(full_path, pid):
            for line in lines:
                try:
                    bb_exec_q.put(int(line.rstrip()))
                except ValueError:
                    print('converting basic block id failed: {}'.format(
                        line.rstrip()))
                except Full:
                    print('putting value into queue failed, queue is full')
                except Exception:
                    raise

    i = adapters.InotifyTree(paths_bblog, mask=IN_CREATE)
    for event in i.event_gen():
        # join any finished child processes
        active_children()
        if event is None:
            continue
        (_, _, path, filename) = event
        if filename in processed:
            continue
        processed.add(filename)
        p_event = Process(target=process_log, args=[path, filename])
        p_event.start()

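# A minimal consumer sketch for bb_exec_q, assuming it is a
# multiprocessing.Queue shared with the watcher above; the function name and
# the covered set are hypothetical, for illustration only.
def drain_bb_queue(bb_exec_q, covered):
    # move all currently queued basic block ids into the covered set
    while not bb_exec_q.empty():
        covered.add(bb_exec_q.get())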