def copy_file(source_file_path, destination_file_path):
  """Faster version of shutil.copy with buffer size."""
  if not os.path.exists(source_file_path):
    logs.log_error('Source file %s for copy not found.' % source_file_path)
    return False

  error_occurred = False
  try:
    with open(source_file_path, 'rb') as source_file_handle:
      with open(destination_file_path, 'wb') as destination_file_handle:
        shutil.copyfileobj(source_file_handle, destination_file_handle,
                           FILE_COPY_BUFFER_SIZE)
  except:
    error_occurred = True

  # Make sure that the destination file actually exists.
  error_occurred |= not os.path.exists(destination_file_path)

  if error_occurred:
    logs.log_warn('Failed to copy source file %s to destination file %s.' %
                  (source_file_path, destination_file_path))
    return False

  return True
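# Hypothetical usage sketch, not part of the original module: the same
# buffered-copy technique using only the standard library. The 10 MiB buffer
# size here is an illustrative assumption, not the module's actual
# FILE_COPY_BUFFER_SIZE value.
def _buffered_copy_sketch(src, dst, buffer_size=10 * 1024 * 1024):
  """Copy |src| to |dst| with an explicit copy buffer size."""
  import shutil
  with open(src, 'rb') as src_handle, open(dst, 'wb') as dst_handle:
    # A buffer larger than shutil's default reduces the number of read/write
    # calls on large files, which is the point of copy_file() above.
    shutil.copyfileobj(src_handle, dst_handle, buffer_size)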
def undercoat_api_command(*args):
  """Make an API call to the undercoat binary."""
  logs.log(f'Running undercoat command {args}')
  bundle_dir = environment.get_value('FUCHSIA_RESOURCES_DIR')
  undercoat_path = os.path.join(bundle_dir, 'undercoat', 'undercoat')
  undercoat = new_process.ProcessRunner(undercoat_path, args)

  # The undercoat log is sent to stderr, which we capture to a tempfile.
  with tempfile.TemporaryFile() as undercoat_log:
    result = undercoat.run_and_wait(
        stderr=undercoat_log, extra_env={'TMPDIR': get_temp_dir()})
    result.output = result.output.decode('utf-8')

    if result.return_code != 0:
      # Dump the undercoat log to assist in debugging.
      log_data = utils.read_from_handle_truncated(undercoat_log, 1024 * 1024)
      logs.log_warn('Log output from undercoat: ' + log_data.decode('utf-8'))

      # The API error message is returned on stdout.
      raise UndercoatError(
          'Error running undercoat command %s: %s' % (args, result.output))

  return result
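# Self-contained sketch of the stderr-to-tempfile technique above, using the
# plain subprocess module (new_process.ProcessRunner and
# utils.read_from_handle_truncated are assumed to be internal helpers of this
# codebase). Capturing a chatty log to a temporary file keeps it out of
# memory; it is read back, truncated, only on failure.
def _run_with_stderr_log_sketch(argv):
  import subprocess
  import tempfile
  with tempfile.TemporaryFile() as stderr_log:
    completed = subprocess.run(argv, stdout=subprocess.PIPE, stderr=stderr_log)
    if completed.returncode != 0:
      stderr_log.seek(0)
      # Read back at most 1 MiB, similar in spirit to the truncated read above.
      log_data = stderr_log.read(1024 * 1024)
      print('stderr (truncated):', log_data.decode('utf-8', errors='replace'))
    return completed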
def connect_remote(num_retries=REMOTE_CONNECT_RETRIES, reconnect=False):
  """Connect to the remote device. Returns whether we succeeded."""
  # Note: we use get_adb_command_line/execute_command explicitly as
  # run_adb_command could call this function for recovery.
  device_state = get_device_state()
  if not reconnect and device_state == 'device':
    # Already connected, nothing to do here. Note that this is not a very good
    # check for the health of the connection.
    return False

  # First try to disconnect from the device.
  device_serial = environment.get_value('ANDROID_SERIAL')
  disconnect_cmd = get_adb_command_line('disconnect %s' % device_serial)
  execute_command(
      disconnect_cmd, timeout=REMOTE_CONNECT_TIMEOUT, log_error=True)

  # Now try to connect, retrying if needed.
  connect_cmd = get_adb_command_line('connect %s' % device_serial)
  for i in range(num_retries + 1):
    output = execute_command(
        connect_cmd, timeout=REMOTE_CONNECT_TIMEOUT, log_error=False)
    if output and 'connected to ' in output:
      # We must check the device state again, as ADB connection establishment
      # is just a simple TCP connection establishment with no extra checks.
      if get_device_state() == 'device':
        logs.log('Reconnected to remote device after %d tries.' % (i + 1))
        return True
      else:
        # False connection, disconnect so ADB lets us connect again.
        execute_command(
            disconnect_cmd, timeout=REMOTE_CONNECT_TIMEOUT, log_error=True)

    time.sleep(REMOTE_CONNECT_SLEEP)

  logs.log_warn('Failed to reconnect to remote device.')
  return False
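# Hypothetical sketch of the connect/verify/retry shape used above, with the
# adb helpers abstracted into callables. The key detail is that a successful
# TCP-level 'connect' is not trusted until an independent device-state check
# passes; otherwise the half-open connection is torn down before retrying.
def _connect_with_verification_sketch(connect, verify, disconnect, retries,
                                      sleep_seconds):
  import time
  for _ in range(retries + 1):
    if connect() and verify():
      return True
    # Disconnect so the next attempt starts from a clean state.
    disconnect()
    time.sleep(sleep_seconds)
  return False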
def run_command(task_name, task_argument, job_name):
  """Run the command."""
  if task_name not in COMMAND_MAP:
    logs.log_error("Unknown command '%s'" % task_name)
    return

  task_module = COMMAND_MAP[task_name]

  # If applicable, ensure this is the only instance of the task running.
  task_state_name = ' '.join([task_name, task_argument, job_name])
  if should_update_task_status(task_name):
    if not data_handler.update_task_status(task_state_name,
                                           data_types.TaskState.STARTED):
      logs.log('Another instance of "{}" already '
               'running, exiting.'.format(task_state_name))
      raise AlreadyRunningError

  try:
    task_module.execute_task(task_argument, job_name)
  except errors.InvalidTestcaseError:
    # It is difficult to try to handle the case where a test case is deleted
    # during processing. Rather than trying to catch by checking every point
    # where a test case is reloaded from the datastore, just abort the task.
    logs.log_warn('Test case %s no longer exists.' % task_argument)
  except BaseException:
    # On any other exceptions, update state to reflect error and re-raise.
    if should_update_task_status(task_name):
      data_handler.update_task_status(task_state_name,
                                      data_types.TaskState.ERROR)
    raise

  # Task completed successfully.
  if should_update_task_status(task_name):
    data_handler.update_task_status(task_state_name,
                                    data_types.TaskState.FINISHED)
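# Minimal sketch of the STARTED/ERROR/FINISHED bookkeeping above, with a
# hypothetical in-memory store standing in for the datastore-backed
# data_handler. The important shape is the try/except: ERROR is recorded and
# the exception still propagates to the caller.
_TASK_STATUS_SKETCH = {}

def _run_tracked_sketch(task_state_name, work):
  if _TASK_STATUS_SKETCH.get(task_state_name) == 'STARTED':
    raise RuntimeError(
        'Another instance already running: %s' % task_state_name)
  _TASK_STATUS_SKETCH[task_state_name] = 'STARTED'
  try:
    work()
  except BaseException:
    _TASK_STATUS_SKETCH[task_state_name] = 'ERROR'
    raise
  _TASK_STATUS_SKETCH[task_state_name] = 'FINISHED'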
def get_impacts_on_prod_builds(testcase, testcase_file_path):
  """Get testcase impact on production builds, which are extended stable,
  stable and beta."""
  impacts = Impacts()
  try:
    impacts.stable = get_impact_on_build('stable',
                                         testcase.impact_stable_version,
                                         testcase, testcase_file_path)
  except AppFailedException:
    return get_impacts_from_url(testcase.regression, testcase.job_type)

  try:
    impacts.beta = get_impact_on_build('beta', testcase.impact_beta_version,
                                       testcase, testcase_file_path)
  except AppFailedException:
    # If beta fails to get the binary, we ignore. At least, we have stable.
    pass

  try:
    impacts.extended_stable = get_impact_on_build(
        'extended_stable', testcase.impact_extended_stable_version, testcase,
        testcase_file_path)
  except Exception as e:
    # TODO(yuanjunh): undo the exception bypass for ES.
    logs.log_warn('Caught errors in getting impact on extended stable: %s' % e)

  # Always record the affected head version.
  start_revision, end_revision = get_start_and_end_revision(
      testcase.regression, testcase.job_type)
  build_revision_mappings = build_info.get_build_to_revision_mappings()
  impacts.head = get_head_impact(build_revision_mappings, start_revision,
                                 end_revision)

  return impacts
def start_bot(bot_command):
  """Start the bot process."""
  command, arguments = shell.get_command_and_arguments(bot_command)
  store_output = mozprocess.processhandler.StoreOutput()

  try:
    process_handle = mozprocess.ProcessHandlerMixin(
        command,
        arguments,
        kill_on_timeout=True,
        processOutputLine=[store_output])
    process_handler.start_process(process_handle)
  except Exception:
    logs.log_error('Unable to start bot process (%s).' % bot_command)
    return 1

  # Wait until the process terminates or until run timed out.
  run_timeout = environment.get_value('RUN_TIMEOUT')
  exit_code = process_handle.wait(timeout=run_timeout)
  try:
    process_handle.kill()
  except Exception:
    pass

  log_message = ('Command: %s %s (exit=%s)\n%s' %
                 (command, arguments, exit_code, '\n'.join(
                     store_output.output)))

  if exit_code == 0:
    logs.log(log_message)
  elif exit_code == 1:
    # Anecdotally, exit=1 means there's a fatal Python exception.
    logs.log_error(log_message)
  else:
    logs.log_warn(log_message)

  return exit_code
def wrapped(*args, **kwargs):
  """Wrapper for adding retry logic."""
  for retry_attempt in range(num_retries + 1):
    # Wait for channel to (re)connect if necessary.
    state = _check_channel_state(config.RECONNECT_TIMEOUT_SECONDS)

    if state == ChannelState.INCONSISTENT:
      # No point retrying if the worker is inconsistent.
      monitoring_metrics.HOST_INCONSISTENT_COUNT.increment()
      logs.log_warn('Worker got into an inconsistent state.')
      host_exit_no_return(return_code=0)

    if state == ChannelState.NOT_READY:
      # Channel still isn't ready.
      logs.log_warn('Channel failed to become ready within reconnect timeout.')
      if retry_attempt == num_retries:
        # Last attempt.
        host_exit_no_return()

      continue

    try:
      return func(*args, **kwargs)
    except grpc.RpcError as e:
      # For timeouts, which aren't fatal errors, resurface the right
      # exception.
      # TODO(mbarbella): Ignoring errors on the next line fixes an issue while
      # trying to support this code in both Python 2 and 3, but may not be
      # necessary in Python 3 since presumably the exception class will
      # allow us to properly convert it to a string. Delete after migrating.
      exception_message = e.message.decode('utf-8', errors='ignore')
      if 'TimeoutError' in exception_message:
        # TODO(ochang): Replace with generic TimeoutError in Python 3.
        raise engine.TimeoutError(e.message)

      if num_retries == 0:
        # Just re-raise the original exception if this RPC is not configured
        # for retries.
        raise

      logs.log_warn('Failed RPC: ' + exception_message)
      if retry_attempt == num_retries:
        # Last attempt.
        host_exit_no_return()

      time.sleep(RPC_FAIL_WAIT_TIME)
def wrapped(*args, **kwargs):
  """Wrapper for adding retry logic."""
  for retry_attempt in range(num_retries + 1):
    # Wait for channel to (re)connect if necessary.
    state = _check_channel_state(config.RECONNECT_TIMEOUT_SECONDS)

    if state == ChannelState.INCONSISTENT:
      # No point retrying if the worker is inconsistent.
      monitoring_metrics.HOST_INCONSISTENT_COUNT.increment()
      logs.log_warn('Worker got into an inconsistent state.')
      host_exit_no_return(return_code=0)

    if state == ChannelState.NOT_READY:
      # Channel still isn't ready.
      logs.log_warn('Channel failed to become ready within reconnect timeout.')
      if retry_attempt == num_retries:
        # Last attempt.
        host_exit_no_return()

      continue

    try:
      return func(*args, **kwargs)
    except grpc.RpcError as e:
      # For timeouts, which aren't fatal errors, resurface the right
      # exception.
      if 'TimeoutError' in repr(e):
        # TODO(ochang): Replace with generic TimeoutError in Python 3.
        # Use __str__ to avoid newstrs. TODO(ochang): Use plain str() once
        # migrated to Python 3.
        raise engine.TimeoutError(e.__str__())

      if num_retries == 0:
        # Just re-raise the original exception if this RPC is not configured
        # for retries.
        raise

      logs.log_warn('Failed RPC: ' + repr(e))
      if retry_attempt == num_retries:
        # Last attempt.
        host_exit_no_return()

      time.sleep(RPC_FAIL_WAIT_TIME)
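# The two wrapped() variants above are the Python 2 and Python 3 bodies of the
# same closure; func and num_retries are bound by an enclosing decorator
# factory. A hypothetical minimal version of that factory (not the module's
# actual API) looks like this:
def _retry_rpc_sketch(num_retries, wait_seconds=1):
  import functools
  import time

  def decorator(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
      for attempt in range(num_retries + 1):
        try:
          return func(*args, **kwargs)
        except ConnectionError:  # Stand-in for grpc.RpcError.
          if attempt == num_retries:
            raise  # Last attempt; give up.
          time.sleep(wait_seconds)
    return wrapped

  return decorator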
def kill_process_tree(root_pid):
  """Kill process tree."""
  try:
    parent = psutil.Process(root_pid)
    children = parent.children(recursive=True)
  except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
    logs.log_warn("Failed to find or access process.")
    return

  for child in children:
    try:
      child.kill()
    except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
      logs.log_warn("Failed to kill process child.")

  try:
    parent.kill()
  except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
    logs.log_warn("Failed to kill process.")
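# Usage sketch. Enumerating children *before* any kill matters: once the
# parent dies, its children are reparented (e.g. to init) and can no longer
# be discovered through parent.children(recursive=True).
#
#   import subprocess
#   proc = subprocess.Popen(['sleep', '60'])  # Illustrative victim process.
#   kill_process_tree(proc.pid)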
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
  """Execute ML RNN generator to produce new inputs.

  This method should be called inside launcher, to generate a number of
  new inputs based on ML RNN model.

  It will fetch ML model from GCS bucket specified in environment
  variable `CORPUS_BUCKET`. The script to run the model resides
  in folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g. libpng_read_fuzzer. It indicates the
        subdirectory in gcs bucket to store models.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
  if environment.platform() != 'LINUX':
    logs.log('Unsupported platform for ML RNN generation, skipping.')
    return

  # Validate corpus folder.
  file_count = shell.get_directory_file_count(input_directory)
  if not file_count:
    logs.log('Corpus is empty. Skip generation.')
    return

  # Number of existing new inputs. They are possibly generated by other
  # generators.
  old_corpus_units = shell.get_directory_file_count(output_directory)
  old_corpus_bytes = shell.get_directory_size(output_directory)

  # Get model path.
  model_path = prepare_model_directory(fuzzer_name)
  if not model_path:
    return

  result = run(input_directory, output_directory, model_path,
               generation_timeout)

  # Generation process exited abnormally but not caused by timeout, meaning
  # error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN generation for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=result.output)
    return

  # Timeout is not error, if we have new units generated.
  if result.timed_out:
    logs.log_warn('ML RNN generation for fuzzer %s timed out.' % fuzzer_name)

  new_corpus_units = (
      shell.get_directory_file_count(output_directory) - old_corpus_units)
  new_corpus_bytes = (
      shell.get_directory_size(output_directory) - old_corpus_bytes)
  if new_corpus_units:
    logs.log('Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
             (new_corpus_units, new_corpus_bytes, fuzzer_name))
  else:
    logs.log_error(
        'ML RNN generator did not produce any inputs for %s' % fuzzer_name,
        output=result.output)
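# Self-contained sketch of the before/after delta accounting used above:
# snapshot the output directory's file count and byte size before generation
# and subtract afterwards, so inputs written by other generators into the same
# directory are not double-counted. produce_inputs is a stand-in for run().
def _directory_delta_sketch(output_directory, produce_inputs):
  import os

  def count_and_size(directory):
    paths = [os.path.join(directory, name) for name in os.listdir(directory)]
    paths = [path for path in paths if os.path.isfile(path)]
    return len(paths), sum(os.path.getsize(path) for path in paths)

  old_count, old_bytes = count_and_size(output_directory)
  produce_inputs(output_directory)
  new_count, new_bytes = count_and_size(output_directory)
  return new_count - old_count, new_bytes - old_bytes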
def iterator(archive_path,
             archive_obj=None,
             file_match_callback=None,
             should_extract=True):
  """Return an iterator for files in an archive. Extracts files if
  |should_extract| is True."""
  archive_type = get_archive_type(archive_path)

  if not file_match_callback:
    file_match_callback = lambda _: True

  def maybe_extract(extract_func, info):
    """Returns an extracted file or None if it is not supposed to be
    extracted."""
    if should_extract:
      return extract_func(info)
    return None

  if archive_type == ArchiveType.ZIP:
    try:
      with zipfile.ZipFile(archive_obj or archive_path) as zip_file:
        for info in zip_file.infolist():
          if not file_match_callback(info.filename):
            continue

          yield ArchiveFile(info.filename, info.file_size,
                            maybe_extract(zip_file.open, info))
    except (zipfile.BadZipfile, zipfile.LargeZipFile):
      logs.log_error('Bad zip file %s.' % archive_path)
  elif archive_type == ArchiveType.TAR:
    try:
      if archive_obj:
        tar_file = tarfile.open(fileobj=archive_obj)
      else:
        tar_file = tarfile.open(archive_path)

      for info in tar_file.getmembers():
        if not file_match_callback(info.name):
          continue

        yield ArchiveFile(info.name, info.size,
                          maybe_extract(tar_file.extractfile, info))

      tar_file.close()
    except tarfile.TarError:
      logs.log_error('Bad tar file %s.' % archive_path)
  elif archive_type == ArchiveType.TAR_LZMA:
    # Import lzma here so that if lzma installation fails (as it may on
    # Windows), other archives can still be opened.
    # TODO(metzman): Determine if this actually fails on Windows and move
    # this to the top of the file if it doesn't.
    from backports import lzma

    assert archive_obj is None, "LZMAFile doesn't support opening file handles."
    try:
      with lzma.LZMAFile(archive_path) as lzma_file, \
           tarfile.open(fileobj=lzma_file) as tar_file:
        error_filepaths = []
        for info in tar_file.getmembers():
          if not file_match_callback(info.name):
            continue

          try:
            yield ArchiveFile(info.name, info.size,
                              maybe_extract(tar_file.extractfile, info))
          except KeyError:
            # Handle broken links gracefully.
            error_filepaths.append(info.name)
            yield ArchiveFile(info.name, info.size, None)

        if error_filepaths:
          logs.log_warn(
              'Check archive %s for broken links.' % archive_path,
              error_filepaths=error_filepaths)
    except (lzma.LZMAError, tarfile.TarError):
      logs.log_error('Bad lzma file %s.' % archive_path)
  else:
    logs.log_error('Unsupported compression type for file %s.' % archive_path)
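# Hypothetical usage sketch: list .txt entries in a zip without extracting
# their contents (should_extract=False leaves the extracted handle as None).
# The ArchiveFile field names are assumed here for illustration.
#
#   for entry in iterator('corpus.zip',
#                         file_match_callback=lambda n: n.endswith('.txt'),
#                         should_extract=False):
#     print(entry.name, entry.size)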
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state."""
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out and
    # will probably get re-imaged. E.g. using frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  last_flash_time = persistent_cache.get_value(
      constants.LAST_FLASH_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=FLASH_INTERVAL)
  if not needs_flash:
    return

  is_google_device = settings.is_google_device()
  if is_google_device is None:
    logs.log_error('Unable to query device. Reimaging failed.')
    adb.bad_state_reached()
  elif not is_google_device:
    # We can't reimage these, skip.
    logs.log('Non-Google device found, skipping reimage.')
    return

  # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
  # are set. If not, we don't have enough data for reimaging and hence
  # we bail out.
  branch = environment.get_value('BUILD_BRANCH')
  target = environment.get_value('BUILD_TARGET')
  if not target:
    # We default to userdebug configuration.
    build_params = settings.get_build_parameters()
    if build_params:
      target = build_params.get('target') + '-userdebug'

      # Cache target in environment. This is also useful for cases when
      # device is bricked and we don't have this information available.
      environment.set_value('BUILD_TARGET', target)

  if not branch or not target:
    logs.log_warn('BUILD_BRANCH and BUILD_TARGET are not set, '
                  'skipping reimage.')
    return

  image_directory = environment.get_value('IMAGES_DIR')
  build_info = fetch_artifact.get_latest_artifact_info(branch, target)
  if not build_info:
    logs.log_error('Unable to fetch information on latest build artifact for '
                   'branch %s and target %s.' % (branch, target))
    return

  if environment.is_android_cuttlefish():
    download_latest_build(build_info, FLASH_CUTTLEFISH_REGEXES,
                          image_directory)
    adb.recreate_cuttlefish_device()
    adb.connect_to_cuttlefish_device()
  else:
    download_latest_build(build_info, FLASH_IMAGE_REGEXES, image_directory)

    # We do one device flash at a time on one host, otherwise we run into
    # failures and device being stuck in a bad state.
    flash_lock_key_name = 'flash:%s' % socket.gethostname()
    if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
      logs.log_error('Failed to acquire lock for reimaging, exiting.')
      return

    logs.log('Reimaging started.')
    logs.log('Rebooting into bootloader mode.')
    for _ in range(FLASH_RETRIES):
      adb.run_as_root()
      adb.run_command(['reboot-bootloader'])
      time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
      adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
      adb.run_fastboot_command(['-w', 'reboot-bootloader'])

      for partition, partition_image_filename in FLASH_IMAGE_FILES:
        partition_image_file_path = os.path.join(image_directory,
                                                 partition_image_filename)
        adb.run_fastboot_command(
            ['flash', partition, partition_image_file_path])
        if partition in ['bootloader', 'radio']:
          adb.run_fastboot_command(['reboot-bootloader'])

      # Disable ramdump to avoid capturing ramdumps during kernel crashes.
      # This causes device lockup of several minutes during boot and we intend
      # to analyze them ourselves.
      adb.run_fastboot_command(['oem', 'ramdump', 'disable'])

      adb.run_fastboot_command('reboot')
      time.sleep(FLASH_REBOOT_WAIT)

      if adb.get_device_state() == 'device':
        break
      logs.log_error('Reimaging failed, retrying.')

    locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(constants.BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(constants.LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(constants.LAST_FLASH_TIME_KEY, time.time())
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
  """Executes a process with a given command line and other parameters."""
  if environment.is_trusted_host() and testcase_run:
    from bot.untrusted_runner import remote_process_host
    return remote_process_host.run_process(
        cmdline, current_working_directory, timeout, need_shell, gestures,
        env_copy, testcase_run, ignore_children)

  if gestures is None:
    gestures = []

  if env_copy:
    os.environ.update(env_copy)

  # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
  # do certain operations before fuzzer setup (e.g. bad build check).
  launcher = environment.get_value('LAUNCHER_PATH')
  # This is used when running scripts on native linux OS and not on the device.
  # E.g. running a fuzzer to generate testcases or launcher script.
  plt = environment.platform()
  if plt in ['ANDROID', 'FUCHSIA'] and (not testcase_run or launcher):
    plt = 'LINUX'
  elif plt == 'IOS' and (not testcase_run or launcher):
    plt = 'MAC'

  # Lower down testcase timeout slightly to account for time for crash
  # analysis.
  timeout -= CRASH_ANALYSIS_TIME

  # LeakSanitizer hack - give time for stdout/stderr processing.
  lsan = environment.get_value('LSAN', False)
  if lsan:
    timeout -= LSAN_ANALYSIS_TIME

  # Initialize variables.
  adb_output = None
  process_output = ''
  process_status = None
  return_code = 0
  process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
  start_time = time.time()
  watch_for_process_exit = (
      environment.get_value('WATCH_FOR_PROCESS_EXIT')
      if plt == 'ANDROID' else True)
  window_list = []

  # Get gesture start time from last element in gesture list.
  gestures = copy.deepcopy(gestures)
  if gestures and gestures[-1].startswith('Trigger'):
    gesture_start_time = int(gestures[-1].split(':')[1])
    gestures.pop()
  else:
    gesture_start_time = timeout // 2

  if plt == 'ANDROID':
    # Clear the log upfront.
    android.logger.clear_log()

    # Run the app.
    adb_output = android.adb.run_command(cmdline, timeout=timeout)
  else:
    cmd, args = shell.get_command_and_arguments(cmdline)

    process_output = mozprocess.processhandler.StoreOutput()
    process_status = ProcessStatus()
    try:
      process_handle = mozprocess.ProcessHandlerMixin(
          cmd,
          args,
          cwd=current_working_directory,
          shell=need_shell,
          processOutputLine=[process_output],
          onFinish=[process_status],
          ignore_children=ignore_children)
      start_process(process_handle)
    except:
      logs.log_error('Exception occurred when running command: %s.' % cmdline)
      return None, None, ''

  while True:
    time.sleep(process_poll_interval)

    # Run the gestures at gesture_start_time or in case we didn't find windows
    # in the last try.
    if (gestures and time.time() - start_time >= gesture_start_time and
        not window_list):
      # In case, we don't find any windows, we increment the gesture start time
      # so that the next check is after 1 second.
      gesture_start_time += 1

      if plt == 'LINUX':
        linux.gestures.run_gestures(gestures, process_handle.pid,
                                    process_status, start_time, timeout,
                                    window_list)
      elif plt == 'WINDOWS':
        windows.gestures.run_gestures(gestures, process_handle.pid,
                                      process_status, start_time, timeout,
                                      window_list)
      elif plt == 'ANDROID':
        android.gestures.run_gestures(gestures, start_time, timeout)

        # TODO(mbarbella): We add a fake window here to prevent gestures on
        # Android from getting executed more than once.
        window_list = ['FAKE']

    if time.time() - start_time >= timeout:
      break

    # Collect the process output.
    output = (
        android.logger.log_output()
        if plt == 'ANDROID' else b'\n'.join(process_output.output))
    output = utils.decode_to_unicode(output)
    if crash_analyzer.is_memory_tool_crash(output):
      break

    # Check if we need to bail out on process exit.
    if watch_for_process_exit:
      # If |watch_for_process_exit| is set, then we already completed running
      # our app launch command. So, we can bail out.
      if plt == 'ANDROID':
        break

      # On desktop, we bail out as soon as the process finishes.
      if process_status and process_status.finished:
        # Wait for process shutdown and set return code.
        process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
        break

  # Process output based on platform.
  if plt == 'ANDROID':
    # Get current log output. If device is in reboot mode, logcat automatically
    # waits for device to be online.
    time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
    output = android.logger.log_output()

    if android.constants.LOW_MEMORY_REGEX.search(output):
      # If the device is low on memory, we should force reboot and bail out to
      # prevent device from getting in a frozen state.
      logs.log('Device is low on memory, rebooting.', output=output)
      android.adb.hard_reset()
      android.adb.wait_for_device()
    elif android.adb.time_since_last_reboot() < time.time() - start_time:
      # Check if a reboot has happened, if yes, append log output before reboot
      # and kernel logs content to output.
      log_before_last_reboot = android.logger.log_output_before_last_reboot()
      kernel_log = android.adb.get_kernel_log_content()
      output = '%s%s%s%s%s' % (
          log_before_last_reboot,
          utils.get_line_seperator('Device rebooted'), output,
          utils.get_line_seperator('Kernel Log'), kernel_log)

      # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
      # in ~0.15 sec and is needed especially between runs for kernel crashes.
      android.adb.run_as_root()
      android.settings.change_se_linux_to_permissive_mode()
      return_code = 1

    # Add output from adb to the front.
    if adb_output:
      output = '%s\n\n%s' % (adb_output, output)

    # Kill the application if it is still running. We do this at the end to
    # prevent this from adding noise to the logcat output.
    task_name = environment.get_value('TASK_NAME')
    child_process_termination_pattern = environment.get_value(
        'CHILD_PROCESS_TERMINATION_PATTERN')
    if task_name == 'fuzz' and child_process_termination_pattern:
      # In some cases, we do not want to terminate the application after each
      # run to avoid long startup times (e.g. for chrome). Terminate processes
      # matching a particular pattern for light cleanup in this case.
      android.adb.kill_processes_and_children_matching_name(
          child_process_termination_pattern)
    else:
      # There is no special termination behavior. Simply stop the application.
      android.app.stop()
  else:
    # Get the return code in case the process has finished already.
    # If the process hasn't finished, return_code will be None which is what
    # callers expect unless the output indicates a crash.
    return_code = process_handle.poll()

    # If the process is still running, then terminate it.
    if not process_status.finished:
      launcher_with_interpreter = shell.get_execute_command(
          launcher, is_blackbox_fuzzer=True) if launcher else None
      if (launcher_with_interpreter and
          cmdline.startswith(launcher_with_interpreter)):
        # If this was a launcher script, we KILL all child processes created
        # except for APP_NAME.
        # It is expected that, if the launcher script terminated normally, it
        # cleans up all the child processes it created itself.
        terminate_root_and_child_processes(process_handle.pid)
      else:
        try:
          # kill() here actually sends SIGTERM on posix.
          process_handle.kill()
        except:
          pass

    if lsan:
      time.sleep(LSAN_ANALYSIS_TIME)

    output = b'\n'.join(process_output.output)
    output = utils.decode_to_unicode(output)

    # X Server hack when max client reached.
    if ('Maximum number of clients reached' in output or
        'Unable to get connection to X server' in output):
      logs.log_error('Unable to connect to X server, exiting.')
      os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
      sys.exit(0)

  if testcase_run and (crash_analyzer.is_memory_tool_crash(output) or
                       crash_analyzer.is_check_failure_crash(output)):
    return_code = 1

  # If a crash is found, then we add the memory state as well.
  if return_code and plt == 'ANDROID':
    ps_output = android.adb.get_ps_output()
    if ps_output:
      output += utils.get_line_seperator('Memory Statistics')
      output += ps_output

  if return_code:
    logs.log_warn(
        'Process (%s) ended with exit code (%s).' %
        (repr(cmdline), str(return_code)),
        output=output)

  return return_code, round(time.time() - start_time, 1), output
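# Minimal, self-contained sketch of the poll-until-timeout-or-exit loop that
# run_process is built around, using only the standard library (no gestures,
# crash analysis or platform branches). process_handle is assumed to behave
# like subprocess.Popen.
def _poll_loop_sketch(process_handle, timeout, poll_interval=0.5):
  import time
  start_time = time.time()
  while time.time() - start_time < timeout:
    if process_handle.poll() is not None:
      # Process exited on its own; report its exit code.
      return process_handle.returncode
    time.sleep(poll_interval)
  # Still running at the deadline; terminate it and report no exit code.
  process_handle.kill()
  return None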
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state."""
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out and
    # will probably get re-imaged. E.g. using frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  last_flash_time = persistent_cache.get_value(
      LAST_FLASH_TIME_KEY, constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=adb.FLASH_INTERVAL)
  if not needs_flash:
    return

  build_info = {}
  if adb.is_gce():
    adb.recreate_gce_device()
  else:
    # Physical device.
    is_google_device = google_device()
    if is_google_device is None:
      logs.log_error('Unable to query device. Reimaging failed.')
      adb.bad_state_reached()
    elif not is_google_device:
      # We can't reimage these, skip.
      logs.log('Non-Google device found, skipping reimage.')
      return
    else:
      # For Google devices.
      # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
      # are set. If not, we don't have enough data for reimaging and hence
      # we bail out.
      branch = environment.get_value('BUILD_BRANCH')
      target = environment.get_value('BUILD_TARGET')
      if not target:
        # We default to userdebug configuration.
        build_params = get_build_parameters()
        if build_params:
          target = build_params.get('target') + '-userdebug'

          # Cache target in environment. This is also useful for cases when
          # device is bricked and we don't have this information available.
          environment.set_value('BUILD_TARGET', target)

      if not branch or not target:
        logs.log_warn(
            'BUILD_BRANCH and BUILD_TARGET are not set, skipping reimage.')
        return

      # Download the latest build artifact for this branch and target.
      build_info = fetch_artifact.get_latest_artifact_info(branch, target)
      if not build_info:
        logs.log_error('Unable to fetch information on latest build artifact '
                       'for branch %s and target %s.' % (branch, target))
        return

      # Check if our local build matches the latest build. If not, we will
      # download it.
      build_id = build_info['bid']
      target = build_info['target']
      image_directory = environment.get_value('IMAGES_DIR')
      last_build_info = persistent_cache.get_value(LAST_FLASH_BUILD_KEY)
      if not last_build_info or last_build_info['bid'] != build_id:
        # Clean up the images directory first.
        shell.remove_directory(image_directory, recreate=True)

        # We have a new build, download the build artifacts for it.
        for image_regex in FLASH_IMAGE_REGEXES:
          image_file_path = fetch_artifact.get(build_id, target, image_regex,
                                               image_directory)
          if not image_file_path:
            logs.log_error('Failed to download image artifact %s for '
                           'branch %s and target %s.' %
                           (image_file_path, branch, target))
            return
          if image_file_path.endswith('.zip'):
            archive.unpack(image_file_path, image_directory)

      # We do one device flash at a time on one host, otherwise we run into
      # failures and device being stuck in a bad state.
      flash_lock_key_name = 'flash:%s' % socket.gethostname()
      if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
        logs.log_error('Failed to acquire lock for reimaging, exiting.')
        return

      logs.log('Reimaging started.')
      logs.log('Rebooting into bootloader mode.')
      for _ in range(FLASH_RETRIES):
        adb.run_as_root()
        adb.run_adb_command(['reboot-bootloader'])
        time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
        adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
        adb.run_fastboot_command(['-w', 'reboot-bootloader'])

        for partition, partition_image_filename in FLASH_IMAGE_FILES:
          partition_image_file_path = os.path.join(image_directory,
                                                   partition_image_filename)
          adb.run_fastboot_command(
              ['flash', partition, partition_image_file_path])
          if partition in ['bootloader', 'radio']:
            adb.run_fastboot_command(['reboot-bootloader'])

        adb.run_fastboot_command('reboot')
        time.sleep(FLASH_REBOOT_WAIT)

        if adb.get_device_state() == 'device':
          break
        logs.log_error('Reimaging failed, retrying.')

      locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(LAST_FLASH_TIME_KEY, time.time())
def do_libfuzzer_minimization(testcase, testcase_file_path):
  """Use libFuzzer's built-in minimizer where appropriate."""
  is_overriden_job = bool(environment.get_value('ORIGINAL_JOB_NAME'))

  def handle_unreproducible():
    # Be more lenient with marking testcases as unreproducible when this is a
    # job override.
    if is_overriden_job:
      _skip_minimization(testcase, 'Unreproducible on overridden job.')
    else:
      task_creation.mark_unreproducible_if_flaky(testcase, True)

  timeout = environment.get_value('LIBFUZZER_MINIMIZATION_TIMEOUT', 180)
  rounds = environment.get_value('LIBFUZZER_MINIMIZATION_ROUNDS', 10)
  current_testcase_path = testcase_file_path
  last_crash_result = None

  # Get initial crash state.
  initial_crash_result = _run_libfuzzer_testcase(testcase, testcase_file_path)
  if not initial_crash_result.is_crash():
    logs.log_warn('Did not crash. Output:\n' +
                  initial_crash_result.get_stacktrace(symbolized=True))
    handle_unreproducible()
    return

  if testcase.security_flag != initial_crash_result.is_security_issue():
    logs.log_warn('Security flag does not match.')
    handle_unreproducible()
    return

  task_creation.mark_unreproducible_if_flaky(testcase, False)

  expected_state = initial_crash_result.get_symbolized_data()
  logs.log('Initial crash state: %s\n' % expected_state.crash_state)

  # We attempt minimization multiple times in case one round results in an
  # incorrect state, or runs into another issue such as a slow unit.
  for round_number in range(1, rounds + 1):
    logs.log('Minimizing round %d.' % round_number)
    output_file_path, crash_result = _run_libfuzzer_tool(
        'minimize',
        testcase,
        current_testcase_path,
        timeout,
        expected_state.crash_state,
        set_dedup_flags=True)
    if output_file_path:
      last_crash_result = crash_result
      current_testcase_path = output_file_path

  if not last_crash_result:
    repro_command = tests.get_command_line_for_application(
        file_to_run=testcase_file_path, needs_http=testcase.http_flag)
    _skip_minimization(
        testcase,
        'LibFuzzer minimization failed.',
        crash_result=initial_crash_result,
        command=repro_command)
    return

  logs.log('LibFuzzer minimization succeeded.')

  if utils.is_oss_fuzz():
    # Scrub the testcase of non-essential data.
    cleansed_testcase_path = do_libfuzzer_cleanse(
        testcase, current_testcase_path, expected_state.crash_state)
    if cleansed_testcase_path:
      current_testcase_path = cleansed_testcase_path

  # Finalize the test case if we were able to reproduce it.
  repro_command = tests.get_command_line_for_application(
      file_to_run=current_testcase_path, needs_http=testcase.http_flag)
  finalize_testcase(testcase.key.id(), repro_command, last_crash_result)

  # Clean up after we're done.
  shell.clear_testcase_directories()
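# Sketch of the multi-round minimization loop above: each round that produces
# an output file becomes the input to the next round, so the reproducer only
# ever shrinks. minimize_once is a hypothetical stand-in for _run_libfuzzer_tool
# and is expected to return None on a failed round.
def _iterative_minimize_sketch(testcase_path, minimize_once, rounds=10):
  current_path = testcase_path
  succeeded = False
  for _ in range(rounds):
    output_path = minimize_once(current_path)
    if output_path:
      succeeded = True
      current_path = output_path
  return current_path if succeeded else None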
def run_afl_fuzz(self, fuzz_args):
  """Run afl-fuzz and if there is an input that causes afl-fuzz to hang
  or if it can't bind to a cpu, try fixing the issue and running afl-fuzz
  again. If there is a crash in the starting corpus then report it.

  Args:
    fuzz_args: The arguments passed to afl-fuzz. List may be modified if
      afl-fuzz runs into an error.

  Returns:
    A new_process.ProcessResult.
  """
  # Define here to capture in closures.
  max_total_time = self.initial_max_total_time
  fuzz_result = None

  def get_time_spent_fuzzing():
    """Gets the amount of time spent running afl-fuzz so far."""
    return self.initial_max_total_time - max_total_time

  def check_error_and_log(error_regex, log_message_format):
    """See if error_regex can match in fuzz_result.output. If it can, then it
    uses the match to format and print log_message and return the match.
    Otherwise returns None.
    """
    matches = re.search(error_regex, fuzz_result.output)
    if matches:
      erroring_filename = matches.groups()[0]
      message_format = (
          'Seconds spent fuzzing: {seconds}, ' + log_message_format)

      logs.log(
          message_format.format(
              erroring_filename, seconds=get_time_spent_fuzzing()))

      return erroring_filename
    return None  # else

  # Decide if we want to use fast cal based on the size of the input
  # directory. This is only done once, but the function can be called
  # multiple times. This is different than the call to fast_cal_manual where
  # we deterministically decide to use fast cal based on how long we have
  # spent fuzzing.
  self.strategies.decide_fast_cal_random(
      len(os.listdir(self.afl_input.input_directory)))

  num_first_testcase_hangs = 0
  num_retries = 0
  while self.should_try_fuzzing(max_total_time, num_retries):
    # Increment this now so that we can just "continue" without incrementing.
    num_retries += 1

    # If we have spent a while trying to fuzz and are still retrying, then
    # make sure we use AFL_FAST_CAL, otherwise we won't have much time for
    # fuzzing.
    self.strategies.decide_fast_cal_manual(get_time_spent_fuzzing())
    if self.strategies.use_fast_cal:
      environment.set_value(constants.FAST_CAL_ENV_VAR, 1)

    self.afl_setup()
    fuzz_result = self.run_and_wait(
        additional_args=fuzz_args,
        timeout=max_total_time,
        terminate_before_kill=True,
        terminate_wait_time=self.SIGTERM_WAIT_TIME)

    # Reduce max_total_time by the amount of time the last attempt took.
    max_total_time -= fuzz_result.time_executed

    # Break now only if everything went well. Note that if afl finds a crash
    # from fuzzing (and not in the input) it will exit with a zero return
    # code.
    if fuzz_result.return_code == 0:
      # If afl-fuzz found a crash, copy it to the testcase_file_path.
      self.afl_output.copy_crash_if_needed(self.testcase_file_path)
      break

    # Else the return_code was not 0 so something didn't work out. Try fixing
    # this if afl-fuzz threw an error because it saw a crash, hang or large
    # file in the starting corpus.

    # If there was a crash in the input/corpus, afl-fuzz won't run, so let
    # ClusterFuzz know about this and quit.
    crash_filename = check_error_and_log(self.CRASH_REGEX,
                                         self.CRASH_LOG_MESSAGE)
    if crash_filename:
      crash_path = os.path.join(self.afl_input.input_directory,
                                crash_filename)

      # Copy this file over so afl can reproduce the crash.
      shutil.copyfile(crash_path, self.testcase_file_path)
      break

    # afl-fuzz won't run if there is a hang in the input.
    hang_filename = check_error_and_log(self.HANG_REGEX,
                                        self.HANG_LOG_MESSAGE)
    if hang_filename:
      # Remove hang from queue and resume fuzzing.
      self.afl_output.remove_hang_in_queue(hang_filename)

      # Now that the bad testcase has been removed, let's resume fuzzing so we
      # don't start again from the beginning of the corpus.
      self.set_resume(fuzz_args)

      if hang_filename.startswith('id:000000'):
        num_first_testcase_hangs += 1
        if (num_first_testcase_hangs >
            self.MAX_FIRST_HANGS_WITH_DEFERRED_FORKSERVER):
          logs.log_warn('First testcase hangs when not deferring.')
        elif (num_first_testcase_hangs ==
              self.MAX_FIRST_HANGS_WITH_DEFERRED_FORKSERVER):
          environment.set_value(constants.DONT_DEFER_FORKSERVER, 1)
          print('Instructing AFL not to defer forkserver.\nIf this fixes the '
                'fuzzer, you should add this to the .options file:\n'
                '[env]\n'
                'afl_driver_dont_defer = 1')

      if num_retries - 1 > self.MAX_FUZZ_RETRIES_WITH_STRICT_TIMEOUT:
        skip_hangs = True
        self.set_timeout_arg(fuzz_args, self.MANUAL_TIMEOUT_MILLISECONDS,
                             skip_hangs)

      continue

    # If False: then prepare_retry_if_cpu_error can't solve the issue.
    if self.prepare_retry_if_cpu_error(fuzz_result):
      continue  # Try fuzzing again with the cpu error fixed.

    # If we can't do anything useful about the error, log it and don't try to
    # fuzz again.
    logs.log_error(
        ('Afl exited with a non-zero exitcode: %s. Cannot recover.' %
         fuzz_result.return_code),
        engine_output=fuzz_result.output)
    break

  return fuzz_result
def merge_corpus(self):
  """Merge new testcases into the input corpus."""
  logs.log('Merging corpus.')
  # Don't tell the fuzz target to write its stderr to the same file written
  # to during fuzzing. The target won't write its stderr anywhere.
  try:
    del os.environ[constants.STDERR_FILENAME_ENV_VAR]
  except KeyError:
    pass

  self._executable_path = self.afl_showmap_path
  # Remove arguments for afl-fuzz.
  showmap_args = self.generate_afl_args(use_showmap=True)

  input_dir = self.afl_input.input_directory
  corpus_features = set()
  input_inodes = set()
  input_filenames = set()
  for file_path in list_full_file_paths_recursive(input_dir):
    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing initial corpus.')
      return 0

    input_inodes.add(os.stat(file_path).st_ino)
    input_filenames.add(os.path.basename(file_path))
    corpus_features |= file_features

  merge_candidates = {}
  for file_path in list_full_file_paths(self.afl_output.queue):
    # Don't waste time merging copied files.
    inode = os.stat(file_path).st_ino

    # TODO(metzman): Make is_new_testcase capable of checking for hard links
    # and same files.
    # TODO(metzman): Replace this with portable code.
    if (not self.afl_output.is_new_testcase(file_path) or
        inode in input_inodes or  # Is it a hard link?
        # Is it the same file?
        os.path.basename(file_path) in input_filenames):
      continue

    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing output.')
      break

    # Does the file have unique features?
    if file_features - corpus_features:
      corpus_features |= file_features
      merge_candidates[file_features] = {
          'path': file_path,
          'size': os.path.getsize(file_path)
      }
    elif file_features in merge_candidates:
      # Replace the equivalent merge candidate if it is larger than this file.
      file_size = os.path.getsize(file_path)
      if merge_candidates[file_features]['size'] > file_size:
        merge_candidates[file_features] = {
            'path': file_path,
            'size': file_size
        }

  # Use destination file as hash of file contents to avoid overwriting
  # different files with the same name that were created from another
  # launcher instance.
  new_units_added = 0
  for candidate in merge_candidates.values():
    src_path = candidate['path']
    dest_filename = utils.file_hash(src_path)
    dest_path = os.path.join(input_dir, dest_filename)
    if shell.move(src_path, dest_path):
      new_units_added += 1

  return new_units_added
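# Self-contained sketch of the keep-smallest-per-feature-set selection inside
# merge_corpus: a file is kept if it covers new coverage features, or if it
# covers an already-seen feature set with a smaller file. Feature sets must be
# hashable (frozenset) to serve as dict keys.
def _select_merge_candidates_sketch(files_with_features):
  """files_with_features: iterable of (path, size, frozenset_of_features)."""
  corpus_features = set()
  candidates = {}
  for path, size, features in files_with_features:
    if features - corpus_features:
      # Covers something new; remember it.
      corpus_features |= features
      candidates[features] = (path, size)
    elif features in candidates and candidates[features][1] > size:
      # Same coverage as a known candidate, but smaller; prefer it.
      candidates[features] = (path, size)
  return [path for path, _ in candidates.values()]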
def analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                              corpus_directory, arguments):
  """Extract and analyze recommended dictionary from fuzzer output, then update
  the corresponding dictionary stored in GCS if needed."""
  if environment.platform() == 'FUCHSIA':
    # TODO(flowerhack): Support this.
    return None

  logs.log(
      'Extracting and analyzing recommended dictionary for %s.' % fuzzer_name)

  # Extract recommended dictionary elements from the log.
  dict_manager = dictionary_manager.DictionaryManager(fuzzer_name)
  recommended_dictionary = (
      dict_manager.parse_recommended_dictionary_from_log_lines(log_lines))
  if not recommended_dictionary:
    logs.log('No recommended dictionary in output from %s.' % fuzzer_name)
    return None

  # Write recommended dictionary into a file and run '-analyze_dict=1'.
  temp_dictionary_filename = (
      fuzzer_name + dictionary_manager.DICTIONARY_FILE_EXTENSION + '.tmp')
  temp_dictionary_path = os.path.join(fuzzer_utils.get_temp_dir(),
                                      temp_dictionary_filename)
  with open(temp_dictionary_path, 'wb') as file_handle:
    file_handle.write('\n'.join(recommended_dictionary))

  dictionary_analysis = runner.analyze_dictionary(
      temp_dictionary_path,
      corpus_directory,
      analyze_timeout=get_dictionary_analysis_timeout(),
      additional_args=arguments)

  if dictionary_analysis.timed_out:
    logs.log_warn(
        'Recommended dictionary analysis for %s timed out.' % fuzzer_name)
    return None

  if dictionary_analysis.return_code != 0:
    logs.log_warn('Recommended dictionary analysis for %s failed: %d.' %
                  (fuzzer_name, dictionary_analysis.return_code))
    return None

  # Extract dictionary elements considered useless, calculate the result.
  useless_dictionary = dict_manager.parse_useless_dictionary_from_data(
      dictionary_analysis.output)

  logs.log('%d out of %d recommended dictionary elements for %s are useless.' %
           (len(useless_dictionary), len(recommended_dictionary), fuzzer_name))

  recommended_dictionary = (
      set(recommended_dictionary) - set(useless_dictionary))
  if not recommended_dictionary:
    return None

  new_elements_added = dict_manager.update_recommended_dictionary(
      recommended_dictionary)
  logs.log('Added %d new elements to the recommended dictionary for %s.' %
           (new_elements_added, fuzzer_name))

  return recommended_dictionary
def main(argv):
  """Run libFuzzer as specified by argv."""
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  arguments = argv[1:]
  testcase_file_path = arguments.pop(0)
  target_name = arguments.pop(0)
  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'libFuzzer',
          'job_name': environment.get_value('JOB_NAME')
      })

  profiler.start_if_needed('libfuzzer_launcher')

  # Make sure that the fuzzer binary exists.
  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    # This is an expected case when doing regression testing with old builds
    # that do not have that fuzz target. It can also happen when a host sends a
    # message to an untrusted worker that just restarted and lost information
    # on build directory.
    logs.log_warn('Could not find fuzz target %s.' % target_name)
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  # Setup minijail if needed.
  use_minijail = environment.get_value('USE_MINIJAIL')
  runner = libfuzzer.get_runner(
      fuzzer_path, temp_dir=fuzzer_utils.get_temp_dir())

  if use_minijail:
    minijail_chroot = runner.chroot
  else:
    minijail_chroot = None

  # Get corpus directory.
  corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

  # Add common arguments which are necessary to be used for every run.
  arguments = expand_with_common_arguments(arguments)

  # Add sanitizer options to environment that were specified in the .options
  # file and options that this script requires.
  set_sanitizer_options(fuzzer_path)

  # Minimize test argument.
  minimize_to = fuzzer_utils.extract_argument(arguments, MINIMIZE_TO_ARGUMENT)
  minimize_timeout = fuzzer_utils.extract_argument(arguments,
                                                   MINIMIZE_TIMEOUT_ARGUMENT)

  if minimize_to and minimize_timeout:
    minimize_testcase(runner, testcase_file_path, minimize_to,
                      int(minimize_timeout), arguments, use_minijail)
    return

  # Cleanse argument.
  cleanse_to = fuzzer_utils.extract_argument(arguments, CLEANSE_TO_ARGUMENT)
  cleanse_timeout = fuzzer_utils.extract_argument(arguments,
                                                  CLEANSE_TIMEOUT_ARGUMENT)

  if cleanse_to and cleanse_timeout:
    cleanse_testcase(runner, testcase_file_path, cleanse_to,
                     int(cleanse_timeout), arguments, use_minijail)
    return

  # If we don't have a corpus, then that means this is not a fuzzing run.
  if not corpus_directory:
    load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                            use_minijail, arguments)
    return

  # We don't have a crash testcase, fuzz.

  # Check dict argument to make sure that it's valid.
  dict_argument = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  if dict_argument and not os.path.exists(dict_argument):
    logs.log_error('Invalid dict %s for %s.' % (dict_argument, fuzzer_name))
    fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

  # If there's no dict argument, check for %target_binary_name%.dict file.
  if (not fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)):
    default_dict_path = dictionary_manager.get_default_dictionary_path(
        fuzzer_path)
    if os.path.exists(default_dict_path):
      arguments.append(constants.DICT_FLAG + default_dict_path)

  fuzzing_strategies = []

  # Select a generator to use for existing testcase mutations.
  generator = _select_generator()
  is_mutations_run = generator != Generator.NONE

  # Timeout for fuzzer run.
  fuzz_timeout = get_fuzz_timeout(is_mutations_run)

  # Set up scratch directory for writing new units.
  new_testcases_directory = create_corpus_directory('new')

  # Get list of corpus directories.
  corpus_directories = get_corpus_directories(corpus_directory,
                                              new_testcases_directory,
                                              fuzzer_path, fuzzing_strategies,
                                              minijail_chroot)

  # Bind corpus directories in minijail.
  if use_minijail:
    artifact_prefix = constants.ARTIFACT_PREFIX_FLAG + '/'
  else:
    artifact_prefix = '%s%s/' % (constants.ARTIFACT_PREFIX_FLAG,
                                 os.path.abspath(
                                     os.path.dirname(testcase_file_path)))

  # Generate new testcase mutations using radamsa, etc.
  if is_mutations_run:
    new_testcase_mutations_directory = generate_new_testcase_mutations(
        corpus_directory, fuzzer_name, generator, fuzzing_strategies)
    corpus_directories.append(new_testcase_mutations_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [new_testcase_mutations_directory])

  max_len_argument = fuzzer_utils.extract_argument(
      arguments, constants.MAX_LEN_FLAG, remove=False)
  if not max_len_argument and do_random_max_length():
    max_length = random.SystemRandom().randint(1, MAX_VALUE_FOR_MAX_LENGTH)
    arguments.append('%s%d' % (constants.MAX_LEN_FLAG, max_length))
    fuzzing_strategies.append(strategy.RANDOM_MAX_LENGTH_STRATEGY)

  if do_recommended_dictionary():
    if add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path):
      fuzzing_strategies.append(strategy.RECOMMENDED_DICTIONARY_STRATEGY)

  if do_value_profile():
    arguments.append(constants.VALUE_PROFILE_ARGUMENT)
    fuzzing_strategies.append(strategy.VALUE_PROFILE_STRATEGY)

  if do_fork():
    max_fuzz_threads = environment.get_value('MAX_FUZZ_THREADS', 1)
    num_fuzz_processes = max(1,
                             multiprocessing.cpu_count() // max_fuzz_threads)
    arguments.append('%s%d' % (constants.FORK_FLAG, num_fuzz_processes))
    fuzzing_strategies.append(
        '%s_%d' % (strategy.FORK_STRATEGY, num_fuzz_processes))

  extra_env = {}
  if do_mutator_plugin():
    if use_mutator_plugin(target_name, extra_env, minijail_chroot):
      fuzzing_strategies.append(strategy.MUTATOR_PLUGIN_STRATEGY)

  # Execute the fuzzer binary with original arguments.
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      additional_args=arguments + [artifact_prefix],
      extra_env=extra_env)

  if (not use_minijail and
      fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail returns 1 if the exit code is nonzero.
    # Otherwise: we can assume that a return code of 1 means that libFuzzer
    # itself ran into an error.
    logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

  log_lines = fuzz_result.output.splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = None
  for line in log_lines:
    match = re.match(CRASH_TESTCASE_REGEX, line)
    if match:
      crash_testcase_file_path = match.group(1)
      break

  if crash_testcase_file_path:
    # Write the new testcase.
    if use_minijail:
      # Convert chroot relative path to host path. Remove the leading '/'
      # before joining.
      crash_testcase_file_path = os.path.join(minijail_chroot.directory,
                                              crash_testcase_file_path[1:])

    # Copy crash testcase contents into the main testcase path.
    shutil.move(crash_testcase_file_path, testcase_file_path)

  # Print the command output.
  log_header_format = ('Command: %s\n'
                       'Bot: %s\n'
                       'Time ran: %f\n')
  bot_name = environment.get_value('BOT_NAME', '')
  command = fuzz_result.command
  if use_minijail:
    # Remove minijail prefix.
    command = engine_common.strip_minijail_command(command, fuzzer_path)
  print(log_header_format % (engine_common.get_command_quoted(command),
                             bot_name, fuzz_result.time_executed))

  # Parse stats information based on libFuzzer output.
  parsed_stats = parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(log_lines, fuzzing_strategies,
                                       arguments))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  stat_overrides = {
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  }

  # Remove fuzzing arguments before merge and dictionary analysis step.
  remove_fuzzing_arguments(arguments)

  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_testcases_directory)
  merge_error = None
  if new_units_added:
    # Merge the new units with the initial corpus.
    if corpus_directory not in corpus_directories:
      corpus_directories.append(corpus_directory)

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.

    merge_tmp_dir = None
    if not use_minijail:
      merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(),
                                   'merge_workdir')
      engine_common.recreate_directory(merge_tmp_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_directory)
    merge_directory = create_merge_directory()
    corpus_directories.insert(0, merge_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [merge_directory])

    merge_result = runner.merge(
        corpus_directories,
        merge_timeout=engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT),
        tmp_dir=merge_tmp_dir,
        additional_args=arguments)

    move_mergeable_units(merge_directory, corpus_directory)
    new_corpus_len = shell.get_directory_file_count(corpus_directory)
    new_units_added = 0

    merge_error = None
    if merge_result.timed_out:
      merge_error = 'Merging new testcases timed out:'
    elif merge_result.return_code != 0:
      merge_error = 'Merging new testcases failed:'
    else:
      new_units_added = new_corpus_len - old_corpus_len

    stat_overrides['new_units_added'] = new_units_added

    if merge_result.output:
      stat_overrides.update(
          stats.parse_stats_from_merge_log(merge_result.output.splitlines()))
  else:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')

  # Get corpus size after merge. This removes the duplicate units that were
  # created during this fuzzing session.
  stat_overrides['corpus_size'] = shell.get_directory_file_count(
      corpus_directory)

  # Delete all corpus directories except for the main one. These were temporary
  # directories to store new testcase mutations and have already been merged to
  # main corpus directory.
  if corpus_directory in corpus_directories:
    corpus_directories.remove(corpus_directory)
  for directory in corpus_directories:
    shutil.rmtree(directory, ignore_errors=True)

  if use_minijail:
    unbind_corpus_dirs(minijail_chroot, corpus_directories)

  # Apply overridden stats to the parsed stats prior to dumping.
  parsed_stats.update(stat_overrides)

  # Dump stats data for further uploading to BigQuery.
  engine_common.dump_big_query_data(parsed_stats, testcase_file_path,
                                    LIBFUZZER_PREFIX, fuzzer_name, command)

  # Add custom crash state based on fuzzer name (if needed).
  add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
  for line in log_lines:
    print(line)

  # Add fuzzing strategies used.
  engine_common.print_fuzzing_strategies(fuzzing_strategies)

  # Add merge error (if any).
  if merge_error:
    print(data_types.CRASH_STACKTRACE_END_MARKER)
    print(merge_error)
    print(
        'Command:',
        get_printable_command(merge_result.command, fuzzer_path,
                              use_minijail))
    print(merge_result.output)

  analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                            corpus_directory, arguments)

  # Close minijail chroot.
  if use_minijail:
    minijail_chroot.close()

  # Record the stats to make them easily searchable in stackdriver.
  if new_units_added:
    logs.log(
        'New units added to corpus: %d.' % new_units_added,
        stats=parsed_stats)
  else:
    logs.log('No new units found.', stats=parsed_stats)
def dump_instance_logs(handle):
  """Dump logs from an undercoat instance."""
  qemu_log = undercoat_instance_command(
      'get_logs', handle, abort_on_error=False).output
  logs.log_warn(qemu_log)
def _merge_new_units(
    self,
    target_path,
    corpus_dir,
    new_corpus_dir,
    fuzz_corpus_dirs,
    arguments,
    stat_overrides,
):
    """Merge new units."""
    # Make a decision on whether merge step is needed at all. If there are no
    # new units added by libFuzzer run, then no need to do merge at all.
    new_units_added = shell.get_directory_file_count(new_corpus_dir)
    if not new_units_added:
        stat_overrides["new_units_added"] = 0
        logs.log("Skipped corpus merge since no new units added by fuzzing.")
        return

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.
    merge_corpus = self._create_merge_corpus_dir()

    merge_dirs = fuzz_corpus_dirs[:]

    # Merge the new units with the initial corpus.
    if corpus_dir not in merge_dirs:
        merge_dirs.append(corpus_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_dir)

    new_units_added = 0
    try:
        result = self._minimize_corpus_two_step(
            target_path=target_path,
            arguments=arguments,
            existing_corpus_dirs=merge_dirs,
            new_corpus_dir=new_corpus_dir,
            output_corpus_dir=merge_corpus,
            reproducers_dir=None,
            max_time=engine_common.get_merge_timeout(
                libfuzzer.DEFAULT_MERGE_TIMEOUT),
        )

        libfuzzer.move_mergeable_units(merge_corpus, corpus_dir)
        new_corpus_len = shell.get_directory_file_count(corpus_dir)
        new_units_added = new_corpus_len - old_corpus_len

        stat_overrides.update(result.stats)
    except (MergeError, engine.TimeoutError) as e:
        logs.log_warn("Merge failed.", error=e.message)

    stat_overrides["new_units_added"] = new_units_added

    # Record the stats to make them easily searchable in stackdriver.
    logs.log("Stats calculated.", stats=stat_overrides)
    if new_units_added:
        logs.log("New units added to corpus: %d." % new_units_added)
    else:
        logs.log("No new units found.")
def _restart_qemu(self):
  """Restart QEMU."""
  logs.log_warn('Connection to fuzzing VM lost. Restarting.')
  stop_qemu()
  start_qemu()
  self._setup_device_and_fuzzer()
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Store file in nfs cache if available."""
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If NFS server is not available due to heavy load, skip the storage
  # operation altogether as we would fail to store the file.
  if not os.path.exists(os.path.join(nfs_root, '.')):  # Use . to iterate mount.
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing an update, remove the current cached file and its
    # metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files.
    cached_files_list = []
    for cached_filename in os.listdir(cache_directory):
      if cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION):
        continue
      cached_file_path = os.path.join(cache_directory, cached_filename)
      cached_files_list.append(cached_file_path)

    mtime = lambda f: os.stat(f).st_mtime
    last_used_cached_files_list = list(
        sorted(cached_files_list, key=mtime, reverse=True))
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Fetch lock to store this file. Try only once, since if any other bot has
  # started to store it, we don't need to do it ourselves. Just bail out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name, max_hold_seconds=CACHE_LOCK_TIMEOUT, retries=1,
      by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
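# Hypothetical usage sketch for store_file_in_cache (the path below is an
# assumption, not a real build location): cache a freshly built fuzz target
# on NFS, replacing any stale cached copy and its metadata.
store_file_in_cache('/builds/project/revision/libpng_read_fuzzer',
                    force_update=True)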
def execute_task(fuzzer_name, job_type):
  """Performs GradientFuzz model training.

  Grabs input corpus and processes it for inputs/labels. Then trains a
  NEUZZ-like model and uploads model dir (with all weights and metadata)
  to GCS.

  Args:
    fuzzer_name (str): Name of fuzzer, e.g. libpng_read_fuzzer.
    job_type (str): Job type, e.g. libfuzzer_chrome_asan.
  """
  if not job_type:
    logs.log_error('job_type is not set when training GradientFuzz for ' +
                   f'fuzzer {fuzzer_name}.')
    return

  # Sets up fuzzer binary build.
  environment.set_value('FUZZ_TARGET', fuzzer_name)
  build_manager.setup_build()
  fuzzer_binary_path = environment.get_value('APP_PATH')

  # Directory to place corpus. |FUZZ_INPUTS_DISK| is not size constrained.
  temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Recreates corpus dir without contents.
  corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
  shell.remove_directory(corpus_directory, recreate=True)

  # This actually downloads corpus directory based on fuzzer name from GCS.
  logs.log(f'Downloading corpus backup for {fuzzer_name}.')
  if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
    logs.log_error(f'Failed to download corpus backup for {fuzzer_name}.')
    return

  # First, generate input/label pairs for training.
  gen_inputs_labels_result, dataset_name = gen_inputs_labels(
      corpus_directory, fuzzer_binary_path)

  if gen_inputs_labels_result.timed_out:
    logs.log_warn(f'Data gen script for {fuzzer_name} timed out.')

  # Next, invoke training script.
  num_inputs = len(glob.glob(os.path.join(corpus_directory, '*')))
  train_result, run_name = train_gradientfuzz(fuzzer_name, dataset_name,
                                              num_inputs)

  # Training process exited abnormally, but not via timeout -- do not proceed.
  if train_result.return_code and not train_result.timed_out:
    if train_result.return_code == run_constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          f'GradientFuzz training task for fuzzer {fuzzer_name} aborted ' +
          'due to corpus size.')
    else:
      logs.log_error(
          f'GradientFuzz training task for fuzzer {fuzzer_name} failed with ' +
          f'ExitCode = {train_result.return_code}.',
          output=train_result.output)
    return

  model_directory = get_model_dir(run_name)
  if model_directory:
    upload_model_to_gcs(model_directory, fuzzer_name)
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning."""
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
  # target and its related files.
  environment.set_value("FUZZ_TARGET", context.fuzz_target.binary)

  if environment.is_trusted_host():
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  build_manager.setup_build(revision=revision)
  build_directory = environment.get_value("BUILD_DIR")
  if not build_directory:
    raise CorpusPruningException("Failed to setup build.")

  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent corpus from growing unbounded and recurring failures when
  # trying to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url(),
    ]:
      _limit_corpus_size(corpus_url, CORPUS_SIZE_LIMIT_FOR_FAILURES)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner.run(
      context.initial_corpus_path,
      context.minimized_corpus_path,
      context.bad_units_path,
  )

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  backup_bucket = environment.get_value("BACKUP_BUCKET")
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log("Corpus pruned from %d to %d units." %
           (initial_corpus_size, minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn("Not enough time for shared corpus merging.")
    return None

  cross_pollinator = CrossPollinator(runner)
  cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats.
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log("Finished.")

  result = CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value("APP_REVISION"),
  )

  return result
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning."""
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
  # target and its related files.
  environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

  if environment.is_trusted_host():
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  if not build_manager.setup_build(revision=revision):
    raise CorpusPruningException('Failed to setup build.')

  build_directory = environment.get_value('BUILD_DIR')
  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent corpus from growing unbounded and recurring failures when
  # trying to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url()
    ]:
      _limit_corpus_size(corpus_url)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner_stats = pruner.run(context.initial_corpus_path,
                            context.minimized_corpus_path,
                            context.bad_units_path)

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  # Temporarily copy the past crash regressions folder into the minimized
  # corpus so that the corpus backup archive can have both.
  regressions_input_dir = os.path.join(context.initial_corpus_path,
                                       'regressions')
  regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                        'regressions')
  if shell.get_directory_file_count(regressions_input_dir):
    shutil.copytree(regressions_input_dir, regressions_output_dir)

  backup_bucket = environment.get_value('BACKUP_BUCKET')
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)

  shell.remove_directory(regressions_output_dir)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log('Corpus pruned from %d to %d units.' %
           (initial_corpus_size, minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Save the minimized corpus size before cross pollination to put in
  # BigQuery.
  pre_pollination_corpus_size = minimized_corpus_size_units

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn('Not enough time for shared corpus merging.')
    return None

  cross_pollinator = CrossPollinator(runner)
  pollinator_stats = cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats.
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log('Finished.')

  sources = ','.join([
      fuzzer.fuzz_target.project_qualified_name()
      for fuzzer in context.cross_pollinate_fuzzers
  ])

  cross_pollination_stats = None
  if pruner_stats and pollinator_stats:
    cross_pollination_stats = CrossPollinationStats(
        project_qualified_name, context.cross_pollination_method, sources,
        context.tag, initial_corpus_size, pre_pollination_corpus_size,
        pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
        pruner_stats['feature_coverage'],
        pollinator_stats['feature_coverage'])

  return CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value('APP_REVISION'),
      cross_pollination_stats=cross_pollination_stats)
def terminate(self):
  """subprocess.Popen.terminate."""
  try:
    self._popen.terminate()
  except OSError:
    logs.log_warn("Failed to terminate process.")
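# The terminate() wrapper above only sends SIGTERM. A common pattern built on
# top of it is terminate-then-kill escalation; this standalone sketch uses
# plain subprocess for illustration and is not part of the wrapper's API.
import subprocess

_proc = subprocess.Popen(['sleep', '60'])
_proc.terminate()  # Polite SIGTERM first.
try:
  _proc.wait(timeout=5)
except subprocess.TimeoutExpired:
  _proc.kill()  # Escalate to SIGKILL if the process ignores SIGTERM.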
def _run_libfuzzer_tool(tool_name,
                        testcase,
                        testcase_file_path,
                        timeout,
                        expected_crash_state,
                        set_dedup_flags=False):
  """Run libFuzzer tool to either minimize or cleanse."""
  memory_tool_options_var = environment.get_current_memory_tool_var()
  saved_memory_tool_options = environment.get_value(memory_tool_options_var)

  def _set_dedup_flags():
    """Allow libFuzzer to do its own crash comparison during minimization."""
    memory_tool_options = environment.get_memory_tool_options(
        memory_tool_options_var)

    memory_tool_options['symbolize'] = 1
    memory_tool_options['dedup_token_length'] = 3

    environment.set_memory_tool_options(memory_tool_options_var,
                                        memory_tool_options)

  def _unset_dedup_flags():
    """Reset memory tool options."""
    # This is needed so that when we re-run, we can symbolize ourselves
    # (ignoring inline frames).
    environment.set_value(memory_tool_options_var, saved_memory_tool_options)

  output_file_path = get_temporary_file_name(testcase_file_path)
  rebased_output_file_path = output_file_path

  if environment.is_trusted_host():
    from bot.untrusted_runner import file_host
    file_host.copy_file_to_worker(
        testcase_file_path,
        file_host.rebase_to_worker_root(testcase_file_path))
    rebased_output_file_path = file_host.rebase_to_worker_root(
        output_file_path)

  arguments = environment.get_value('APP_ARGS', '')
  arguments += (' --cf-{tool_name}-timeout={timeout} '
                '--cf-{tool_name}-to={output_file_path}').format(
                    tool_name=tool_name,
                    output_file_path=rebased_output_file_path,
                    timeout=timeout)
  command = tests.get_command_line_for_application(
      file_to_run=testcase_file_path,
      app_args=arguments,
      needs_http=testcase.http_flag)
  logs.log('Executing command: %s' % command)

  if set_dedup_flags:
    _set_dedup_flags()

  # A small buffer is added to the timeout to allow the current test to
  # finish, and the file to be written. Since we should terminate beforehand,
  # a long delay only slows fuzzing in cases where it's necessary.
  _, _, output = process_handler.run_process(command, timeout=timeout + 60)

  if environment.is_trusted_host():
    from bot.untrusted_runner import file_host
    file_host.copy_file_from_worker(rebased_output_file_path,
                                    output_file_path)

  if set_dedup_flags:
    _unset_dedup_flags()

  if not os.path.exists(output_file_path):
    logs.log_warn('LibFuzzer %s run failed.' % tool_name, output=output)
    return None, None

  # Ensure that the crash parameters match. It's possible that we will
  # minimize/cleanse to an unrelated bug, such as a timeout.
  crash_result = _run_libfuzzer_testcase(testcase, output_file_path)
  state = crash_result.get_symbolized_data()
  security_flag = crash_result.is_security_issue()
  if (security_flag != testcase.security_flag or
      state.crash_state != expected_crash_state):
    logs.log_warn('Ignoring unrelated crash.\n'
                  'State: %s (expected %s)\n'
                  'Security: %s (expected %s)\n'
                  'Output: %s\n' %
                  (state.crash_state, expected_crash_state, security_flag,
                   testcase.security_flag, state.crash_stacktrace))
    return None, None

  with open(output_file_path, 'rb') as file_handle:
    minimized_keys = blobs.write_blob(file_handle)

  testcase.minimized_keys = minimized_keys
  testcase.put()

  return output_file_path, crash_result
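# Illustrative only: the dedup flags set by _set_dedup_flags() typically end
# up serialized into a sanitizer options string of this shape (the exact
# plumbing is handled by environment.set_memory_tool_options; ASAN_OPTIONS
# here is just one example of a memory tool variable).
import os

os.environ['ASAN_OPTIONS'] = 'symbolize=1:dedup_token_length=3'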
def execute_task(fuzzer_name, job_type):
  """Execute ML RNN training task.

  The task trains an RNN model by default. If more models are developed,
  arguments can be modified to specify which model to use.

  Args:
    fuzzer_name: Name of fuzzer, e.g. libpng_read_fuzzer.
    job_type: Job type, e.g. libfuzzer_chrome_asan.
  """
  if not job_type:
    logs.log_error(
        'job_type is not set when training ML RNN for fuzzer %s.' %
        fuzzer_name)
    return

  # Directory to place training files, such as logs, models, corpus.
  # Use |FUZZ_INPUTS_DISK| since it is not size constrained.
  temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Get corpus.
  corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
  shell.remove_directory(corpus_directory, recreate=True)

  logs.log('Downloading corpus backup for %s.' % fuzzer_name)

  if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
    logs.log_error('Failed to download corpus backup for %s.' % fuzzer_name)
    return

  # Get the directory to save models.
  model_directory = get_model_files_directory(temp_directory, fuzzer_name)
  shell.remove_directory(model_directory, recreate=True)

  # Get the directory to save training logs.
  log_directory = get_model_log_directory(temp_directory, fuzzer_name)
  shell.remove_directory(log_directory, recreate=True)

  result = train_rnn(corpus_directory, model_directory, log_directory)

  # Training process exited abnormally but not caused by timeout, meaning an
  # error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN training task for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN training task for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=result.output)
    return

  # Timing out may be caused by a large training corpus, but intermediate
  # models are frequently saved and can be uploaded.
  if result.timed_out:
    logs.log_warn('ML RNN training task for %s timed out.' % fuzzer_name)

  upload_model_to_gcs(model_directory, fuzzer_name)
def dump_instance_logs(handle):
  """Dump logs from an undercoat instance."""
  # Avoid calling undercoat_instance_command here, to prevent recursion on
  # error.
  qemu_log = undercoat_api_command('get_logs', '-handle', handle).output
  logs.log_warn(qemu_log)
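# Hypothetical usage sketch: surface instance logs before tearing an instance
# down after a failed run. fuzz_on_instance and stop_instance are
# illustrative names, not part of the undercoat wrapper shown here.
def _fuzz_with_log_dump(handle):
  try:
    fuzz_on_instance(handle)
  except UndercoatError:
    dump_instance_logs(handle)
    raise
  finally:
    stop_instance(handle)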