def process_command(task):
  """Figures out what to do with the given task and executes the command."""
  logs.log("Executing command '%s'" % task.payload())
  if not task.payload().strip():
    logs.log_error('Empty task received.')
    return

  # Parse task payload.
  task_name = task.command
  task_argument = task.argument
  job_name = task.job

  environment.set_value('TASK_NAME', task_name)
  environment.set_value('TASK_ARGUMENT', task_argument)
  environment.set_value('JOB_NAME', job_name)
  if job_name != 'none':
    job = data_types.Job.query(data_types.Job.name == job_name).get()
    # Job might be removed. In that case, we don't want an exception raised
    # that causes this task to be retried by another bot.
    if not job:
      logs.log_error("Job '%s' not found." % job_name)
      return

    if not job.platform:
      error_string = "No platform set for job '%s'" % job_name
      logs.log_error(error_string)
      raise errors.BadStateError(error_string)

    # A misconfiguration led to this point. Clean up the job if necessary.
    job_queue_suffix = tasks.queue_suffix_for_platform(job.platform)
    bot_queue_suffix = tasks.default_queue_suffix()

    if job_queue_suffix != bot_queue_suffix:
      # This happens rarely, so store it as a hard exception.
      logs.log_error(
          'Wrong platform for job %s: job queue [%s], bot queue [%s].' %
          (job_name, job_queue_suffix, bot_queue_suffix))

      # Try to recreate the job in the correct task queue.
      new_queue = (
          tasks.high_end_queue() if task.high_end else tasks.regular_queue())
      new_queue += job_queue_suffix

      # A command override is run continuously by a bot. If we keep failing
      # and recreating the task, it will just DoS the entire task queue.
      # So, don't create any new tasks in that case, since the override needs
      # manual intervention to be fixed anyway.
      if not task.is_command_override:
        try:
          tasks.add_task(task_name, task_argument, job_name, new_queue)
        except Exception:
          # This can happen when trying to publish to a non-existent topic,
          # e.g. a topic for a high-end bot on another platform. In this case,
          # just give up.
          logs.log_error('Failed to fix platform and re-add task.')

      # Add a wait interval to avoid overflowing task creation.
      failure_wait_interval = environment.get_value('FAIL_WAIT')
      time.sleep(failure_wait_interval)
      return

    if task_name != 'fuzz':
      # Make sure that our platform id matches that of the testcase (for
      # non-fuzz tasks).
      testcase = data_handler.get_entity_by_type_and_id(data_types.Testcase,
                                                        task_argument)
      if testcase:
        current_platform_id = environment.get_platform_id()
        testcase_platform_id = testcase.platform_id

        # This indicates we are trying to run this job on the wrong platform.
        # This can happen when you have different types of devices (e.g.
        # Android) in the same platform group. In this case, just recreate
        # the task.
        if (task_name != 'variant' and testcase_platform_id and
            not utils.fields_match(testcase_platform_id, current_platform_id)):
          logs.log(
              'Testcase %d platform (%s) does not match with ours (%s), exiting'
              % (testcase.key.id(), testcase_platform_id, current_platform_id))
          tasks.add_task(task_name, task_argument, job_name)
          return

    # Some fuzzers contain additional environment variables that should be
    # set for them. Append these for tests generated by these fuzzers and for
    # the fuzz command itself.
    fuzzer_name = None
    if task_name == 'fuzz':
      fuzzer_name = task_argument
    elif testcase:
      fuzzer_name = testcase.fuzzer_name

    # Get job's environment string.
    environment_string = job.get_environment_string()

    if task_name == 'minimize':
      # Let jobs specify a different job and fuzzer to minimize with.
      job_environment = job.get_environment()
      minimize_job_override = job_environment.get('MINIMIZE_JOB_OVERRIDE')
      if minimize_job_override:
        minimize_job = data_types.Job.query(
            data_types.Job.name == minimize_job_override).get()
        if minimize_job:
          environment.set_value('JOB_NAME', minimize_job_override)
          environment_string = minimize_job.get_environment_string()
          environment_string += '\nORIGINAL_JOB_NAME = %s\n' % job_name
          job_name = minimize_job_override
        else:
          logs.log_error(
              'Job for minimization not found: %s.' % minimize_job_override)
          # Fall back to using the testcase's own job for minimization.

      minimize_fuzzer_override = job_environment.get('MINIMIZE_FUZZER_OVERRIDE')
      fuzzer_name = minimize_fuzzer_override or fuzzer_name

    if fuzzer_name and not environment.is_engine_fuzzer_job(job_name):
      fuzzer = data_types.Fuzzer.query(
          data_types.Fuzzer.name == fuzzer_name).get()
      additional_default_variables = ''
      additional_variables_for_job = ''
      if (fuzzer and hasattr(fuzzer, 'additional_environment_string') and
          fuzzer.additional_environment_string):
        for line in fuzzer.additional_environment_string.splitlines():
          # Job-specific values may be defined in a fuzzer's additional
          # environment string in the form job_name:VAR_NAME = VALUE.
          if '=' in line and ':' in line.split('=', 1)[0]:
            fuzzer_job_name, environment_definition = line.split(':', 1)
            if fuzzer_job_name == job_name:
              additional_variables_for_job += '\n%s' % environment_definition
            continue

          additional_default_variables += '\n%s' % line

      environment_string += additional_default_variables
      environment_string += additional_variables_for_job

    # Update environment for the job.
    update_environment_for_job(environment_string)

  # Match the cpu architecture with the ones required in the job definition.
  # If they don't match, then bail out and recreate the task.
  if not is_supported_cpu_arch_for_job():
    logs.log(
        'Unsupported cpu architecture specified in job definition, exiting.')
    tasks.add_task(task_name, task_argument, job_name)
    return

  # Initial cleanup.
  cleanup_task_state()

  start_web_server_if_needed()

  try:
    run_command(task_name, task_argument, job_name)
  finally:
    # Final clean up.
    cleanup_task_state()
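# Illustrative sketch only (not part of the task flow above): how a fuzzer's
# additional_environment_string is interpreted by process_command. Lines of
# the form "job_name:VAR_NAME = VALUE" apply only to the named job; all other
# lines apply to every job. The job and variable names below are hypothetical.
#
#   TEST_TIMEOUT = 30
#   linux_asan_example_job:LSAN = True
#
# For the job 'linux_asan_example_job', both variables end up appended to the
# job's environment string; for any other job, only TEST_TIMEOUT is appended.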
def test_for_reproducibility(testcase_path, expected_state,
                             expected_security_flag, test_timeout, http_flag,
                             gestures):
  """Test to see if a crash is fully reproducible or is a one-time crasher."""
  # Clean up any existing application instances and user profile directories.
  # Cleaning up temp clears user profile directories and should be done before
  # the |get_command_line_for_application| call, since that creates
  # dependencies in the profile folder.
  process_handler.terminate_stale_application_instances()
  shell.clear_temp_directory()

  app_directory = environment.get_value('APP_DIR')
  command = get_command_line_for_application(
      testcase_path, needs_http=http_flag)
  crash_count = 0
  crash_retries = environment.get_value('CRASH_RETRIES')
  reproducible_crash_target_count = crash_retries * REPRODUCIBILITY_FACTOR
  warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

  logs.log('Testing for crash (command="%s").' % command)

  round_number = 0
  for round_number in range(1, crash_retries + 1):
    # Bail out early if there is no hope of finding a reproducible crash.
    if (crash_retries - round_number + crash_count + 1 <
        reproducible_crash_target_count):
      break

    run_timeout = warmup_timeout if round_number == 1 else test_timeout
    return_code, crash_time, output = process_handler.run_process(
        command,
        timeout=run_timeout,
        gestures=gestures,
        current_working_directory=app_directory)
    process_handler.terminate_stale_application_instances()

    crash_result = CrashResult(return_code, crash_time, output)
    if not crash_result.is_crash():
      logs.log(
          'No crash occurred (round {round_number}).'.format(
              round_number=round_number),
          output=output)
      continue

    state = crash_result.get_symbolized_data()
    logs.log(
        ('Crash occurred in {crash_time} seconds (round {round_number}). '
         'State:\n{crash_state}').format(
             crash_time=crash_time,
             round_number=round_number,
             crash_state=state.crash_state),
        output=state.crash_stacktrace)

    # If we don't have an expected crash state, set it to the one from the
    # initial crash.
    if not expected_state:
      expected_state = state.crash_state

    if crash_result.is_security_issue() != expected_security_flag:
      logs.log('Detected a crash without the correct security flag.')
      continue

    crash_comparer = CrashComparer(state.crash_state, expected_state)
    if not crash_comparer.is_similar():
      logs.log('Detected a crash with an unrelated state: '
               'Expected(%s), Found(%s).' %
               (expected_state, state.crash_state))
      continue

    crash_count += 1
    if crash_count >= reproducible_crash_target_count:
      logs.log('Crash is reproducible.')
      return True

  logs.log('Crash is not reproducible. Crash count: %d/%d.' %
           (crash_count, round_number))
  return False
def check_for_bad_build(job_type, crash_revision):
  """Return true if the build is bad, i.e. crashes on startup."""
  # Check the bad build check flag to see if we want to do this.
  if not environment.get_value('BAD_BUILD_CHECK'):
    return False

  # Create a blank command line with no file to run and no http.
  command = get_command_line_for_application(file_to_run='', needs_http=False)

  # When checking for bad builds, we use the default window size.
  # We don't want to pick a custom size since it can potentially cause a
  # startup crash and cause a build to be detected incorrectly as bad.
  default_window_argument = environment.get_value('WINDOW_ARG', '')
  if default_window_argument:
    command = command.replace(' %s' % default_window_argument, '')

  # TSAN is slow, and boots slow on first startup. Increase the warmup
  # timeout for this case.
  if environment.tool_matches('TSAN', job_type):
    fast_warmup_timeout = environment.get_value('WARMUP_TIMEOUT')
  else:
    fast_warmup_timeout = environment.get_value('FAST_WARMUP_TIMEOUT')

  # Initialize helper variables.
  is_bad_build = False
  build_run_console_output = ''
  app_directory = environment.get_value('APP_DIR')

  # Exit all running instances.
  process_handler.terminate_stale_application_instances()

  # Check if the build is bad.
  return_code, crash_time, output = process_handler.run_process(
      command,
      timeout=fast_warmup_timeout,
      current_working_directory=app_directory)
  crash_result = CrashResult(return_code, crash_time, output)

  # 1. Need to account for startup crashes with no crash state. E.g. failed to
  #    load shared library. So, ignore state for comparison.
  # 2. Ignore leaks as they don't block a build from reporting regular crashes
  #    and also don't impact regression range calculations.
  if (crash_result.is_crash(ignore_state=True) and
      not crash_result.should_ignore() and
      not crash_result.get_type() in ['Direct-leak', 'Indirect-leak']):
    is_bad_build = True
    build_run_console_output = utils.get_crash_stacktrace_output(
        command,
        crash_result.get_stacktrace(symbolized=True),
        crash_result.get_stacktrace(symbolized=False))
    logs.log(
        'Bad build for %s detected at r%d.' % (job_type, crash_revision),
        output=build_run_console_output)

  # Exit all running instances.
  process_handler.terminate_stale_application_instances()

  # Any of the conditions below indicate that the bot is in a bad state and it
  # is not caused by the build itself. In that case, just exit.
  build_state = data_handler.get_build_state(job_type, crash_revision)
  if is_bad_build and utils.sub_string_exists_in(BAD_STATE_HINTS, output):
    logs.log_fatal_and_exit(
        'Bad bot environment detected, exiting.',
        output=build_run_console_output,
        snapshot=process_handler.get_runtime_snapshot())

  # If none of the other bots have added information about this build,
  # then add it now.
  if (build_state == data_types.BuildState.UNMARKED and
      not crash_result.should_ignore()):
    data_handler.add_build_metadata(job_type, crash_revision, is_bad_build,
                                    build_run_console_output)

  return is_bad_build
def _process_corpus_crashes(context, result):
  """Process crashes found in the corpus."""
  # Default Testcase entity values.
  crash_revision = result.revision
  job_type = environment.get_value('JOB_NAME')
  minimized_arguments = '%TESTCASE% ' + context.fuzz_target.binary
  project_name = data_handler.get_project_name(job_type)

  comment = 'Fuzzer %s generated corpus testcase crashed (r%s)' % (
      context.fuzz_target.project_qualified_name(), crash_revision)

  # Generate crash reports.
  for crash in result.crashes:
    existing_testcase = data_handler.find_testcase(
        project_name, crash.crash_type, crash.crash_state,
        crash.security_flag)
    if existing_testcase:
      continue

    # Upload/store testcase.
    if environment.is_trusted_host():
      from bot.untrusted_runner import file_host
      unit_path = os.path.join(context.bad_units_path,
                               os.path.basename(crash.unit_path))
      # Prevent the worker from escaping out of |context.bad_units_path|.
      if not file_host.is_directory_parent(unit_path, context.bad_units_path):
        raise CorpusPruningException('Invalid units path from worker.')

      file_host.copy_file_from_worker(crash.unit_path, unit_path)
    else:
      unit_path = crash.unit_path

    with open(unit_path, 'rb') as f:
      key = blobs.write_blob(f)

    # Set the absolute_path property of the Testcase to a file in FUZZ_INPUTS
    # instead of the local quarantine directory.
    absolute_testcase_path = os.path.join(
        environment.get_value('FUZZ_INPUTS'), 'testcase')

    testcase_id = data_handler.store_testcase(
        crash=crash,
        fuzzed_keys=key,
        minimized_keys='',
        regression='',
        fixed='',
        one_time_crasher_flag=False,
        crash_revision=crash_revision,
        comment=comment,
        absolute_path=absolute_testcase_path,
        fuzzer_name=context.fuzz_target.engine,
        fully_qualified_fuzzer_name=context.fuzz_target.fully_qualified_name(),
        job_type=job_type,
        archived=False,
        archive_filename='',
        binary_flag=True,
        http_flag=False,
        gestures=None,
        redzone=DEFAULT_REDZONE,
        disable_ubsan=False,
        minidump_keys=None,
        window_argument=None,
        timeout_multiplier=1.0,
        minimized_arguments=minimized_arguments)

    # Set fuzzer_binary_name in testcase metadata.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.set_metadata('fuzzer_binary_name', result.fuzzer_binary_name)

    issue_metadata = engine_common.get_all_issue_metadata_for_testcase(
        testcase)
    if issue_metadata:
      for key, value in issue_metadata.items():
        testcase.set_metadata(key, value, update_testcase=False)

      testcase.put()

    # Create additional tasks for testcase (starting with minimization).
    testcase = data_handler.get_testcase_by_id(testcase_id)
    task_creation.create_tasks(testcase)
def run_testcase_and_return_result_in_queue(crash_queue,
                                            thread_index,
                                            file_path,
                                            gestures,
                                            env_copy,
                                            upload_output=False):
  """Run a single testcase and return crash results in the crash queue."""
  # Since this is running in its own process, initialize the log handler
  # again. This is needed for Windows where instances are not shared across
  # child processes. See:
  # https://stackoverflow.com/questions/34724643/python-logging-with-multiprocessing-root-logger-different-in-windows
  logs.configure('run_testcase', {
      'testcase_path': file_path,
  })

  try:
    # Run testcase and check whether a crash occurred or not.
    return_code, crash_time, output = run_testcase(thread_index, file_path,
                                                   gestures, env_copy)

    # Pull testcase directory to host to get any stats files.
    if environment.is_trusted_host():
      from bot.untrusted_runner import file_host
      file_host.pull_testcases_from_worker()

    # Analyze the crash.
    crash_output = _get_crash_output(output)
    crash_result = CrashResult(return_code, crash_time, crash_output)

    # To provide consistency between stats and logs, we use the timestamp
    # taken from stats when uploading logs and testcase.
    if upload_output:
      log_time = _get_testcase_time(file_path)

    if crash_result.is_crash():
      # Initialize resource list with the testcase path.
      resource_list = [file_path]
      resource_list += get_resource_paths(crash_output)

      # Store the crash stack file in the crash stacktrace directory
      # with filename as the hash of the testcase path.
      crash_stacks_directory = environment.get_value('CRASH_STACKTRACES_DIR')
      stack_file_path = os.path.join(crash_stacks_directory,
                                     utils.string_hash(file_path))
      utils.write_data_to_file(crash_output, stack_file_path)

      # Put crash/no-crash results in the crash queue.
      crash_queue.put(
          Crash(
              file_path=file_path,
              crash_time=crash_time,
              return_code=return_code,
              resource_list=resource_list,
              gestures=gestures,
              stack_file_path=stack_file_path))

      # Don't upload uninteresting testcases (no crash) or if there is no log
      # to correlate it with (not upload_output).
      if upload_output:
        upload_testcase(file_path, log_time)

    if upload_output:
      # Include full output for uploaded logs (crash output, merge output,
      # etc.).
      crash_result_full = CrashResult(return_code, crash_time, output)
      log = prepare_log_for_upload(crash_result_full.get_stacktrace(),
                                   return_code)
      upload_log(log, log_time)
  except Exception:
    logs.log_error('Exception occurred while running '
                   'run_testcase_and_return_result_in_queue.')
def clear_data_bundles_directory():
  """Clears the data bundles directory."""
  remove_directory(environment.get_value('DATA_BUNDLES_DIR'), recreate=True)
def current_project():
  """Return the project for the current job, or the default project."""
  return environment.get_value('PROJECT_NAME', default_project_name())
def terminate_stale_application_instances():
  """Kill stale instances of the application running for this command."""
  if environment.is_trusted_host():
    from bot.untrusted_runner import remote_process_host
    remote_process_host.terminate_stale_application_instances()
    return

  # Stale instance cleanup is sometimes disabled for local testing.
  if not environment.get_value('KILL_STALE_INSTANCES', True):
    return

  additional_process_to_kill = environment.get_value(
      'ADDITIONAL_PROCESSES_TO_KILL')
  builds_directory = environment.get_value('BUILDS_DIR')
  llvm_symbolizer_filename = environment.get_executable_filename(
      'llvm-symbolizer')
  platform = environment.platform()
  start_time = time.time()

  processes_to_kill = []
  # Avoid killing the test binary when running the reproduce tool. It is
  # commonly in use on developer workstations.
  if not environment.get_value('REPRODUCE_TOOL'):
    app_name = environment.get_value('APP_NAME')
    processes_to_kill += [app_name]

  if additional_process_to_kill:
    processes_to_kill += additional_process_to_kill.split(' ')
  processes_to_kill = [x for x in processes_to_kill if x]

  if platform == 'ANDROID':
    # Clean up any stale adb connections.
    device_serial = environment.get_value('ANDROID_SERIAL')
    adb_search_string = 'adb -s %s' % device_serial

    # Terminate llvm symbolizer processes matching the exact path. This is
    # important for Android, where multiple device instances run on the same
    # host.
    llvm_symbolizer_path = environment.get_llvm_symbolizer_path()

    terminate_processes_matching_cmd_line(
        [adb_search_string, llvm_symbolizer_path], kill=True)

    # Make sure device is online and rooted.
    android.adb.run_as_root()

    # Make sure to reset SE Linux Permissive Mode (might be lost in reboot).
    android.settings.change_se_linux_to_permissive_mode()

    # Make sure that device forwarder is running (might be lost in reboot or
    # process crash).
    android.device.setup_host_and_device_forwarder_if_needed()

    # Make sure that package optimization is complete (might be triggered due
    # to unexpected circumstances).
    android.app.wait_until_optimization_complete()

    # Reset application state, which kills its pending instances and re-grants
    # the storage permissions.
    android.app.reset()
  elif platform == 'WINDOWS':
    processes_to_kill += [
        'cdb.exe',
        'handle.exe',
        'msdt.exe',
        'openwith.exe',
        'WerFault.exe',
        llvm_symbolizer_filename,
    ]
    terminate_processes_matching_names(processes_to_kill, kill=True)
    terminate_processes_matching_cmd_line(builds_directory, kill=True)

    # Artificial sleep to let the processes get terminated.
    time.sleep(1)
  else:
    # Handle Linux and Mac platforms.
    processes_to_kill += [
        'addr2line',
        'atos',
        'chrome-devel-sandbox',
        'gdb',
        'nacl_helper',
        'xdotool',
        llvm_symbolizer_filename,
    ]
    terminate_processes_matching_names(processes_to_kill, kill=True)
    terminate_processes_matching_cmd_line(builds_directory, kill=True)

  duration = int(time.time() - start_time)
  if duration >= 5:
    logs.log('Process kill took longer than usual - %s.' %
             str(datetime.timedelta(seconds=duration)))
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Store the given file in the NFS cache if available."""
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If the NFS server is not available due to heavy load, skip the storage
  # operation altogether as we would fail to store the file.
  if not os.path.exists(os.path.join(nfs_root, '.')):  # Use . to iterate mount.
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing an update, we need to remove the current cached file
    # and its metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files.
    cached_files_list = []
    for cached_filename in os.listdir(cache_directory):
      if cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION):
        continue
      cached_file_path = os.path.join(cache_directory, cached_filename)
      cached_files_list.append(cached_file_path)

    mtime = lambda f: os.stat(f).st_mtime
    last_used_cached_files_list = list(
        sorted(cached_files_list, key=mtime, reverse=True))
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Fetch lock to store this file. Try only once, since if any other bot has
  # started to store it, we don't need to do it ourselves. Just bail out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name,
      max_hold_seconds=CACHE_LOCK_TIMEOUT,
      retries=1,
      by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
def create(self):
  """Configures a QEMU process which can subsequently be `run`.

  Assumes that initial_qemu_setup was already called exactly once.
  """
  qemu_vars = _fetch_qemu_vars()

  # Get a free port for the VM, so we can SSH in later.
  tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  tcp.bind(('localhost', 0))
  _, port = tcp.getsockname()
  tcp.close()

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PORTNUM', port)
  environment.set_value('FUCHSIA_RESOURCES_DIR',
                        qemu_vars['fuchsia_resources_dir'])

  # yapf: disable
  qemu_args = [
      '-m', '2048',
      '-nographic',
      '-kernel', qemu_vars['kernel_path'],
      '-initrd', qemu_vars['initrd_path'],
      '-smp', '4',
      '-drive', ('file=' + qemu_vars['drive_path'] + ',format=raw,if=none,'
                 'id=blobstore'),
      '-device', 'virtio-blk-pci,drive=blobstore',
      '-monitor', 'none',
      '-append', '"kernel.serial=legacy TERM=dumb"',
      '-machine', 'q35',
      '-display', 'none',
      '-netdev', ('user,id=net0,net=192.168.3.0/24,dhcpstart=192.168.3.9,'
                  'host=192.168.3.2,hostfwd=tcp::') + str(port) + '-:22',
      '-device', 'e1000,netdev=net0,mac=52:54:00:63:5e:7b',
      '-L', qemu_vars['sharefiles_path']
  ]
  # yapf: enable

  # Detecting KVM is tricky, so use an environment variable to determine
  # whether to turn it on or not.
  if environment.get_value('FUCHSIA_USE_KVM'):
    qemu_args.extend(['-cpu', 'host,migratable=no'])
    qemu_args.append('-enable-kvm')
  else:
    # Can't use host CPU since we don't necessarily have KVM on the machine.
    # Emulate a Haswell CPU with a few feature toggles. This mirrors the most
    # common configuration for Fuchsia VMs when using in-tree tools.
    qemu_args.extend(['-cpu', 'Haswell,+smap,-check,-fsgsbase'])

  # Get the list of fuzzers for ClusterFuzz to choose from.
  host = Host.from_dir(
      os.path.join(qemu_vars['fuchsia_resources_dir'], 'build', 'out',
                   'default'))
  Device(host, 'localhost', str(port))
  Fuzzer.filter(host.fuzzers, '')

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PKEY_PATH', qemu_vars['pkey_path'])

  logs.log('Ready to run QEMU. Command: ' + qemu_vars['qemu_path'] + ' ' +
           str(qemu_args))
  self.process_runner = new_process.ProcessRunner(qemu_vars['qemu_path'],
                                                  qemu_args)
def _cleanup():
  """Clean up after running the tool."""
  temp_directory = environment.get_value('ROOT_DIR')
  assert 'tmp' in temp_directory
  shell.remove_directory(temp_directory)
def _check_fixed_for_custom_binary(testcase, job_type, testcase_file_path):
  """Simplified fixed check for test cases using custom binaries."""
  revision = environment.get_value('APP_REVISION')

  # Update comments to reflect bot information and clean up old comments.
  testcase_id = testcase.key.id()
  testcase = data_handler.get_testcase_by_id(testcase_id)
  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  build_manager.setup_build()
  if not build_manager.check_app_path():
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.ERROR,
        'Build setup failed for custom binary')
    build_fail_wait = environment.get_value('FAIL_WAIT')
    tasks.add_task(
        'progression', testcase_id, job_type, wait_time=build_fail_wait)
    return

  test_timeout = environment.get_value('TEST_TIMEOUT', 10)
  result = testcase_manager.test_for_crash_with_retries(
      testcase, testcase_file_path, test_timeout, http_flag=testcase.http_flag)
  _log_output(revision, result)

  # Re-fetch to finalize testcase updates in branches below.
  testcase = data_handler.get_testcase_by_id(testcase.key.id())

  # If this still crashes on the most recent build, it's not fixed. The task
  # will be rescheduled by a cron job and re-attempted eventually.
  if result.is_crash():
    app_path = environment.get_value('APP_PATH')
    command = testcase_manager.get_command_line_for_application(
        testcase_file_path, app_path=app_path, needs_http=testcase.http_flag)
    symbolized_crash_stacktrace = result.get_stacktrace(symbolized=True)
    unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)
    stacktrace = utils.get_crash_stacktrace_output(
        command, symbolized_crash_stacktrace, unsymbolized_crash_stacktrace)
    testcase.last_tested_crash_stacktrace = data_handler.filter_stacktrace(
        stacktrace)
    _update_completion_metadata(
        testcase,
        revision,
        is_crash=True,
        message='still crashes on latest custom build')
    return

  # Retry once on another bot to confirm our results and in case this bot is
  # in a bad state which we didn't catch through our usual means.
  if data_handler.is_first_retry_for_task(testcase, reset_after_retry=True):
    tasks.add_task('progression', testcase_id, job_type)
    _update_completion_metadata(testcase, revision)
    return

  # The bug is fixed.
  testcase.fixed = 'Yes'
  testcase.open = False
  _update_completion_metadata(
      testcase, revision, message='fixed on latest custom build')
def find_fixed_range(testcase_id, job_type):
  """Attempt to find the revision range where a testcase was fixed."""
  deadline = tasks.get_task_completion_deadline()
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  if testcase.fixed:
    logs.log_error('Fixed range is already set as %s, skip.' % testcase.fixed)
    return

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Set a flag to indicate we are running the progression task. This shows
  # pending status on the testcase report page and avoids conflicting testcase
  # updates by the triage cron.
  testcase.set_metadata('progression_pending', True)

  # Custom binaries are handled as special cases.
  if build_manager.is_custom_binary():
    _check_fixed_for_custom_binary(testcase, job_type, testcase_file_path)
    return

  build_bucket_path = build_manager.get_primary_bucket_path()
  revision_list = build_manager.get_revisions_list(
      build_bucket_path, testcase=testcase)
  if not revision_list:
    data_handler.close_testcase_with_error(testcase_id,
                                           'Failed to fetch revision list')
    return

  # Use min, max_index to mark the start and end of the revision list that is
  # used for bisecting the progression range. Set start to the revision where
  # we noticed the crash. Set end to the trunk revision. Also, use min, max
  # from the past run if it timed out.
  min_revision = testcase.get_metadata('last_progression_min')
  max_revision = testcase.get_metadata('last_progression_max')

  if min_revision or max_revision:
    # Clear these to avoid using them in the next run. If this run fails, then
    # we should try the next run without them to see if it succeeds. If this
    # run succeeds, we should still clear them to avoid capping the max
    # revision in the next run.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.delete_metadata('last_progression_min', update_testcase=False)
    testcase.delete_metadata('last_progression_max', update_testcase=False)
    testcase.put()

  last_tested_revision = testcase.get_metadata('last_tested_crash_revision')
  known_crash_revision = last_tested_revision or testcase.crash_revision
  if not min_revision:
    min_revision = known_crash_revision
  if not max_revision:
    max_revision = revisions.get_last_revision_in_list(revision_list)

  min_index = revisions.find_min_revision_index(revision_list, min_revision)
  if min_index is None:
    raise errors.BuildNotFoundError(min_revision, job_type)
  max_index = revisions.find_max_revision_index(revision_list, max_revision)
  if max_index is None:
    raise errors.BuildNotFoundError(max_revision, job_type)

  testcase = data_handler.get_testcase_by_id(testcase_id)
  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED,
                                       'r%d' % max_revision)

  # Check to see if this testcase is still crashing now. If it is, then just
  # bail out.
  result = _testcase_reproduces_in_revision(
      testcase,
      testcase_file_path,
      job_type,
      max_revision,
      update_metadata=True)
  if result.is_crash():
    logs.log('Found crash with same signature on latest revision r%d.' %
             max_revision)
    app_path = environment.get_value('APP_PATH')
    command = testcase_manager.get_command_line_for_application(
        testcase_file_path, app_path=app_path, needs_http=testcase.http_flag)
    symbolized_crash_stacktrace = result.get_stacktrace(symbolized=True)
    unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)
    stacktrace = utils.get_crash_stacktrace_output(
        command, symbolized_crash_stacktrace, unsymbolized_crash_stacktrace)
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.last_tested_crash_stacktrace = data_handler.filter_stacktrace(
        stacktrace)
    _update_completion_metadata(
        testcase,
        max_revision,
        is_crash=True,
        message='still crashes on latest revision r%s' % max_revision)

    # Since we've verified that the test case is still crashing, clear out any
    # metadata indicating potential flake from previous runs.
    task_creation.mark_unreproducible_if_flaky(testcase, False)

    # For chromium project, save latest crash information for later upload
    # to chromecrash/.
    state = result.get_symbolized_data()
    crash_uploader.save_crash_info_if_needed(testcase_id, max_revision,
                                             job_type, state.crash_type,
                                             state.crash_address, state.frames)
    return

  # Don't burden the NFS server with caching these random builds.
  environment.set_value('CACHE_STORE', False)

  # Verify that we do crash in the min revision. This is assumed to be true
  # while we are doing the bisect.
  result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                            job_type, min_revision)
  if result and not result.is_crash():
    testcase = data_handler.get_testcase_by_id(testcase_id)

    # Retry once on another bot to confirm our result.
    if data_handler.is_first_retry_for_task(testcase, reset_after_retry=True):
      tasks.add_task('progression', testcase_id, job_type)
      error_message = (
          'Known crash revision %d did not crash, will retry on another bot '
          'to confirm result' % known_crash_revision)
      data_handler.update_testcase_comment(testcase,
                                           data_types.TaskState.ERROR,
                                           error_message)
      _update_completion_metadata(testcase, max_revision)
      return

    _clear_progression_pending(testcase)
    error_message = (
        'Known crash revision %d did not crash' % known_crash_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         error_message)
    task_creation.mark_unreproducible_if_flaky(testcase, True)
    return

  # Start a binary search to find the last non-crashing revision. At this
  # point, we know that we do crash in min_revision and do not crash in
  # max_revision.
  while time.time() < deadline:
    min_revision = revision_list[min_index]
    max_revision = revision_list[max_index]

    # If the min and max revisions are one apart, this is as much as we can
    # narrow the range.
    if max_index - min_index == 1:
      _save_fixed_range(testcase_id, min_revision, max_revision,
                        testcase_file_path)
      return

    # Occasionally, we get into this bad state. It seems to be related to test
    # cases with flaky stacks, but the exact cause is unknown.
    if max_index - min_index < 1:
      testcase = data_handler.get_testcase_by_id(testcase_id)
      testcase.fixed = 'NA'
      testcase.open = False
      message = ('Fixed testing errored out (min and max revisions '
                 'are both %d)' % min_revision)
      _update_completion_metadata(testcase, max_revision, message=message)
      return

    # Test the middle revision of our range.
    middle_index = (min_index + max_index) // 2
    middle_revision = revision_list[middle_index]

    testcase = data_handler.get_testcase_by_id(testcase_id)
    log_message = 'Testing r%d (current range %d:%d)' % (
        middle_revision, min_revision, max_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.WIP,
                                         log_message)

    try:
      result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                                job_type, middle_revision)
    except errors.BadBuildError:
      # Skip this revision.
      del revision_list[middle_index]
      max_index -= 1
      continue

    if result.is_crash():
      min_index = middle_index
    else:
      max_index = middle_index

    _save_current_fixed_range_indices(testcase_id, revision_list[min_index],
                                      revision_list[max_index])

  # If we've broken out of the loop, we've exceeded the deadline. Recreate the
  # task to pick up where we left off.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  error_message = ('Timed out, current range r%d:r%d' %
                   (revision_list[min_index], revision_list[max_index]))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                       error_message)
  tasks.add_task('progression', testcase_id, job_type)
def get_crash_info(output):
  """Parse crash output to get the (local) minidump path and any other
  information useful for crash uploading, and store it in a CrashReportInfo
  object."""
  crash_stacks_directory = environment.get_value('CRASH_STACKTRACES_DIR')
  platform = environment.platform()

  output_lines = output.splitlines()
  num_lines = len(output_lines)
  for i, line in enumerate(output_lines):
    if platform == 'ANDROID':
      # If we are on Android, the dump extraction is more complicated.
      # The location placed in the crash-stacktrace is of the dump itself, but
      # in fact only the MIME of the dump exists, and it will have a different
      # extension. We need to pull the MIME and process it.
      match = re.match(CRASH_DUMP_PATH_MARKER, line)
      if not match:
        continue

      minidump_mime_filename_base = None
      for j in range(i + 1, num_lines):
        line = output_lines[j]
        match = re.match(r'(.*)\.dmp', line)
        if match:
          minidump_mime_filename_base = os.path.basename(
              match.group(1).strip())
          break
      if not minidump_mime_filename_base:
        logs.log_error('Minidump marker was found, but no path in stacktrace.')
        return None

      # Look for MIME. If none found, bail.
      # We might not have copied over the crash dumps yet (copying is
      # buffered), so we want to search both the original directory and the
      # one to which the minidumps should later be copied.
      device_directories_to_search = [
          adb.DEVICE_CRASH_DUMPS_DIR,
          os.path.dirname(line.strip())
      ]
      device_minidump_search_paths = []
      device_minidump_mime_path = None

      for device_directory in device_directories_to_search:
        device_minidump_mime_potential_paths = adb.run_adb_shell_command(
            ['ls', '"%s"' % device_directory], root=True).splitlines()
        device_minidump_search_paths += device_minidump_mime_potential_paths

        for potential_path in device_minidump_mime_potential_paths:
          # Check that we actually found a file, and the right one (not
          # logcat).
          if 'No such file or directory' in potential_path:
            continue

          if minidump_mime_filename_base not in potential_path:
            continue

          if '.up' in potential_path or '.dmp' in potential_path:
            device_minidump_mime_path = os.path.join(device_directory,
                                                     potential_path)
            break

        # Break if we found a path.
        if device_minidump_mime_path is not None:
          break

      # If we still didn't find a minidump path, bail.
      if device_minidump_mime_path is None:
        logs.log_error('Could not get MIME path from ls:\n%s' %
                       str(device_minidump_search_paths))
        return None

      # Pull out MIME and parse to minidump file and MIME parameters.
      minidump_mime_filename = '%s.mime' % minidump_mime_filename_base
      local_minidump_mime_path = os.path.join(crash_stacks_directory,
                                              minidump_mime_filename)
      adb.run_adb_command([
          'pull',
          '"%s"' % device_minidump_mime_path, local_minidump_mime_path
      ])
      if not os.path.exists(local_minidump_mime_path):
        logs.log_error('Could not pull MIME from %s to %s.' %
                       (device_minidump_mime_path, local_minidump_mime_path))
        return None

      crash_info = parse_mime_to_crash_report_info(local_minidump_mime_path)
      if crash_info is None:
        return None

      crash_info.unsymbolized_stacktrace = output
      return crash_info
    else:
      # Other platforms are not currently supported.
      logs.log_error('Unable to fetch crash information for this platform.')
      return None

  # Could not find dump location, bail out. This could also happen when we
  # don't have a minidump location in the stack at all, e.g. when the testcase
  # does not crash during minimization.
  return None
def clear_build_directory():
  """Clears the build directory."""
  remove_directory(environment.get_value('BUILDS_DIR'), recreate=True)
def update_source_code():
  """Updates source code files with the latest version from appengine."""
  process_handler.cleanup_stale_processes()
  shell.clear_temp_directory()

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
  try:
    storage.copy_file_from(get_source_url(), temp_archive)
  except Exception:
    logs.log_error('Could not retrieve source code archive from url.')
    return

  try:
    file_list = archive.get_file_list(temp_archive)
    zip_archive = zipfile.ZipFile(temp_archive, 'r')
  except Exception:
    logs.log_error('Bad zip file.')
    return

  src_directory = os.path.join(root_directory, 'src')
  output_directory = os.path.dirname(root_directory)
  error_occurred = False
  normalized_file_set = set()
  for filepath in file_list:
    filename = os.path.basename(filepath)

    # This file cannot be updated on the fly since it is running as a server.
    if filename == 'adb':
      continue

    absolute_filepath = os.path.join(output_directory, filepath)
    if os.path.altsep:
      absolute_filepath = absolute_filepath.replace(os.path.altsep,
                                                    os.path.sep)

    if os.path.realpath(absolute_filepath) != absolute_filepath:
      continue

    normalized_file_set.add(absolute_filepath)
    try:
      file_extension = os.path.splitext(filename)[1]

      # Remove any .so files first before overwriting, as they can be loaded
      # in the memory of existing processes. Overwriting them directly causes
      # segfaults in existing processes (e.g. run.py).
      if file_extension == '.so' and os.path.exists(absolute_filepath):
        os.remove(absolute_filepath)

      # On Windows, to update DLLs (and native .pyd extensions), we rename
      # them first so that we can install the new version.
      if (environment.platform() == 'WINDOWS' and
          file_extension in ['.dll', '.pyd'] and
          os.path.exists(absolute_filepath)):
        _rename_dll_for_update(absolute_filepath)
    except Exception:
      logs.log_error('Failed to remove or move %s before extracting new '
                     'version.' % absolute_filepath)

    try:
      extracted_path = zip_archive.extract(filepath, output_directory)
      external_attr = zip_archive.getinfo(filepath).external_attr
      mode = (external_attr >> 16) & 0o777
      mode |= 0o440
      os.chmod(extracted_path, mode)
    except:
      error_occurred = True
      logs.log_error(
          'Failed to extract file %s from source archive.' % filepath)

  zip_archive.close()

  if error_occurred:
    return

  clear_pyc_files(src_directory)
  clear_old_files(src_directory, normalized_file_set)

  local_manifest_path = os.path.join(root_directory,
                                     utils.LOCAL_SOURCE_MANIFEST)
  source_version = utils.read_data_from_file(
      local_manifest_path, eval_data=False)
  logs.log('Source code updated to %s.' % source_version)
def clear_crash_stacktraces_directory():
  """Clears the crash stacktraces directory."""
  remove_directory(
      environment.get_value('CRASH_STACKTRACES_DIR'), recreate=True)
def qemu_setup():
  """Sets up and runs a QEMU VM in the background.

  Returns a process.Popen object. Does not block the calling process, and
  teardown must be handled by the caller (use .kill()).

  Fuchsia fuzzers assume a QEMU VM is running; call this routine prior to
  beginning Fuchsia fuzzing tasks.

  This initialization routine assumes the following layout for
  fuchsia_resources_dir:

  * /qemu-for-fuchsia/*
  * /.ssh/*
  * target/x64/fvm.blk
  * target/x64/fuchsia.zbi
  * target/x64/multiboot.bin
  * build/out/default/fuzzers.json
  * build/out/default/ids.txt
  * build/out/default.zircon/tools/*
  * build/zircon/prebuilt/downloads/symbolize
  * build/buildtools/linux-x64/clang/bin/llvm-symbolizer"""
  # First download the Fuchsia resources locally.
  fuchsia_resources_dir = environment.get_value('FUCHSIA_RESOURCES_DIR')
  if not fuchsia_resources_dir:
    raise errors.FuchsiaConfigError('Could not find FUCHSIA_RESOURCES_DIR')

  # Then, save paths for necessary commands later.
  qemu_path = os.path.join(fuchsia_resources_dir, 'qemu-for-fuchsia', 'bin',
                           'qemu-system-x86_64')
  os.chmod(qemu_path, 0o550)
  kernel_path = os.path.join(fuchsia_resources_dir, 'target', 'x64',
                             'multiboot.bin')
  os.chmod(kernel_path, 0o644)
  pkey_path = os.path.join(fuchsia_resources_dir, '.ssh', 'pkey')
  os.chmod(pkey_path, 0o400)
  sharefiles_path = os.path.join(fuchsia_resources_dir, 'qemu-for-fuchsia',
                                 'share', 'qemu')
  drive_path = os.path.join(fuchsia_resources_dir, 'target', 'x64', 'fvm.blk')
  os.chmod(drive_path, 0o644)
  fuchsia_zbi = os.path.join(fuchsia_resources_dir, 'target', 'x64',
                             'fuchsia.zbi')
  initrd_path = os.path.join(fuchsia_resources_dir, 'fuchsia-ssh.zbi')

  # Perform some more initialization steps.
  extend_fvm(fuchsia_resources_dir, drive_path)
  add_keys_to_zbi(fuchsia_resources_dir, initrd_path, fuchsia_zbi)

  # Get a free port for the VM, so we can SSH in later.
  tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  tcp.bind(('localhost', 0))
  _, port = tcp.getsockname()
  tcp.close()

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PORTNUM', port)
  environment.set_value('FUCHSIA_RESOURCES_DIR', fuchsia_resources_dir)

  # yapf: disable
  qemu_args = [
      '-m', '2048',
      '-nographic',
      '-kernel', kernel_path,
      '-initrd', initrd_path,
      '-smp', '4',
      '-drive', 'file=' + drive_path + ',format=raw,if=none,id=blobstore',
      '-device', 'virtio-blk-pci,drive=blobstore',
      '-monitor', 'none',
      '-append', '"kernel.serial=legacy TERM=dumb"',
      '-machine', 'q35',
      '-display', 'none',
      # Can't use host CPU since we don't necessarily have KVM on the machine.
      # Emulate a Haswell CPU with a few feature toggles. This mirrors the most
      # common configuration for Fuchsia VMs when using in-tree tools.
      '-cpu', 'Haswell,+smap,-check,-fsgsbase',
      '-netdev', ('user,id=net0,net=192.168.3.0/24,dhcpstart=192.168.3.9,'
                  'host=192.168.3.2,hostfwd=tcp::') + str(port) + '-:22',
      '-device', 'e1000,netdev=net0,mac=52:54:00:63:5e:7b',
      '-L', sharefiles_path
  ]
  # yapf: enable

  # Detecting KVM is tricky, so let's use an environment variable, set from
  # the docker image, to determine whether to turn it on or not.
  if environment.get_value('FUCHSIA_USE_KVM'):
    qemu_args.append('-enable-kvm')

  # Get the list of fuzzers for ClusterFuzz to choose from.
  host = Host.from_dir(
      os.path.join(fuchsia_resources_dir, 'build', 'out', 'default'))
  Device(host, 'localhost', str(port))
  Fuzzer.filter(host.fuzzers, '')

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PKEY_PATH', pkey_path)

  # Finally, launch QEMU.
  logs.log('Running QEMU. Command: ' + qemu_path + ' ' + str(qemu_args))
  qemu_process = new_process.ProcessRunner(qemu_path, qemu_args)
  qemu_popen = qemu_process.run(stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  return qemu_popen
def clear_mutator_plugins_directory():
  """Clears the mutator plugins directory."""
  remove_directory(environment.get_value('MUTATOR_PLUGINS_DIR'), recreate=True)
def clear_common_data_bundles_directory():
  """Clear the common data bundle directory."""
  remove_directory(environment.get_value('FUZZ_DATA'), recreate=True)
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning."""
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only the
  # fuzz target and its related files.
  environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

  if environment.is_trusted_host():
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  if not build_manager.setup_build(revision=revision):
    raise CorpusPruningException('Failed to setup build.')

  build_directory = environment.get_value('BUILD_DIR')
  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent the corpus from growing unbounded and causing recurring
  # failures when trying to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url()
    ]:
      _limit_corpus_size(corpus_url)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner_stats = pruner.run(context.initial_corpus_path,
                            context.minimized_corpus_path,
                            context.bad_units_path)

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  # Temporarily copy the past crash regressions folder into the minimized
  # corpus so that the corpus backup archive can have both.
  regressions_input_dir = os.path.join(context.initial_corpus_path,
                                       'regressions')
  regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                        'regressions')
  if shell.get_directory_file_count(regressions_input_dir):
    shutil.copytree(regressions_input_dir, regressions_output_dir)
  backup_bucket = environment.get_value('BACKUP_BUCKET')
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)
  shell.remove_directory(regressions_output_dir)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log('Corpus pruned from %d to %d units.' %
           (initial_corpus_size, minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Save the minimized corpus size before cross pollination to put in
  # BigQuery.
  pre_pollination_corpus_size = minimized_corpus_size_units

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn('Not enough time for shared corpus merging.')
    return None

  cross_pollinator = CrossPollinator(runner)
  pollinator_stats = cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats.
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log('Finished.')

  sources = ','.join([
      fuzzer.fuzz_target.project_qualified_name()
      for fuzzer in context.cross_pollinate_fuzzers
  ])

  cross_pollination_stats = None
  if pruner_stats and pollinator_stats:
    cross_pollination_stats = CrossPollinationStats(
        project_qualified_name, context.cross_pollination_method, sources,
        context.tag, initial_corpus_size, pre_pollination_corpus_size,
        pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
        pruner_stats['feature_coverage'], pollinator_stats['feature_coverage'])

  return CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value('APP_REVISION'),
      cross_pollination_stats=cross_pollination_stats)
def get_work_dir():
  """Return work directory for Syzkaller."""
  return os.path.join(environment.get_value('FUZZ_INPUTS_DISK'), 'syzkaller')
def _get_device_corpus_db_filename(self):
  """Return device-specific corpus db filename."""
  return environment.get_value('ANDROID_SERIAL') + '.db'
def get_adb_command_line(adb_cmd):
  """Return adb command line for running an adb command."""
  device_serial = environment.get_value('ANDROID_SERIAL')
  adb_cmd_line = '%s -s %s %s' % (get_adb_path(), device_serial, adb_cmd)
  return adb_cmd_line
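# Usage sketch (hypothetical values): with ANDROID_SERIAL set to 'ABCD1234'
# and get_adb_path() returning '/usr/bin/adb',
# get_adb_command_line('shell ls') would return:
#   '/usr/bin/adb -s ABCD1234 shell ls'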
def test_for_crash_with_retries(testcase,
                                testcase_path,
                                test_timeout,
                                http_flag=False,
                                compare_crash=True):
  """Test for a crash and return crash parameters like crash type, crash
  state, crash stacktrace, etc."""
  # Clean up any existing application instances and user profile directories.
  # Cleaning up temp clears user profile directories and should be done before
  # the |get_command_line_for_application| call, since that creates
  # dependencies in the profile folder.
  process_handler.terminate_stale_application_instances()
  shell.clear_temp_directory()

  app_directory = environment.get_value('APP_DIR')
  command = get_command_line_for_application(
      testcase_path, needs_http=http_flag)
  crash_retries = environment.get_value('CRASH_RETRIES')
  flaky_stacktrace = testcase.flaky_stack
  warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

  logs.log('Testing for crash (command="%s").' % command)

  for round_number in range(1, crash_retries + 1):
    run_timeout = warmup_timeout if round_number == 1 else test_timeout

    # TODO(ochang): Set up engine for greybox testcases.
    return_code, crash_time, output = process_handler.run_process(
        command,
        timeout=run_timeout,
        gestures=testcase.gestures,
        current_working_directory=app_directory)
    process_handler.terminate_stale_application_instances()

    crash_result = CrashResult(return_code, crash_time, output)
    if not crash_result.is_crash():
      logs.log(
          'No crash occurred (round {round_number}).'.format(
              round_number=round_number),
          output=output)
      continue

    state = crash_result.get_symbolized_data()
    logs.log(
        ('Crash occurred in {crash_time} seconds (round {round_number}). '
         'State:\n{crash_state}').format(
             crash_time=crash_time,
             round_number=round_number,
             crash_state=state.crash_state),
        output=state.crash_stacktrace)

    if not compare_crash or not testcase.crash_state:
      logs.log('Crash stacktrace comparison skipped.')
      return crash_result

    if crash_result.should_ignore():
      logs.log('Crash stacktrace matched ignore signatures, ignored.')
      continue

    if crash_result.is_security_issue() != testcase.security_flag:
      logs.log('Crash security flag does not match, ignored.')
      continue

    if flaky_stacktrace:
      logs.log('Crash stacktrace is marked flaky, skipping comparison.')
      return crash_result

    crash_comparer = CrashComparer(state.crash_state, testcase.crash_state)
    if crash_comparer.is_similar():
      logs.log('Crash stacktrace is similar to original stacktrace.')
      return crash_result
    else:
      logs.log('Crash stacktrace does not match original stacktrace.')

  logs.log("Didn't crash at all.")
  crash_result = CrashResult(return_code=0, crash_time=0, output=output)
  return crash_result
def is_gce():
  """Return whether we are running in a GCE environment."""
  android_serial = environment.get_value('ANDROID_SERIAL')
  return android_serial.startswith('127.0.0.1:')
def get_command_line_for_application(file_to_run='',
                                     user_profile_index=0,
                                     app_path=None,
                                     app_args=None,
                                     needs_http=False,
                                     write_command_line_file=False):
  """Returns the complete command line required to execute application."""
  if app_args is None:
    app_args = environment.get_value('APP_ARGS')
  if app_path is None:
    app_path = environment.get_value('APP_PATH')

  additional_command_line_flags = get_additional_command_line_flags(
      file_to_run)
  app_args_append_testcase = environment.get_value('APP_ARGS_APPEND_TESTCASE')
  app_directory = environment.get_value('APP_DIR')
  app_name = environment.get_value('APP_NAME')
  apps_argument = environment.get_value('APPS_ARG')
  crash_stacks_directory = environment.get_value('CRASH_STACKTRACES_DIR')
  debugger = environment.get_value('DEBUGGER_PATH')
  device_testcases_directory = android.constants.DEVICE_TESTCASES_DIR
  fuzzer_directory = environment.get_value('FUZZER_DIR')
  extension_argument = environment.get_value('EXTENSION_ARG')
  input_directory = environment.get_value('INPUT_DIR')
  plt = environment.platform()
  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  user_profile_argument = environment.get_value('USER_PROFILE_ARG')
  window_argument = environment.get_value('WINDOW_ARG')
  user_profile_directory = get_user_profile_directory(user_profile_index)

  # Create user profile directory and setup contents if needed.
  setup_user_profile_directory_if_needed(user_profile_directory)

  # Handle spaces in APP_PATH.
  # If application path has spaces, then we need to quote it.
  if ' ' in app_path:
    app_path = '"%s"' % app_path

  # Prepend command with interpreter if it is a script.
  interpreter = shell.get_interpreter(app_name)
  if interpreter:
    app_path = '%s %s' % (interpreter, app_path)

  # Start creating the command line.
  command = ''

  launcher = environment.get_value('LAUNCHER_PATH')
  if environment.is_trusted_host() and not launcher:
    # Rebase the file_to_run path to the worker's root (unless we're running
    # under a launcher, which runs on the host).
    from bot.untrusted_runner import file_host
    file_to_run = file_host.rebase_to_worker_root(file_to_run)

  # Default case.
  testcase_path = file_to_run
  testcase_filename = os.path.basename(testcase_path)
  testcase_directory = os.path.dirname(testcase_path)
  testcase_file_url = utils.file_path_to_file_url(testcase_path)
  testcase_http_url = ''

  # Determine where |testcase_file_url| should point depending on platform and
  # whether or not a launcher script is used.
  if file_to_run:
    if launcher:
      # In the case of launcher scripts, the testcase file to be run resides on
      # the host running the launcher script. Thus |testcase_file_url|, which
      # may point to a location on the device for Android job types, does not
      # apply. Instead, the launcher script should be passed the original file
      # to run. By setting |testcase_file_url| to |file_to_run|, we avoid
      # duplicating job definitions solely for supporting launcher scripts.
      testcase_file_url = file_to_run

      # Jobs that have a launcher script which needs to be run on the host will
      # have app_name == launcher. In this case don't prepend launcher to
      # command - just use app_name.
      if os.path.basename(launcher) != app_name:
        command += launcher + ' '
    elif plt in ['ANDROID']:
      # Android-specific testcase path fixup for fuzzers that don't rely on
      # launcher scripts.
      local_testcases_directory = environment.get_value('FUZZ_INPUTS')

      # Check if the file to run is in fuzzed testcases folder. If yes, then we
      # can substitute with a local device path. Otherwise, it is part of some
      # data bundle with resource dependencies and we just need to use http
      # host forwarder for that.
      if file_to_run.startswith(local_testcases_directory):
        testcase_relative_path = (
            file_to_run[len(local_testcases_directory) + 1:])
        testcase_path = os.path.join(device_testcases_directory,
                                     testcase_relative_path)
        testcase_file_url = utils.file_path_to_file_url(testcase_path)
      else:
        # Force use of host_forwarder based on comment above.
        needs_http = True

    # Check if the testcase needs to be loaded over http.
    # TODO(ochang): Make this work for trusted/untrusted.
    http_ip = '127.0.0.1'
    http_port_1 = environment.get_value('HTTP_PORT_1', 8000)
    relative_testcase_path = file_to_run[len(input_directory + os.path.sep):]
    relative_testcase_path = relative_testcase_path.replace('\\', '/')
    testcase_http_url = 'http://%s:%d/%s' % (http_ip, http_port_1,
                                             relative_testcase_path)

    if needs_http:
      # TODO(unassigned): Support https.
      testcase_file_url = testcase_http_url
      testcase_path = testcase_http_url

  # Compose app arguments.
  all_app_args = ''

  if user_profile_argument:
    all_app_args += ' %s=%s' % (user_profile_argument, user_profile_directory)
  if extension_argument and EXTENSIONS_PREFIX in testcase_filename:
    all_app_args += ' %s=%s' % (extension_argument, testcase_directory)
  if apps_argument and APPS_PREFIX in testcase_filename:
    all_app_args += ' %s=%s' % (apps_argument, testcase_directory)
  if window_argument:
    all_app_args += ' %s' % window_argument
  if additional_command_line_flags:
    all_app_args += ' %s' % additional_command_line_flags.strip()
  if app_args:
    all_app_args += ' %s' % app_args.strip()

  # Append %TESTCASE% at end if no testcase pattern is found in app arguments.
  if not utils.sub_string_exists_in(
      ['%TESTCASE%', '%TESTCASE_FILE_URL%', '%TESTCASE_HTTP_URL%'],
      all_app_args) and app_args_append_testcase:
    all_app_args += ' %TESTCASE%'
  all_app_args = all_app_args.strip()

  # Build the actual command to run now.
  if debugger:
    command += '%s ' % debugger
  if app_path:
    command += app_path
  if all_app_args:
    command += ' %s' % all_app_args
  command = command.replace('%APP_DIR%', app_directory)
  command = command.replace('%CRASH_STACKTRACES_DIR%', crash_stacks_directory)
  command = command.replace('%DEVICE_TESTCASES_DIR%',
                            device_testcases_directory)
  command = command.replace('%FUZZER_DIR%', fuzzer_directory)
  command = command.replace('%INPUT_DIR%', input_directory)
  command = command.replace('%ROOT_DIR%', root_directory)
  command = command.replace('%TESTCASE%', testcase_path)
  command = command.replace('%TESTCASE_FILE_URL%', testcase_file_url)
  command = command.replace('%TESTCASE_HTTP_URL%', testcase_http_url)
  command = command.replace('%TMP_DIR%', temp_directory)
  command = command.replace('%USER_PROFILE_DIR%', user_profile_directory)

  # Though we attempt to pass all flags that have been used to run html as
  # a test in our content shell job types for backwards compatibility, a
  # deprecation warning in recent revisions now causes it to fail. Remove
  # the --run-layout-test flag to avoid this.
  content_shell_app_names = [
      'content_shell', 'content_shell.exe', 'Content Shell'
  ]
  if (environment.get_value('APP_NAME') in content_shell_app_names and
      environment.get_value('APP_REVISION', 0) >= 558998):
    command = command.replace(' --run-layout-test', '')

  if plt == 'ANDROID' and not launcher:
    # Initial setup phase for command line.
    if write_command_line_file:
      android.adb.write_command_line_file(command, app_path)

    return android.app.get_launch_command(all_app_args, testcase_path,
                                          testcase_file_url)

  # Decide which directory we will run the application from.
  # We are using |app_directory| since it helps to locate pdbs
  # in the same directory, other dependencies, etc.
  if os.path.exists(app_directory):
    os.chdir(app_directory)

  return str(command)
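# A minimal sketch (hypothetical values, helper unused elsewhere) of the
# placeholder handling done above: when APP_ARGS contains no %TESTCASE%-style
# pattern and APP_ARGS_APPEND_TESTCASE is set, ' %TESTCASE%' is appended, and
# the final substitution pass then points the command at the actual testcase.
def _example_testcase_placeholder_substitution():
  app_path = '/build/app/chrome'
  app_args = '--no-sandbox --enable-logging'
  testcase_path = '/bot/inputs/fuzzer-testcases/crash-deadbeef.html'

  command = '%s %s' % (app_path, app_args)
  if '%TESTCASE%' not in command:
    command += ' %TESTCASE%'
  command = command.replace('%TESTCASE%', testcase_path)

  # command is now:
  # '/build/app/chrome --no-sandbox --enable-logging
  #  /bot/inputs/fuzzer-testcases/crash-deadbeef.html'
  return command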
def clear_fuzzers_directories():
  """Clears the fuzzers directory."""
  remove_directory(environment.get_value('FUZZERS_DIR'), recreate=True)
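# The helper above delegates to remove_directory(..., recreate=True). A rough
# standard-library-only approximation of that behavior is sketched below for
# reference; the real helper presumably adds logging, retries and error
# handling. The function name is hypothetical and unused elsewhere.
def _example_remove_and_recreate_directory(directory_path):
  import shutil

  # Delete the whole tree if it exists, then recreate an empty directory so
  # callers always start from a clean, existing path.
  shutil.rmtree(directory_path, ignore_errors=True)
  if not os.path.exists(directory_path):
    os.makedirs(directory_path)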
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
  """Executes a process with a given command line and other parameters."""
  # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
  # do certain operations before fuzzer setup (e.g. bad build check).
  launcher = environment.get_value('LAUNCHER_PATH')
  if environment.is_trusted_host() and testcase_run and not launcher:
    from bot.untrusted_runner import remote_process_host
    return remote_process_host.run_process(
        cmdline, current_working_directory, timeout, need_shell, gestures,
        env_copy, testcase_run, ignore_children)

  if gestures is None:
    gestures = []

  if env_copy:
    os.environ.update(env_copy)

  # This is used when running scripts on native linux OS and not on the device.
  # E.g. running a fuzzer to generate testcases or launcher script.
  plt = environment.platform()
  if plt in ['ANDROID', 'FUCHSIA'] and (not testcase_run or launcher):
    plt = 'LINUX'
  elif plt == 'IOS' and (not testcase_run or launcher):
    plt = 'MAC'

  # Reduce the testcase timeout slightly to account for crash analysis time.
  timeout -= CRASH_ANALYSIS_TIME

  # LeakSanitizer hack - give time for stdout/stderr processing.
  lsan = environment.get_value('LSAN', False)
  if lsan:
    timeout -= LSAN_ANALYSIS_TIME

  # Initialize variables.
  adb_output = None
  process_output = ''
  process_status = None
  return_code = 0
  process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
  start_time = time.time()
  watch_for_process_exit = (
      environment.get_value('WATCH_FOR_PROCESS_EXIT')
      if plt == 'ANDROID' else True)
  window_list = []

  # Get gesture start time from last element in gesture list.
  gestures = copy.deepcopy(gestures)
  if gestures and gestures[-1].startswith('Trigger'):
    gesture_start_time = int(gestures[-1].split(':')[1])
    gestures.pop()
  else:
    gesture_start_time = timeout // 2

  logs.log('Process (%s) started.' % str(cmdline), level=logging.DEBUG)

  if plt == 'ANDROID':
    # Clear the log upfront.
    android.logger.clear_log()

    # Run the app.
    adb_output = android.adb.run_command(cmdline, timeout=timeout)
  else:
    cmd, args = shell.get_command_and_arguments(cmdline)

    process_output = mozprocess.processhandler.StoreOutput()
    process_status = ProcessStatus()
    try:
      process_handle = mozprocess.ProcessHandlerMixin(
          cmd,
          args,
          cwd=current_working_directory,
          shell=need_shell,
          processOutputLine=[process_output],
          onFinish=[process_status],
          ignore_children=ignore_children)
      start_process(process_handle)
    except:
      logs.log_error('Exception occurred when running command: %s.' % cmdline)
      return None, None, ''

  while True:
    time.sleep(process_poll_interval)

    # Run the gestures at gesture_start_time or in case we didn't find windows
    # in the last try.
    if (gestures and time.time() - start_time >= gesture_start_time and
        not window_list):
      # If we don't find any windows, increment the gesture start time so that
      # the next check happens after 1 second.
      gesture_start_time += 1

      if plt == 'LINUX':
        linux.gestures.run_gestures(gestures, process_handle.pid,
                                    process_status, start_time, timeout,
                                    window_list)
      elif plt == 'WINDOWS':
        windows.gestures.run_gestures(gestures, process_handle.pid,
                                      process_status, start_time, timeout,
                                      window_list)
      elif plt == 'ANDROID':
        android.gestures.run_gestures(gestures, start_time, timeout)

        # TODO(mbarbella): We add a fake window here to prevent gestures on
        # Android from getting executed more than once.
        window_list = ['FAKE']

    if time.time() - start_time >= timeout:
      break

    # Collect the process output.
    output = (
        android.logger.log_output()
        if plt == 'ANDROID' else '\n'.join(process_output.output))
    if crash_analyzer.is_memory_tool_crash(output):
      break

    # Check if we need to bail out on process exit.
    if watch_for_process_exit:
      # If |watch_for_process_exit| is set, then we already completed running
      # our app launch command. So, we can bail out.
      if plt == 'ANDROID':
        break

      # On desktop, we bail out as soon as the process finishes.
      if process_status and process_status.finished:
        # Wait for process shutdown and set return code.
        process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
        break

  # Process output based on platform.
  if plt == 'ANDROID':
    # Get current log output. If device is in reboot mode, logcat automatically
    # waits for device to be online.
    time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
    output = android.logger.log_output()

    if android.constants.LOW_MEMORY_REGEX.search(output):
      # If the device is low on memory, we should force reboot and bail out to
      # prevent device from getting in a frozen state.
      logs.log('Device is low on memory, rebooting.', output=output)
      android.adb.hard_reset()
      android.adb.wait_for_device()
    elif android.adb.time_since_last_reboot() < time.time() - start_time:
      # Check if a reboot has happened, if yes, append log output before reboot
      # and kernel logs content to output.
      log_before_last_reboot = android.logger.log_output_before_last_reboot()
      kernel_log = android.adb.get_kernel_log_content()
      output = '%s%s%s%s%s' % (
          log_before_last_reboot, utils.get_line_seperator('Device rebooted'),
          output, utils.get_line_seperator('Kernel Log'), kernel_log)

      # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
      # in ~0.15 sec and is needed especially between runs for kernel crashes.
      android.adb.run_as_root()
      android.settings.change_se_linux_to_permissive_mode()
      return_code = 1

    # Add output from adb to the front.
    if adb_output:
      output = '%s\n\n%s' % (adb_output, output)

    # Kill the application if it is still running. We do this at the end to
    # prevent this from adding noise to the logcat output.
    task_name = environment.get_value('TASK_NAME')
    child_process_termination_pattern = environment.get_value(
        'CHILD_PROCESS_TERMINATION_PATTERN')
    if task_name == 'fuzz' and child_process_termination_pattern:
      # In some cases, we do not want to terminate the application after each
      # run to avoid long startup times (e.g. for chrome). Terminate processes
      # matching a particular pattern for light cleanup in this case.
      android.adb.kill_processes_and_children_matching_name(
          child_process_termination_pattern)
    else:
      # There is no special termination behavior. Simply stop the application.
      android.app.stop()
  else:
    # Get the return code in case the process has finished already.
    # If the process hasn't finished, return_code will be None which is what
    # callers expect unless the output indicates a crash.
    return_code = process_handle.poll()

    # If the process is still running, then terminate it.
    if not process_status.finished:
      if launcher and cmdline.startswith(launcher):
        # If this was a launcher script, we KILL all child processes created
        # except for APP_NAME.
        # It is expected that, if the launcher script terminated normally, it
        # cleans up all the child processes it created itself.
        terminate_root_and_child_processes(process_handle.pid)
      else:
        try:
          # kill() here actually sends SIGTERM on posix.
          process_handle.kill()
        except:
          pass

    if lsan:
      time.sleep(LSAN_ANALYSIS_TIME)

    output = '\n'.join(process_output.output)

    # X Server hack when the maximum number of clients is reached.
    if ('Maximum number of clients reached' in output or
        'Unable to get connection to X server' in output):
      logs.log_error('Unable to connect to X server, exiting.')
      os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
      sys.exit(0)

  if testcase_run and (crash_analyzer.is_memory_tool_crash(output) or
                       crash_analyzer.is_check_failure_crash(output)):
    return_code = 1

  # If a crash is found, then we add the memory state as well.
  if return_code and plt == 'ANDROID':
    ps_output = android.adb.get_ps_output()
    if ps_output:
      output += utils.get_line_seperator('Memory Statistics')
      output += ps_output

  logs.log(
      'Process (%s) ended, exit code (%s), output (%s).' %
      (str(cmdline), str(return_code), str(output)),
      level=logging.DEBUG)

  return return_code, round(time.time() - start_time, 1), output
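# A hedged usage sketch for run_process(), with made-up paths and gesture
# strings. The only convention taken from the code above is the optional
# trailing 'Trigger:<seconds>' element, which sets the gesture start time and
# is popped before the gestures are replayed; everything else is illustrative
# and the helper is unused elsewhere.
def _example_run_process_usage():
  cmdline = '/build/app/chrome --no-sandbox /bot/inputs/crash-deadbeef.html'
  gestures = ['key,space', 'mouse,click', 'Trigger:20']  # hypothetical values.

  return_code, duration, output = run_process(
      cmdline,
      current_working_directory='/build/app',
      timeout=60,
      gestures=gestures)

  # A truthy return code indicates an abnormal exit or that the output matched
  # a memory tool / CHECK failure crash signature; duration is in seconds,
  # rounded to one decimal place.
  return return_code, duration, output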
def main():
  """Main build routine."""
  bucket_prefix = environment.get_value('BUCKET_PREFIX')
  build_dir = environment.get_value('BUILD_DIR')
  wait_time = environment.get_value('WAIT_TIME')

  builds_metadata = build_info.get_production_builds_info(
      environment.platform())
  if not builds_metadata:
    return

  global LAST_BUILD
  for build_metadata in builds_metadata:
    build_type = build_metadata.build_type
    revision = build_metadata.revision
    version = build_metadata.version

    if build_type not in ['stable', 'beta']:
      # We don't need dev or canary builds atm.
      continue

    # Start building the builds.
    for tool in TOOLS_GN_MAPPINGS:
      tool_and_build_type = '%s-%s' % (tool, build_type)
      logs.log('Building %s.' % tool_and_build_type)

      # Check if we have already built the same build.
      if (tool_and_build_type in LAST_BUILD and
          revision == LAST_BUILD[tool_and_build_type]):
        logs.log('Skipping same build %s (revision %s).' %
                 (tool_and_build_type, revision))
        continue

      LAST_BUILD[tool_and_build_type] = revision
      file_name_prefix = '%s-linux-%s-%s' % (tool, build_type, version)
      archive_filename = '%s.zip' % file_name_prefix
      archive_path_local = '%s/%s' % (build_dir, archive_filename)
      bucket_name = '%s%s' % (bucket_prefix, tool.split('-')[0])
      archive_path_remote = (
          'gs://%s/%s/%s' % (bucket_name, TOOLS_BUCKET_DIR_MAPPINGS[tool],
                             archive_filename))

      # Run the build script with the required gn arguments.
      command = ''
      gn_args = '%s %s' % (TOOLS_GN_MAPPINGS[tool], GN_COMMON_ARGS)
      command += '%s "%s" %s %s' % (BUILD_HELPER_SCRIPT, gn_args, version,
                                    file_name_prefix)
      logs.log('Executing build script: %s.' % command)
      os.system(command)

      # Check if the build succeeded based on the existence of the
      # local archive file.
      if os.path.exists(archive_path_local):
        # Build succeeded. Now copy it to google cloud storage and make it
        # public.
        os.system('gsutil cp %s %s' % (archive_path_local,
                                       archive_path_remote))
        os.system('gsutil acl set public-read %s' % archive_path_remote)
        logs.log('Build succeeded, created %s.' % archive_filename)
      else:
        LAST_BUILD[tool_and_build_type] = ''
        logs.log_error('Build failed, unable to create %s.' % archive_filename)

  logs.log('Completed cycle, waiting for %d secs.' % wait_time)
  time.sleep(wait_time)
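# A hedged sketch of the mapping shapes and naming scheme main() relies on,
# with made-up entries. TOOLS_GN_MAPPINGS is assumed to map a tool name to the
# gn args for its build, and TOOLS_BUCKET_DIR_MAPPINGS to map the same tool
# name to a directory inside the destination bucket; the example values and
# helper below are illustrative only and unused elsewhere.
_EXAMPLE_TOOLS_GN_MAPPINGS = {
    'asan': 'is_asan=true',
    'tsan': 'is_tsan=true',
}
_EXAMPLE_TOOLS_BUCKET_DIR_MAPPINGS = {
    'asan': 'linux-release',
    'tsan': 'linux-release-tsan',
}


def _example_remote_archive_path(bucket_prefix, tool, build_type, version):
  """Reproduces the archive naming used in main() for a single build."""
  file_name_prefix = '%s-linux-%s-%s' % (tool, build_type, version)
  bucket_name = '%s%s' % (bucket_prefix, tool.split('-')[0])
  return 'gs://%s/%s/%s.zip' % (
      bucket_name, _EXAMPLE_TOOLS_BUCKET_DIR_MAPPINGS[tool], file_name_prefix)

# _example_remote_archive_path('chromium-browser-', 'asan', 'stable', '80.0.1')
#   -> 'gs://chromium-browser-asan/linux-release/asan-linux-stable-80.0.1.zip'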