def setup_testcase(testcase, job_type, fuzzer_override=None):
  """Sets up the testcase and needed dependencies like fuzzer,
  data bundle, etc."""
  fuzzer_name = fuzzer_override or testcase.fuzzer_name
  task_name = environment.get_value('TASK_NAME')
  testcase_fail_wait = environment.get_value('FAIL_WAIT')
  testcase_id = testcase.key.id()

  # Clear testcase directories.
  shell.clear_testcase_directories()

  # Adjust the test timeout value if this is coming from a user-uploaded
  # testcase.
  if testcase.uploader_email:
    _set_timeout_value_from_user_upload(testcase_id)

  # Update the fuzzer if necessary in order to get the updated data bundle.
  if fuzzer_name:
    try:
      update_successful = update_fuzzer_and_data_bundles(fuzzer_name)
    except errors.InvalidFuzzerError:
      # Close testcase and don't recreate tasks if this fuzzer is invalid.
      testcase.open = False
      testcase.fixed = 'NA'
      testcase.set_metadata('fuzzer_was_deleted', True)
      logs.log_error('Closed testcase %d with invalid fuzzer %s.' %
                     (testcase_id, fuzzer_name))

      error_message = 'Fuzzer %s no longer exists' % fuzzer_name
      data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                           error_message)
      return None, None, None

    if not update_successful:
      error_message = 'Unable to setup fuzzer %s' % fuzzer_name
      data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                           error_message)
      tasks.add_task(
          task_name, testcase_id, job_type, wait_time=testcase_fail_wait)
      return None, None, None

  # Extract the testcase and any of its resources to the input directory.
  file_list, input_directory, testcase_file_path = unpack_testcase(testcase)
  if not file_list:
    error_message = 'Unable to setup testcase %s' % testcase_file_path
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         error_message)
    tasks.add_task(
        task_name, testcase_id, job_type, wait_time=testcase_fail_wait)
    return None, None, None

  # For Android/Fuchsia, we need to sync our local testcases directory with the
  # one on the device.
  if environment.is_android():
    _copy_testcase_to_device_and_setup_environment(testcase,
                                                   testcase_file_path)

  # Push testcases to worker.
  if environment.is_trusted_host():
    from clusterfuzz._internal.bot.untrusted_runner import file_host
    file_host.push_testcases_to_worker()

  # Copy global blacklist into local blacklist.
  is_lsan_enabled = environment.get_value('LSAN')
  if is_lsan_enabled:
    # Get local blacklist without this testcase's entry.
    leak_blacklist.copy_global_to_local_blacklist(excluded_testcase=testcase)

  prepare_environment_for_testcase(testcase, job_type, task_name)

  return file_list, input_directory, testcase_file_path

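
# Illustrative usage sketch (not part of ClusterFuzz): the contract of
# setup_testcase(). On any setup failure it logs, updates the testcase
# comment and/or reschedules the task itself, then returns (None, None, None),
# so callers only need to check file_list and bail out, as the task functions
# below do. The helper name here is hypothetical.
def _example_setup_usage(testcase_id, job_type):
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return None

  file_list, _, testcase_file_path = setup_testcase(testcase, job_type)
  if not file_list:
    # setup_testcase() has already handled error reporting and retry.
    return None

  return testcase_file_path
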
def find_fixed_range(testcase_id, job_type):
  """Attempt to find the revision range where a testcase was fixed."""
  deadline = tasks.get_task_completion_deadline()
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  if testcase.fixed:
    logs.log_error('Fixed range is already set as %s, skip.' % testcase.fixed)
    return

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Set a flag to indicate we are running progression task. This shows pending
  # status on the testcase report page and avoids conflicting testcase updates
  # by triage cron.
  testcase.set_metadata('progression_pending', True)

  # Custom binaries are handled as special cases.
  if build_manager.is_custom_binary():
    _check_fixed_for_custom_binary(testcase, job_type, testcase_file_path)
    return

  build_bucket_path = build_manager.get_primary_bucket_path()
  revision_list = build_manager.get_revisions_list(
      build_bucket_path, testcase=testcase)
  if not revision_list:
    data_handler.close_testcase_with_error(testcase_id,
                                           'Failed to fetch revision list')
    return

  # Use min_index and max_index to mark the start and end of the revision list
  # used for bisecting the progression range. Set start to the revision where
  # we noticed the crash. Set end to the trunk revision. Also, use the min and
  # max from a past run if it timed out.
  min_revision = testcase.get_metadata('last_progression_min')
  max_revision = testcase.get_metadata('last_progression_max')

  if min_revision or max_revision:
    # Clear these to avoid using them in the next run. If this run fails, then
    # we should try the next run without them to see if it succeeds. If this
    # run succeeds, we should still clear them to avoid capping the max
    # revision in the next run.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.delete_metadata('last_progression_min', update_testcase=False)
    testcase.delete_metadata('last_progression_max', update_testcase=False)
    testcase.put()

  last_tested_revision = testcase.get_metadata('last_tested_crash_revision')
  known_crash_revision = last_tested_revision or testcase.crash_revision

  if not min_revision:
    min_revision = known_crash_revision
  if not max_revision:
    max_revision = revisions.get_last_revision_in_list(revision_list)

  min_index = revisions.find_min_revision_index(revision_list, min_revision)
  if min_index is None:
    raise errors.BuildNotFoundError(min_revision, job_type)
  max_index = revisions.find_max_revision_index(revision_list, max_revision)
  if max_index is None:
    raise errors.BuildNotFoundError(max_revision, job_type)

  testcase = data_handler.get_testcase_by_id(testcase_id)
  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED,
                                       'r%d' % max_revision)

  # Check to see if this testcase is still crashing now. If it is, then just
  # bail out.
  result = _testcase_reproduces_in_revision(
      testcase,
      testcase_file_path,
      job_type,
      max_revision,
      update_metadata=True)
  if result.is_crash():
    logs.log('Found crash with same signature on latest revision r%d.' %
             max_revision)
    app_path = environment.get_value('APP_PATH')
    command = testcase_manager.get_command_line_for_application(
        testcase_file_path, app_path=app_path, needs_http=testcase.http_flag)
    symbolized_crash_stacktrace = result.get_stacktrace(symbolized=True)
    unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)
    stacktrace = utils.get_crash_stacktrace_output(
        command, symbolized_crash_stacktrace, unsymbolized_crash_stacktrace)
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.last_tested_crash_stacktrace = data_handler.filter_stacktrace(
        stacktrace)
    data_handler.update_progression_completion_metadata(
        testcase,
        max_revision,
        is_crash=True,
        message='still crashes on latest revision r%s' % max_revision)

    # Since we've verified that the test case is still crashing, clear out any
    # metadata indicating potential flake from previous runs.
    task_creation.mark_unreproducible_if_flaky(testcase, False)

    # For chromium project, save latest crash information for later upload
    # to chromecrash/.
    state = result.get_symbolized_data()
    crash_uploader.save_crash_info_if_needed(testcase_id, max_revision,
                                             job_type, state.crash_type,
                                             state.crash_address, state.frames)
    return

  if result.unexpected_crash:
    testcase.set_metadata('crashes_on_unexpected_state', True)
  else:
    testcase.delete_metadata('crashes_on_unexpected_state')

  # Don't burden NFS server with caching these random builds.
  environment.set_value('CACHE_STORE', False)

  # Verify that we do crash in the min revision. This is assumed to be true
  # while we are doing the bisect.
  result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                            job_type, min_revision)
  if result and not result.is_crash():
    testcase = data_handler.get_testcase_by_id(testcase_id)

    # Retry once on another bot to confirm our result.
    if data_handler.is_first_retry_for_task(testcase, reset_after_retry=True):
      tasks.add_task('progression', testcase_id, job_type)
      error_message = (
          'Known crash revision %d did not crash, will retry on another bot '
          'to confirm result' % known_crash_revision)
      data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                           error_message)
      data_handler.update_progression_completion_metadata(
          testcase, max_revision)
      return

    data_handler.clear_progression_pending(testcase)
    error_message = (
        'Known crash revision %d did not crash' % known_crash_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         error_message)
    task_creation.mark_unreproducible_if_flaky(testcase, True)
    return

  # Start a binary search to find the last non-crashing revision. At this
  # point, we know that we do crash in min_revision and do not crash in
  # max_revision.
  while time.time() < deadline:
    min_revision = revision_list[min_index]
    max_revision = revision_list[max_index]

    # If the min and max revisions are one apart, this is as much as we can
    # narrow the range.
    if max_index - min_index == 1:
      _save_fixed_range(testcase_id, min_revision, max_revision,
                        testcase_file_path)
      return

    # Occasionally, we get into this bad state. It seems to be related to test
    # cases with flaky stacks, but the exact cause is unknown.
    if max_index - min_index < 1:
      testcase = data_handler.get_testcase_by_id(testcase_id)
      testcase.fixed = 'NA'
      testcase.open = False
      message = ('Fixed testing errored out (min and max revisions '
                 'are both %d)' % min_revision)
      data_handler.update_progression_completion_metadata(
          testcase, max_revision, message=message)

      # Let the bisection service know about the NA status.
      bisection.request_bisection(testcase)
      return

    # Test the middle revision of our range.
    middle_index = (min_index + max_index) // 2
    middle_revision = revision_list[middle_index]

    testcase = data_handler.get_testcase_by_id(testcase_id)
    log_message = 'Testing r%d (current range %d:%d)' % (
        middle_revision, min_revision, max_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.WIP,
                                         log_message)

    try:
      result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                                job_type, middle_revision)
    except errors.BadBuildError:
      # Skip this revision.
      del revision_list[middle_index]
      max_index -= 1
      continue

    if result.is_crash():
      min_index = middle_index
    else:
      max_index = middle_index

    _save_current_fixed_range_indices(testcase_id, revision_list[min_index],
                                      revision_list[max_index])

  # If we've broken out of the loop, we've exceeded the deadline. Recreate the
  # task to pick up where we left off.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  error_message = ('Timed out, current range r%d:r%d' %
                   (revision_list[min_index], revision_list[max_index]))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                       error_message)
  tasks.add_task('progression', testcase_id, job_type)

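
# Illustrative sketch (not part of ClusterFuzz): the bisection invariant used
# above. Progression keeps min_index on a crashing revision and max_index on a
# non-crashing one, so once the indices are adjacent, the fixed range is the
# pair (revision_list[min_index], revision_list[max_index]). The regression
# task further below mirrors this with the crash/no-crash roles swapped. The
# helper and its crashes() predicate are hypothetical.
def _bisect_last_crashing(revision_list, crashes):
  """Returns (last_crashing, first_fixed), assuming crashes() is True at
  revision_list[0] and False at revision_list[-1]."""
  min_index, max_index = 0, len(revision_list) - 1
  while max_index - min_index > 1:
    middle_index = (min_index + max_index) // 2
    if crashes(revision_list[middle_index]):
      min_index = middle_index  # Still crashing; the fix landed later.
    else:
      max_index = middle_index  # Already fixed; the fix is at or before this.
  return revision_list[min_index], revision_list[max_index]
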
def process_command(task):
  """Figures out what to do with the given task and executes the command."""
  logs.log("Executing command '%s'" % task.payload())
  if not task.payload().strip():
    logs.log_error('Empty task received.')
    return

  # Parse task payload.
  task_name = task.command
  task_argument = task.argument
  job_name = task.job

  environment.set_value('TASK_NAME', task_name)
  environment.set_value('TASK_ARGUMENT', task_argument)
  environment.set_value('JOB_NAME', job_name)
  if job_name != 'none':
    job = data_types.Job.query(data_types.Job.name == job_name).get()
    # Job might be removed. In that case, we don't want an exception
    # raised and causing this task to be retried by another bot.
    if not job:
      logs.log_error("Job '%s' not found." % job_name)
      return

    if not job.platform:
      error_string = "No platform set for job '%s'" % job_name
      logs.log_error(error_string)
      raise errors.BadStateError(error_string)

    # A misconfiguration led to this point. Clean up the job if necessary.
    job_queue_suffix = tasks.queue_suffix_for_platform(job.platform)
    bot_queue_suffix = tasks.default_queue_suffix()

    if job_queue_suffix != bot_queue_suffix:
      # This happens rarely, store this as a hard exception.
      logs.log_error(
          'Wrong platform for job %s: job queue [%s], bot queue [%s].' %
          (job_name, job_queue_suffix, bot_queue_suffix))

      # Try to recreate the job in the correct task queue.
      new_queue = (
          tasks.high_end_queue() if task.high_end else tasks.regular_queue())
      new_queue += job_queue_suffix

      # Command override is continuously run by a bot. If we keep failing
      # and recreating the task, it will just DoS the entire task queue.
      # So, we don't create any new tasks in that case since it needs
      # manual intervention to fix the override anyway.
      if not task.is_command_override:
        try:
          tasks.add_task(task_name, task_argument, job_name, new_queue)
        except Exception:
          # This can happen on trying to publish on a non-existent topic, e.g.
          # a topic for a high-end bot on another platform. In this case, just
          # give up.
          logs.log_error('Failed to fix platform and re-add task.')

      # Add a wait interval to avoid overflowing task creation.
      failure_wait_interval = environment.get_value('FAIL_WAIT')
      time.sleep(failure_wait_interval)
      return

    if task_name != 'fuzz':
      # Make sure that our platform id matches that of the testcase (for
      # non-fuzz tasks).
      testcase = data_handler.get_entity_by_type_and_id(data_types.Testcase,
                                                        task_argument)
      if testcase:
        current_platform_id = environment.get_platform_id()
        testcase_platform_id = testcase.platform_id

        # This indicates we are trying to run this job on the wrong platform.
        # This can happen when you have different types of devices (e.g.
        # android) in the same platform group. In this case, we just recreate
        # the task.
        if (task_name != 'variant' and testcase_platform_id and
            not utils.fields_match(testcase_platform_id, current_platform_id)):
          logs.log(
              'Testcase %d platform (%s) does not match with ours (%s), exiting'
              % (testcase.key.id(), testcase_platform_id, current_platform_id))
          tasks.add_task(
              task_name,
              task_argument,
              job_name,
              wait_time=utils.random_number(1, TASK_RETRY_WAIT_LIMIT))
          return

    # Some fuzzers contain additional environment variables that should be
    # set for them. Append these for tests generated by these fuzzers and for
    # the fuzz command itself.
    fuzzer_name = None
    if task_name == 'fuzz':
      fuzzer_name = task_argument
    elif testcase:
      fuzzer_name = testcase.fuzzer_name

    # Get job's environment string.
    environment_string = job.get_environment_string()

    if task_name == 'minimize':
      # Let jobs specify a different job and fuzzer to minimize with.
      job_environment = job.get_environment()
      minimize_job_override = job_environment.get('MINIMIZE_JOB_OVERRIDE')
      if minimize_job_override:
        minimize_job = data_types.Job.query(
            data_types.Job.name == minimize_job_override).get()
        if minimize_job:
          environment.set_value('JOB_NAME', minimize_job_override)
          environment_string = minimize_job.get_environment_string()
          environment_string += '\nORIGINAL_JOB_NAME = %s\n' % job_name
          job_name = minimize_job_override
        else:
          logs.log_error(
              'Job for minimization not found: %s.' % minimize_job_override)
          # Fall back to using our own job for minimization.

      minimize_fuzzer_override = job_environment.get('MINIMIZE_FUZZER_OVERRIDE')
      fuzzer_name = minimize_fuzzer_override or fuzzer_name

    if fuzzer_name and not environment.is_engine_fuzzer_job(job_name):
      fuzzer = data_types.Fuzzer.query(
          data_types.Fuzzer.name == fuzzer_name).get()
      additional_default_variables = ''
      additional_variables_for_job = ''
      if (fuzzer and hasattr(fuzzer, 'additional_environment_string') and
          fuzzer.additional_environment_string):
        for line in fuzzer.additional_environment_string.splitlines():
          # Job specific values may be defined in fuzzer additional
          # environment variable name strings in the form
          # job_name:VAR_NAME = VALUE.
          if '=' in line and ':' in line.split('=', 1)[0]:
            fuzzer_job_name, environment_definition = line.split(':', 1)
            if fuzzer_job_name == job_name:
              additional_variables_for_job += '\n%s' % environment_definition
            continue

          additional_default_variables += '\n%s' % line

      environment_string += additional_default_variables
      environment_string += additional_variables_for_job

    # Update environment for the job.
    update_environment_for_job(environment_string)

  # Match the cpu architecture with the ones required in the job definition.
  # If they don't match, then bail out and recreate the task.
  if not is_supported_cpu_arch_for_job():
    logs.log(
        'Unsupported cpu architecture specified in job definition, exiting.')
    tasks.add_task(
        task_name,
        task_argument,
        job_name,
        wait_time=utils.random_number(1, TASK_RETRY_WAIT_LIMIT))
    return

  # Initial cleanup.
  cleanup_task_state()

  start_web_server_if_needed()

  try:
    run_command(task_name, task_argument, job_name)
  finally:
    # Final clean up.
    cleanup_task_state()

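
# Illustrative sketch (not part of ClusterFuzz): the fuzzer additional
# environment string format parsed above. Plain `VAR = VALUE` lines apply to
# every job; lines prefixed `job_name:` apply only to that job and are
# appended after the defaults so they take precedence. The helper name is
# hypothetical.
def _split_additional_environment(additional_environment_string, job_name):
  """Returns (default_lines, job_specific_lines)."""
  defaults, job_specific = [], []
  for line in additional_environment_string.splitlines():
    if '=' in line and ':' in line.split('=', 1)[0]:
      line_job_name, definition = line.split(':', 1)
      if line_job_name == job_name:
        job_specific.append(definition)
      continue  # Job-qualified lines for other jobs are dropped.
    defaults.append(line)
  return defaults, job_specific

# Example: for 'ASAN = 1\nlinux_asan_chrome:LSAN = 1' and job_name
# 'linux_asan_chrome', this yields (['ASAN = 1'], ['LSAN = 1']).
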
def find_regression_range(testcase_id, job_type):
  """Attempt to find when the testcase regressed."""
  deadline = tasks.get_task_completion_deadline()

  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  if testcase.regression:
    logs.log_error(
        'Regression range is already set as %s, skip.' % testcase.regression)
    return

  # This task is not applicable for custom binaries.
  if build_manager.is_custom_binary():
    testcase.regression = 'NA'
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.ERROR,
        'Not applicable for custom binaries')
    return

  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Failed to setup testcase')
    tasks.add_task('regression', testcase_id, job_type)
    return

  build_bucket_path = build_manager.get_primary_bucket_path()
  revision_list = build_manager.get_revisions_list(
      build_bucket_path, testcase=testcase)
  if not revision_list:
    data_handler.close_testcase_with_error(testcase_id,
                                           'Failed to fetch revision list')
    return

  # Don't burden NFS server with caching these random builds.
  environment.set_value('CACHE_STORE', False)

  # Pick up where we left off in a previous run if necessary.
  min_revision = testcase.get_metadata('last_regression_min')
  max_revision = testcase.get_metadata('last_regression_max')
  first_run = not min_revision and not max_revision
  if not min_revision:
    min_revision = revisions.get_first_revision_in_list(revision_list)
  if not max_revision:
    max_revision = testcase.crash_revision

  min_index = revisions.find_min_revision_index(revision_list, min_revision)
  if min_index is None:
    raise errors.BuildNotFoundError(min_revision, job_type)
  max_index = revisions.find_max_revision_index(revision_list, max_revision)
  if max_index is None:
    raise errors.BuildNotFoundError(max_revision, job_type)

  # Make sure that the revision where we noticed the crash still crashes.
  # Otherwise, our binary search algorithm won't work correctly.
  max_revision = revision_list[max_index]
  crashes_in_max_revision = _testcase_reproduces_in_revision(
      testcase, testcase_file_path, job_type, max_revision, should_log=False)
  if not crashes_in_max_revision:
    testcase = data_handler.get_testcase_by_id(testcase_id)
    error_message = 'Known crash revision %d did not crash' % max_revision
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         error_message)
    task_creation.mark_unreproducible_if_flaky(testcase, True)
    return

  # If we've made it this far, the test case appears to be reproducible. Clear
  # metadata from previous runs had it been marked as potentially flaky.
  task_creation.mark_unreproducible_if_flaky(testcase, False)

  # On the first run, check to see if we regressed near either the min or max
  # revision.
  if first_run and found_regression_near_extreme_revisions(
      testcase, testcase_file_path, job_type, revision_list, min_index,
      max_index):
    return

  while time.time() < deadline:
    min_revision = revision_list[min_index]
    max_revision = revision_list[max_index]

    # If the min and max revisions are one apart (or the same, if we only have
    # one build), this is as much as we can narrow the range.
    if max_index - min_index <= 1:
      # Verify that the regression range seems correct, and save it if so.
      if not validate_regression_range(testcase, testcase_file_path, job_type,
                                       revision_list, min_index):
        return

      save_regression_range(testcase_id, min_revision, max_revision)
      return

    middle_index = (min_index + max_index) // 2
    middle_revision = revision_list[middle_index]
    try:
      is_crash = _testcase_reproduces_in_revision(
          testcase,
          testcase_file_path,
          job_type,
          middle_revision,
          min_revision=min_revision,
          max_revision=max_revision)
    except errors.BadBuildError:
      # Skip this revision.
      del revision_list[middle_index]
      max_index -= 1
      continue

    if is_crash:
      max_index = middle_index
    else:
      min_index = middle_index

    _save_current_regression_range_indices(testcase_id,
                                           revision_list[min_index],
                                           revision_list[max_index])

  # If we've broken out of the above loop, we timed out. We'll finish by
  # running another regression task and picking up from this point.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  error_message = 'Timed out, current range r%d:r%d' % (
      revision_list[min_index], revision_list[max_index])
  data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                       error_message)
  tasks.add_task('regression', testcase_id, job_type)

def create_minimize_task_if_needed(testcase):
  """Creates a minimize task if needed."""
  tasks.add_task('minimize', testcase.key.id(), testcase.job_type)

def execute_task(testcase_id, job_type):
  """Run analyze task."""
  # Reset redzones.
  environment.reset_current_memory_tool_options(redzone_size=128)

  # Unset window location size and position properties so as to use defaults.
  environment.set_value('WINDOW_ARG', '')

  # Locate the testcase associated with the id.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  metadata = data_types.TestcaseUploadMetadata.query(
      data_types.TestcaseUploadMetadata.testcase_id == int(testcase_id)).get()
  if not metadata:
    logs.log_error(
        'Testcase %s has no associated upload metadata.' % testcase_id)
    testcase.key.delete()
    return

  is_lsan_enabled = environment.get_value('LSAN')
  if is_lsan_enabled:
    # Creates empty local blacklist so all leaks will be visible to uploader.
    leak_blacklist.create_empty_local_blacklist()

  # Store the bot name and timestamp in upload metadata.
  bot_name = environment.get_value('BOT_NAME')
  metadata.bot_name = bot_name
  metadata.timestamp = datetime.datetime.utcnow()
  metadata.put()

  # Adjust the test timeout, if the user has provided one.
  if metadata.timeout:
    environment.set_value('TEST_TIMEOUT', metadata.timeout)

  # Adjust the number of retries, if the user has provided one.
  if metadata.retries is not None:
    environment.set_value('CRASH_RETRIES', metadata.retries)

  # Set up testcase and get absolute testcase path.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Set up build.
  setup_build(testcase)

  # Check if we have an application path. If not, our build failed
  # to set up correctly.
  if not build_manager.check_app_path():
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Build setup failed')

    if data_handler.is_first_retry_for_task(testcase):
      build_fail_wait = environment.get_value('FAIL_WAIT')
      tasks.add_task(
          'analyze', testcase_id, job_type, wait_time=build_fail_wait)
    else:
      data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                   'Build setup failed')
    return

  # Update initial testcase information.
  testcase.absolute_path = testcase_file_path
  testcase.job_type = job_type
  testcase.binary_flag = utils.is_binary_file(testcase_file_path)
  testcase.queue = tasks.default_queue()
  testcase.crash_state = ''

  # Set initial testcase metadata fields (e.g. build url, etc).
  data_handler.set_initial_testcase_metadata(testcase)

  # Update minimized arguments and use ones provided during user upload.
  if not testcase.minimized_arguments:
    minimized_arguments = environment.get_value('APP_ARGS') or ''
    additional_command_line_flags = testcase.get_metadata(
        'uploaded_additional_args')
    if additional_command_line_flags:
      minimized_arguments += ' %s' % additional_command_line_flags
    environment.set_value('APP_ARGS', minimized_arguments)
    testcase.minimized_arguments = minimized_arguments

  # Update other fields not set at upload time.
  testcase.crash_revision = environment.get_value('APP_REVISION')
  data_handler.set_initial_testcase_metadata(testcase)
  testcase.put()

  # Initialize some variables.
  gestures = testcase.gestures
  http_flag = testcase.http_flag
  test_timeout = environment.get_value('TEST_TIMEOUT')

  # Get the crash output.
  result = testcase_manager.test_for_crash_with_retries(
      testcase,
      testcase_file_path,
      test_timeout,
      http_flag=http_flag,
      compare_crash=False)

  # If we don't get a crash, try enabling http to see if we can get a crash.
  # Skip engine fuzzer jobs (e.g. libFuzzer, AFL) for which http testcase paths
  # are not applicable.
  if (not result.is_crash() and not http_flag and
      not environment.is_engine_fuzzer_job()):
    result_with_http = testcase_manager.test_for_crash_with_retries(
        testcase,
        testcase_file_path,
        test_timeout,
        http_flag=True,
        compare_crash=False)
    if result_with_http.is_crash():
      logs.log('Testcase needs http flag for crash.')
      http_flag = True
      result = result_with_http

  # Refresh our object.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  # Set application command line with the correct http flag.
  application_command_line = (
      testcase_manager.get_command_line_for_application(
          testcase_file_path, needs_http=http_flag))

  # Get the crash data.
  crashed = result.is_crash()
  crash_time = result.get_crash_time()
  state = result.get_symbolized_data()
  unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)

  # Get crash info object with minidump info. Also, re-generate unsymbolized
  # stacktrace if needed.
  crash_info, _ = (
      crash_uploader.get_crash_info_and_stacktrace(
          application_command_line, state.crash_stacktrace, gestures))
  if crash_info:
    testcase.minidump_keys = crash_info.store_minidump()

  if not crashed:
    # Could not reproduce the crash.
    log_message = (
        'Testcase didn\'t crash in %d seconds (with retries)' % test_timeout)
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.FINISHED, log_message)

    # In the general case, we will not attempt to symbolize if we do not detect
    # a crash. For user uploads, we should symbolize anyway to provide more
    # information about what might be happening.
    crash_stacktrace_output = utils.get_crash_stacktrace_output(
        application_command_line, state.crash_stacktrace,
        unsymbolized_crash_stacktrace)
    testcase.crash_stacktrace = data_handler.filter_stacktrace(
        crash_stacktrace_output)

    # For an unreproducible testcase, retry once on another bot to confirm
    # our results and in case this bot is in a bad state which we didn't catch
    # through our usual means.
    if data_handler.is_first_retry_for_task(testcase):
      testcase.status = 'Unreproducible, retrying'
      testcase.put()
      tasks.add_task('analyze', testcase_id, job_type)
      return

    data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                 'Unreproducible')

    # A non-reproducing testcase might still impact production branches.
    # Add the impact task to get that information.
    task_creation.create_impact_task_if_needed(testcase)
    return

  # Update testcase crash parameters.
  testcase.http_flag = http_flag
  testcase.crash_type = state.crash_type
  testcase.crash_address = state.crash_address
  testcase.crash_state = state.crash_state
  crash_stacktrace_output = utils.get_crash_stacktrace_output(
      application_command_line, state.crash_stacktrace,
      unsymbolized_crash_stacktrace)
  testcase.crash_stacktrace = data_handler.filter_stacktrace(
      crash_stacktrace_output)

  # Try to guess if the bug is security or not.
  security_flag = crash_analyzer.is_security_issue(
      state.crash_stacktrace, state.crash_type, state.crash_address)
  testcase.security_flag = security_flag

  # If it is, guess the severity.
  if security_flag:
    testcase.security_severity = severity_analyzer.get_security_severity(
        state.crash_type, state.crash_stacktrace, job_type, bool(gestures))

  log_message = ('Testcase crashed in %d seconds (r%d)' %
                 (crash_time, testcase.crash_revision))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.FINISHED,
                                       log_message)

  # See if we have to ignore this crash.
  if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
    data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                 'Irrelevant')
    return

  # Test for reproducibility.
  one_time_crasher_flag = not testcase_manager.test_for_reproducibility(
      testcase.fuzzer_name, testcase.actual_fuzzer_name(), testcase_file_path,
      state.crash_state, security_flag, test_timeout, http_flag, gestures)
  testcase.one_time_crasher_flag = one_time_crasher_flag

  # Check to see if this is a duplicate.
  data_handler.check_uploaded_testcase_duplicate(testcase, metadata)

  # Set testcase and metadata status if not set already.
  if testcase.status == 'Duplicate':
    # For a testcase uploaded by bots (with quiet flag), don't create
    # additional tasks.
    if metadata.quiet_flag:
      data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                   'Duplicate')
      return
  else:
    # New testcase.
    testcase.status = 'Processed'
    metadata.status = 'Confirmed'

    # Reset the timestamp as well, to respect
    # data_types.MIN_ELAPSED_TIME_SINCE_REPORT. Otherwise it may get filed by
    # triage task prematurely without the grouper having a chance to run on
    # this testcase.
    testcase.timestamp = utils.utcnow()

    # Add new leaks to global blacklist to avoid detecting duplicates.
    # Only add if testcase has a direct leak crash and if it's reproducible.
    if is_lsan_enabled:
      leak_blacklist.add_crash_to_global_blacklist_if_needed(testcase)

  # Update the testcase values.
  testcase.put()

  # Update the upload metadata.
  metadata.security_flag = security_flag
  metadata.put()

  _add_default_issue_metadata(testcase)

  # Create tasks to
  # 1. Minimize testcase (minimize).
  # 2. Find regression range (regression).
  # 3. Find testcase impact on production branches (impact).
  # 4. Check whether testcase is fixed (progression).
  # 5. Get second stacktrace from another job in case of
  #    one-time crashes (stack).
  task_creation.create_tasks(testcase)

def execute_task(metadata_id, job_type):
  """Unpack a bundled testcase archive and create analyze jobs for each
  item."""
  metadata = ndb.Key(data_types.BundledArchiveMetadata, int(metadata_id)).get()
  if not metadata:
    logs.log_error('Invalid bundle metadata id %s.' % metadata_id)
    return

  bot_name = environment.get_value('BOT_NAME')
  upload_metadata = data_types.TestcaseUploadMetadata.query(
      data_types.TestcaseUploadMetadata.blobstore_key ==
      metadata.blobstore_key).get()
  if not upload_metadata:
    logs.log_error('Invalid upload metadata key %s.' % metadata.blobstore_key)
    return

  job = data_types.Job.query(data_types.Job.name == metadata.job_type).get()
  if not job:
    logs.log_error('Invalid job_type %s.' % metadata.job_type)
    return

  # Update the upload metadata with this bot name.
  upload_metadata.bot_name = bot_name
  upload_metadata.put()

  # We can't use FUZZ_INPUTS directory since it is constrained by tmpfs limits.
  testcases_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Retrieve multi-testcase archive.
  archive_path = os.path.join(testcases_directory, metadata.archive_filename)
  if not blobs.read_blob_to_disk(metadata.blobstore_key, archive_path):
    # Note: metadata_id is a string task argument, so use %s.
    logs.log_error('Could not retrieve archive for bundle %s.' % metadata_id)
    tasks.add_task('unpack', metadata_id, job_type)
    return

  try:
    archive.unpack(archive_path, testcases_directory)
  except Exception:
    logs.log_error('Could not unpack archive for bundle %s.' % metadata_id)
    tasks.add_task('unpack', metadata_id, job_type)
    return

  # Get additional testcase metadata (if any).
  additional_metadata = None
  if upload_metadata.additional_metadata_string:
    additional_metadata = json.loads(
        upload_metadata.additional_metadata_string)

  archive_state = data_types.ArchiveStatus.NONE
  bundled = True

  file_list = archive.get_file_list(archive_path)
  for file_path in file_list:
    absolute_file_path = os.path.join(testcases_directory, file_path)
    filename = os.path.basename(absolute_file_path)

    # Only files are actual testcases. Skip directories.
    if not os.path.isfile(absolute_file_path):
      continue

    try:
      # Use a context manager so the handle is closed even if writing fails.
      with open(absolute_file_path, 'rb') as file_handle:
        blob_key = blobs.write_blob(file_handle)
    except Exception:
      blob_key = None

    if not blob_key:
      logs.log_error(
          'Could not write testcase %s to blobstore.' % absolute_file_path)
      continue

    data_handler.create_user_uploaded_testcase(
        blob_key, metadata.blobstore_key, archive_state,
        metadata.archive_filename, filename, metadata.timeout, job,
        metadata.job_queue, metadata.http_flag, metadata.gestures,
        metadata.additional_arguments, metadata.bug_information,
        metadata.crash_revision, metadata.uploader_email, metadata.platform_id,
        metadata.app_launch_command, metadata.fuzzer_name,
        metadata.overridden_fuzzer_name, metadata.fuzzer_binary_name, bundled,
        upload_metadata.retries, upload_metadata.bug_summary_update_flag,
        upload_metadata.quiet_flag, additional_metadata)

  # The upload metadata for the archive is not needed anymore since we created
  # one for each testcase.
  upload_metadata.key.delete()

  shell.clear_testcase_directories()

def execute_task(testcase_id, job_type):
  """Attempt to find if the testcase affects release branches on Chromium."""
  # This shouldn't ever get scheduled, but check just in case.
  if not utils.is_chromium():
    return

  # Locate the testcase associated with the id.
  testcase = data_handler.get_testcase_by_id(testcase_id)

  # If this testcase is fixed, we should no longer be doing impact testing.
  if testcase.fixed and testcase.is_impact_set_flag:
    return

  # For testcases with status unreproducible, we do impact analysis just once.
  if testcase.is_status_unreproducible() and testcase.is_impact_set_flag:
    return

  # Update comments only after checking the above bailout conditions.
  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  # This task is not applicable to unreproducible testcases.
  if testcase.one_time_crasher_flag:
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.ERROR,
        'Not applicable for unreproducible testcases')
    return

  # This task is not applicable for custom binaries. We cannot remove the
  # creation of such tasks specifically for custom binary testcases in cron,
  # so exit gracefully.
  if build_manager.is_custom_binary():
    data_handler.update_testcase_comment(testcase,
                                         data_types.TaskState.FINISHED,
                                         'Not applicable for custom binaries')
    return

  # If we don't have a stable or beta build url pattern, we try to use build
  # information url to make a guess.
  if not build_manager.has_production_builds():
    if not testcase.regression:
      data_handler.update_testcase_comment(
          testcase, data_types.TaskState.FINISHED,
          'Cannot run without regression range, will re-run once regression '
          'task finishes')
      return

    impacts = get_impacts_from_url(testcase.regression, testcase.job_type)
    testcase = data_handler.get_testcase_by_id(testcase_id)
    set_testcase_with_impacts(testcase, impacts)
    data_handler.update_testcase_comment(testcase,
                                         data_types.TaskState.FINISHED)
    return

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Setup extended stable, stable and beta builds, and get impact and crash
  # stacktrace.
  try:
    impacts = get_impacts_on_prod_builds(testcase, testcase_file_path)
  except BuildFailedException as error:
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         str(error))
    tasks.add_task(
        'impact',
        testcase_id,
        job_type,
        wait_time=environment.get_value('FAIL_WAIT'))
    return

  testcase = data_handler.get_testcase_by_id(testcase_id)
  set_testcase_with_impacts(testcase, impacts)

  # Set stacktrace in case we have an unreproducible crash on trunk,
  # but it crashes on one of the production builds.
  if testcase.is_status_unreproducible() and impacts.get_extra_trace():
    testcase.crash_stacktrace = data_handler.filter_stacktrace(
        '%s\n\n%s' % (data_handler.get_stacktrace(testcase),
                      impacts.get_extra_trace()))

  data_handler.update_testcase_comment(testcase, data_types.TaskState.FINISHED)

def do_post(self):
  """Upload a testcase."""
  email = helpers.get_user_email()
  testcase_id = request.get('testcaseId')
  uploaded_file = self.get_upload()
  if testcase_id and not uploaded_file:
    testcase = helpers.get_testcase(testcase_id)
    if not access.can_user_access_testcase(testcase):
      raise helpers.AccessDeniedException()

    # Use minimized testcase for upload (if available).
    key = (
        testcase.minimized_keys if testcase.minimized_keys and
        testcase.minimized_keys != 'NA' else testcase.fuzzed_keys)

    uploaded_file = blobs.get_blob_info(key)

    # Extract filename part from blob.
    uploaded_file.filename = os.path.basename(
        uploaded_file.filename.replace('\\', os.sep))

  job_type = request.get('job')
  if not job_type:
    raise helpers.EarlyExitException('Missing job name.', 400)

  job = data_types.Job.query(data_types.Job.name == job_type).get()
  if not job:
    raise helpers.EarlyExitException('Invalid job name.', 400)

  fuzzer_name = request.get('fuzzer')
  job_type_lowercase = job_type.lower()
  for engine in fuzzing.ENGINES:
    if engine.lower() in job_type_lowercase:
      fuzzer_name = engine

  is_engine_job = fuzzer_name and environment.is_engine_fuzzer_job(job_type)
  target_name = request.get('target')
  if not is_engine_job and target_name:
    raise helpers.EarlyExitException(
        'Target name is not applicable to non-engine jobs (AFL, libFuzzer).',
        400)

  if is_engine_job and not target_name:
    raise helpers.EarlyExitException(
        'Missing target name for engine job (AFL, libFuzzer).', 400)

  if (target_name and
      not data_types.Fuzzer.VALID_NAME_REGEX.match(target_name)):
    raise helpers.EarlyExitException('Invalid target name.', 400)

  fully_qualified_fuzzer_name = ''
  if is_engine_job and target_name:
    if job.is_external():
      # External jobs don't run and set FuzzTarget entities as part of
      # fuzz_task. Set it here instead.
      fuzz_target = (
          data_handler.record_fuzz_target(fuzzer_name, target_name, job_type))
      fully_qualified_fuzzer_name = fuzz_target.fully_qualified_name()
      target_name = fuzz_target.binary
    else:
      fully_qualified_fuzzer_name, target_name = find_fuzz_target(
          fuzzer_name, target_name, job_type)

  if (not access.has_access(
      need_privileged_access=False,
      job_type=job_type,
      fuzzer_name=(fully_qualified_fuzzer_name or fuzzer_name)) and
      not _is_uploader_allowed(email)):
    raise helpers.AccessDeniedException()

  multiple_testcases = bool(request.get('multiple'))
  http_flag = bool(request.get('http'))
  high_end_job = bool(request.get('highEnd'))
  bug_information = request.get('issue')
  crash_revision = request.get('revision')
  timeout = request.get('timeout')
  retries = request.get('retries')
  bug_summary_update_flag = bool(request.get('updateIssue'))
  quiet_flag = bool(request.get('quiet'))
  additional_arguments = request.get('args')
  app_launch_command = request.get('cmd')
  platform_id = request.get('platform')
  issue_labels = request.get('issue_labels')
  gestures = request.get('gestures') or '[]'
  stacktrace = request.get('stacktrace')

  crash_data = None
  if job.is_external():
    if not stacktrace:
      raise helpers.EarlyExitException(
          'Stacktrace required for external jobs.', 400)

    if not crash_revision:
      raise helpers.EarlyExitException(
          'Revision required for external jobs.', 400)

    crash_data = stack_analyzer.get_crash_data(
        stacktrace,
        fuzz_target=target_name,
        symbolize_flag=False,
        already_symbolized=True,
        detect_ooms_and_hangs=True)
  elif stacktrace:
    raise helpers.EarlyExitException(
        'Should not specify stacktrace for non-external jobs.', 400)

  testcase_metadata = request.get('metadata', {})
  if testcase_metadata:
    try:
      testcase_metadata = json.loads(testcase_metadata)
    except Exception as e:
      raise helpers.EarlyExitException('Invalid metadata JSON.', 400) from e
    if not isinstance(testcase_metadata, dict):
      raise helpers.EarlyExitException('Metadata is not a JSON object.', 400)
  if issue_labels:
    testcase_metadata['issue_labels'] = issue_labels

  try:
    gestures = ast.literal_eval(gestures)
  except Exception as e:
    raise helpers.EarlyExitException('Failed to parse gestures.', 400) from e

  archive_state = 0
  bundled = False
  file_path_input = ''

  # Certain modifications such as app launch command, issue updates are only
  # allowed for privileged users.
  privileged_user = access.has_access(need_privileged_access=True)
  if not privileged_user:
    if bug_information or bug_summary_update_flag:
      raise helpers.EarlyExitException(
          'You are not privileged to update existing issues.', 400)

    need_privileged_access = utils.string_is_true(
        data_handler.get_value_from_job_definition(job_type,
                                                   'PRIVILEGED_ACCESS'))
    if need_privileged_access:
      raise helpers.EarlyExitException(
          'You are not privileged to run this job type.', 400)

    if app_launch_command:
      raise helpers.EarlyExitException(
          'You are not privileged to run arbitrary launch commands.', 400)

    if (testcase_metadata and
        not _allow_unprivileged_metadata(testcase_metadata)):
      raise helpers.EarlyExitException(
          'You are not privileged to set testcase metadata.', 400)

    if additional_arguments:
      raise helpers.EarlyExitException(
          'You are not privileged to add command-line arguments.', 400)

    if gestures:
      raise helpers.EarlyExitException(
          'You are not privileged to run arbitrary gestures.', 400)

  if crash_revision and crash_revision.isdigit():
    crash_revision = int(crash_revision)
  else:
    crash_revision = 0

  if bug_information == '0':  # Auto-recover from this bad input.
    bug_information = None
  if bug_information and not bug_information.isdigit():
    raise helpers.EarlyExitException('Bug is not a number.', 400)

  if not timeout:
    timeout = 0
  elif not timeout.isdigit() or timeout == '0':
    raise helpers.EarlyExitException(
        'Testcase timeout must be a number greater than 0.', 400)
  else:
    timeout = int(timeout)
    if timeout > 120:
      raise helpers.EarlyExitException(
          'Testcase timeout may not be greater than 120 seconds.', 400)

  if retries:
    if retries.isdigit():
      retries = int(retries)
    else:
      retries = None

    if retries is None or retries > MAX_RETRIES:
      raise helpers.EarlyExitException(
          'Testcase retries must be a number less than %d.' % MAX_RETRIES, 400)
  else:
    retries = None

  job_queue = tasks.queue_for_job(job_type, is_high_end=high_end_job)

  if uploaded_file is not None:
    filename = ''.join(
        [x for x in uploaded_file.filename if x not in ' ;/?:@&=+$,{}|<>()\\'])
    key = str(uploaded_file.key())
    if archive.is_archive(filename):
      archive_state = data_types.ArchiveStatus.FUZZED
    if archive_state:
      if multiple_testcases:
        # Create a job to unpack an archive.
        metadata = data_types.BundledArchiveMetadata()
        metadata.blobstore_key = key
        metadata.timeout = timeout
        metadata.job_queue = job_queue
        metadata.job_type = job_type
        metadata.http_flag = http_flag
        metadata.archive_filename = filename
        metadata.uploader_email = email
        metadata.gestures = gestures
        metadata.crash_revision = crash_revision
        metadata.additional_arguments = additional_arguments
        metadata.bug_information = bug_information
        metadata.platform_id = platform_id
        metadata.app_launch_command = app_launch_command
        metadata.fuzzer_name = fuzzer_name
        metadata.overridden_fuzzer_name = fully_qualified_fuzzer_name
        metadata.fuzzer_binary_name = target_name
        metadata.put()

        tasks.add_task(
            'unpack',
            str(metadata.key.id()),
            job_type,
            queue=tasks.queue_for_job(job_type))

        # Create a testcase metadata object to show the user their upload.
        upload_metadata = data_types.TestcaseUploadMetadata()
        upload_metadata.timestamp = datetime.datetime.utcnow()
        upload_metadata.filename = filename
        upload_metadata.blobstore_key = key
        upload_metadata.original_blobstore_key = key
        upload_metadata.status = 'Pending'
        upload_metadata.bundled = True
        upload_metadata.uploader_email = email
        upload_metadata.retries = retries
        upload_metadata.bug_summary_update_flag = bug_summary_update_flag
        upload_metadata.quiet_flag = quiet_flag
        upload_metadata.additional_metadata_string = json.dumps(
            testcase_metadata)
        upload_metadata.bug_information = bug_information
        upload_metadata.put()

        helpers.log('Uploaded multiple testcases.', helpers.VIEW_OPERATION)
        return self.render_json({'multiple': True})

      file_path_input = guess_input_file(uploaded_file, filename)
      if not file_path_input:
        raise helpers.EarlyExitException(
            ("Unable to detect which file to launch. The main file's name "
             'must contain either of %s.' % str(RUN_FILE_PATTERNS)), 400)
  else:
    raise helpers.EarlyExitException('Please select a file to upload.', 400)

  testcase_id = data_handler.create_user_uploaded_testcase(
      key,
      key,
      archive_state,
      filename,
      file_path_input,
      timeout,
      job,
      job_queue,
      http_flag,
      gestures,
      additional_arguments,
      bug_information,
      crash_revision,
      email,
      platform_id,
      app_launch_command,
      fuzzer_name,
      fully_qualified_fuzzer_name,
      target_name,
      bundled,
      retries,
      bug_summary_update_flag,
      quiet_flag,
      additional_metadata=testcase_metadata,
      crash_data=crash_data)

  if not quiet_flag:
    testcase = data_handler.get_testcase_by_id(testcase_id)
    issue = issue_tracker_utils.get_issue_for_testcase(testcase)
    if issue:
      report_url = data_handler.TESTCASE_REPORT_URL.format(
          domain=data_handler.get_domain(), testcase_id=testcase_id)

      comment = ('ClusterFuzz is analyzing your testcase. '
                 'Developers can follow the progress at %s.' % report_url)
      issue.save(new_comment=comment)

  helpers.log('Uploaded testcase %s' % testcase_id, helpers.VIEW_OPERATION)
  return self.render_json({'id': '%s' % testcase_id})

def execute_task(testcase_id, job_type):
  """Execute a symbolize command."""
  # Locate the testcase associated with the id.
  testcase = data_handler.get_testcase_by_id(testcase_id)

  # We should at least have a symbolized debug or release build.
  if not build_manager.has_symbolized_builds():
    return

  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Initialize variables.
  build_fail_wait = environment.get_value('FAIL_WAIT')
  old_crash_stacktrace = data_handler.get_stacktrace(testcase)
  sym_crash_type = testcase.crash_type
  sym_crash_address = testcase.crash_address
  sym_crash_state = testcase.crash_state
  sym_redzone = DEFAULT_REDZONE
  warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

  # Decide which build revision to use.
  if testcase.crash_stacktrace == 'Pending':
    # This usually happens when someone clicked the 'Update stacktrace from
    # trunk' button on the testcase details page. In this case, we are forced
    # to use trunk. No revision -> trunk build.
    build_revision = None
  else:
    build_revision = testcase.crash_revision

  # Set up a custom or regular build based on revision.
  build_manager.setup_build(build_revision)

  # Get crash revision used in setting up build.
  crash_revision = environment.get_value('APP_REVISION')

  if not build_manager.check_app_path():
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Build setup failed')
    tasks.add_task(
        'symbolize', testcase_id, job_type, wait_time=build_fail_wait)
    return

  # ASAN tool settings (if the tool is used).
  # See if we can get better stacks with higher redzone sizes.
  # A UAF might actually turn out to be OOB read/write with a bigger redzone.
  if environment.tool_matches('ASAN', job_type) and testcase.security_flag:
    redzone = MAX_REDZONE
    while redzone >= MIN_REDZONE:
      # Use the current loop redzone size, halving it on each iteration.
      environment.reset_current_memory_tool_options(
          redzone_size=redzone, disable_ubsan=testcase.disable_ubsan)

      process_handler.terminate_stale_application_instances()
      command = testcase_manager.get_command_line_for_application(
          testcase_file_path, needs_http=testcase.http_flag)
      return_code, crash_time, output = (
          process_handler.run_process(
              command, timeout=warmup_timeout, gestures=testcase.gestures))
      crash_result = CrashResult(return_code, crash_time, output)

      if crash_result.is_crash() and 'AddressSanitizer' in output:
        state = crash_result.get_symbolized_data()
        security_flag = crash_result.is_security_issue()

        if (not crash_analyzer.ignore_stacktrace(state.crash_stacktrace) and
            security_flag == testcase.security_flag and
            state.crash_type == testcase.crash_type and
            (state.crash_type != sym_crash_type or
             state.crash_state != sym_crash_state)):
          logs.log('Changing crash parameters.\nOld : %s, %s, %s' %
                   (sym_crash_type, sym_crash_address, sym_crash_state))

          sym_crash_type = state.crash_type
          sym_crash_address = state.crash_address
          sym_crash_state = state.crash_state
          sym_redzone = redzone
          old_crash_stacktrace = state.crash_stacktrace

          logs.log('\nNew : %s, %s, %s' % (sym_crash_type, sym_crash_address,
                                           sym_crash_state))
          break

      redzone //= 2

  # We should have at least a symbolized debug or a release build.
  symbolized_builds = build_manager.setup_symbolized_builds(crash_revision)
  if (not symbolized_builds or
      (not build_manager.check_app_path() and
       not build_manager.check_app_path('APP_PATH_DEBUG'))):
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Build setup failed')
    tasks.add_task(
        'symbolize', testcase_id, job_type, wait_time=build_fail_wait)
    return

  # Increase malloc_context_size to get all stack frames. Default is 30.
  environment.reset_current_memory_tool_options(
      redzone_size=sym_redzone,
      malloc_context_size=STACK_FRAME_COUNT,
      symbolize_inline_frames=True,
      disable_ubsan=testcase.disable_ubsan)

  # TSAN tool settings (if the tool is used).
  if environment.tool_matches('TSAN', job_type):
    environment.set_tsan_max_history_size()

  # Do the symbolization if supported by this application.
  result, sym_crash_stacktrace = (
      get_symbolized_stacktraces(testcase_file_path, testcase,
                                 old_crash_stacktrace, sym_crash_state))

  # Update crash parameters.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  testcase.crash_type = sym_crash_type
  testcase.crash_address = sym_crash_address
  testcase.crash_state = sym_crash_state
  testcase.crash_stacktrace = (
      data_handler.filter_stacktrace(sym_crash_stacktrace))

  if not result:
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.ERROR,
        'Unable to reproduce crash, skipping stacktrace update')
  else:
    # Switch build url to use the less-optimized symbolized build with better
    # stacktrace.
    build_url = environment.get_value('BUILD_URL')
    if build_url:
      testcase.set_metadata('build_url', build_url, update_testcase=False)

    data_handler.update_testcase_comment(testcase,
                                         data_types.TaskState.FINISHED)

  testcase.symbolized = True
  testcase.crash_revision = crash_revision
  testcase.put()

  # We might have updated the crash state. See if we need to mark it as a
  # duplicate based on other testcases.
  data_handler.handle_duplicate_entry(testcase)

  task_creation.create_blame_task_if_needed(testcase)

  # Switch current directory before builds cleanup.
  root_directory = environment.get_value('ROOT_DIR')
  os.chdir(root_directory)

  # Cleanup symbolized builds which are space-heavy.
  symbolized_builds.delete()

def get(self):
  assert self.task
  tasks.add_task(self.task, self.argument, self.job_type)