def _upload_kernel_coverage_data(kcov_path, kernel_bid):
  """Upload kcov data to a cloud storage bucket."""
  bucket_name = local_config.ProjectConfig().get('coverage.reports.bucket')
  if not bucket_name:
    return

  formatted_date = str(utils.utcnow().date().isoformat())
  identifier = environment.get_value('BOT_NAME') + str(
      utils.utcnow().isoformat())

  gcs_url = (f'gs://{bucket_name}/syzkaller/{formatted_date}/{kernel_bid}/'
             f'{identifier}')
  if storage.copy_file_to(kcov_path, gcs_url):
    logs.log(f'Copied kcov data to {gcs_url}.')
def _make_corpus_backup_public(target, corpus_fuzzer_name_override,
                               corpus_backup_bucket_name):
  """Identifies old corpus backups and makes them public."""
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
      corpus_backup_bucket_name, corpus_fuzzer_name_override or target.engine,
      target.project_qualified_name(), corpus_backup_date)

  if not storage.get(corpus_backup_url):
    logs.log_warn('Failed to find corpus backup %s.' % corpus_backup_url)
    return

  if not _set_public_acl_if_needed(corpus_backup_url):
    return

  filename = (
      corpus_manager.PUBLIC_BACKUP_TIMESTAMP + os.extsep +
      corpus_manager.BACKUP_ARCHIVE_FORMAT)
  public_url = os.path.join(os.path.dirname(corpus_backup_url), filename)

  if not storage.copy_blob(corpus_backup_url, public_url):
    logs.log_error(
        'Failed to overwrite %s with the latest public corpus backup.' %
        public_url)
    return

  if not _set_public_acl_if_needed(public_url):
    return

  logs.log('Corpus backup %s is now marked public.' % corpus_backup_url)
def generate_csrf_token(length=64, valid_seconds=3600, html=False):
  """Generate a CSRF token."""
  now = utils.utcnow()
  valid_token = None

  # Clean up expired tokens to prevent junk from building up in the datastore.
  tokens = data_types.CSRFToken.query(
      data_types.CSRFToken.user_email == helpers.get_user_email())
  tokens_to_delete = []
  for token in tokens:
    if token.expiration_time > now:
      valid_token = token
      continue
    tokens_to_delete.append(token.key)
  ndb_utils.delete_multi(tokens_to_delete)

  # Generate a new token.
  if not valid_token:
    valid_token = data_types.CSRFToken()
    valid_token.value = base64.b64encode(os.urandom(length))
    valid_token.expiration_time = (
        now + datetime.timedelta(seconds=valid_seconds))
    valid_token.user_email = helpers.get_user_email()
    valid_token.put()

  value = valid_token.value
  if html:
    return '<input type="hidden" name="csrf_token" value="%s" />' % value
  return value
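# Illustrative sketch (not part of the original code): with html=True the
# token can be dropped straight into a form template. The surrounding form
# markup and action URL here are hypothetical.
form_html = ('<form method="post" action="/upload-testcase">%s'
             '<input type="submit" /></form>' % generate_csrf_token(html=True))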
def add_task(command, argument, job_type, queue=None, wait_time=None):
  """Add a new task to the job queue."""
  # Old testcases may pass in queue=None explicitly,
  # so we must check this here.
  if not queue:
    queue = default_queue()

  if wait_time is None:
    wait_time = random.randint(1, TASK_CREATION_WAIT_INTERVAL)

  if job_type != 'none':
    job = data_types.Job.query(data_types.Job.name == job_type).get()
    if not job:
      raise Error(f'Job {job_type} not found.')

    if job.is_external():
      external_tasks.add_external_task(command, argument, job)
      return

  # Add the task.
  eta = utils.utcnow() + datetime.timedelta(seconds=wait_time)
  task = Task(command, argument, job_type, eta=eta)
  pubsub_client = pubsub.PubSubClient()
  pubsub_client.publish(
      pubsub.topic_name(utils.get_application_id(), queue),
      [task.to_pubsub_message()])
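# Illustrative sketch (not part of the original code): scheduling a delayed
# retry of the analyze task, mirroring the call made when build setup fails.
# |testcase_id|, |job_type| and |build_fail_wait| are placeholders here.
add_task('analyze', testcase_id, job_type, wait_time=build_fail_wait)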
def _past_day_formatter(query_format, dataset):
  """Simple formatter to get stats for the past day."""
  end_time = utils.utcnow().date()
  start_time = end_time - datetime.timedelta(days=1)
  return query_format.format(
      dataset=dataset, start_time=start_time, end_time=end_time)
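# Illustrative sketch (not part of the original code): a hypothetical query
# template using the {dataset}, {start_time} and {end_time} placeholders that
# this formatter fills in; the table and column names are invented.
PAST_DAY_QUERY = (
    'SELECT fuzzer, COUNT(*) AS runs FROM {dataset}.TestcaseRun '
    "WHERE _PARTITIONTIME BETWEEN TIMESTAMP('{start_time}') "
    "AND TIMESTAMP('{end_time}') GROUP BY fuzzer")
query = _past_day_formatter(PAST_DAY_QUERY, dataset='example_stats')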
def _get_date(date_value, days_ago):
  """Returns |date_value| if it is not empty; otherwise returns the date
  |days_ago| number of days ago."""
  if date_value:
    return date_value

  date_datetime = utils.utcnow() - datetime.timedelta(days=days_ago)
  return date_datetime.strftime('%Y-%m-%d')
def _coverage_formatter(query_format, dataset):
  """Prepare a query to check for changes in coverage week over week."""
  end_date = utils.utcnow().date() - datetime.timedelta(days=1)
  middle_date = end_date - datetime.timedelta(days=7)
  start_date = end_date - datetime.timedelta(days=14)
  return query_format.format(
      dataset=dataset,
      start_date=start_date,
      middle_date=middle_date,
      end_date=end_date)
def time_has_expired(timestamp,
                     compare_to=None,
                     days=0,
                     hours=0,
                     minutes=0,
                     seconds=0):
  """Checks to see if a timestamp is older than another by a certain amount."""
  if compare_to is None:
    compare_to = utils.utcnow()

  total_time = days * 3600 * 24 + hours * 3600 + minutes * 60 + seconds
  return (compare_to - timestamp).total_seconds() > total_time
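# Illustrative sketch (not part of the original code): treating a record as
# stale once its timestamp is more than a day old. |last_ping_time| is a
# placeholder.
if time_has_expired(last_ping_time, days=1):
  pass  # e.g. mark the record as stale and skip it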
def defer(self):
  """Defer a task until its ETA. Returns whether or not we deferred."""
  now = utils.utcnow()
  if now >= self.eta:
    return False

  # Extend the deadline until the ETA, or MAX_ACK_DEADLINE.
  time_until_eta = int((self.eta - now).total_seconds())
  logs.log('Deferring task "%s".' % self.payload())
  self._pubsub_message.modify_ack_deadline(
      min(pubsub.MAX_ACK_DEADLINE, time_until_eta))

  return True
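# Illustrative sketch (not part of the original code): a hypothetical consumer
# loop that skips messages whose ETA has not arrived yet; defer() extends the
# ack deadline so the message is redelivered later. |leased_tasks| and
# process() are placeholders.
for task in leased_tasks:
  if task.defer():
    continue  # Not due yet; move on to the next message.
  process(task)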
def backup_corpus(backup_bucket_name, corpus, directory):
  """Archive and store corpus as a backup.

  Args:
    backup_bucket_name: Backup bucket.
    corpus: The FuzzTargetCorpus.
    directory: Path to the directory to be archived and backed up.

  Returns:
    The backup GCS url, or None on failure.
  """
  if not backup_bucket_name:
    logs.log('No backup bucket provided, skipping corpus backup.')
    return None

  dated_backup_url = None
  timestamp = str(utils.utcnow().date())

  # The archive path for shutil.make_archive should be without an extension.
  backup_archive_path = os.path.join(
      os.path.dirname(os.path.normpath(directory)), timestamp)
  try:
    backup_archive_path = shutil.make_archive(backup_archive_path,
                                              BACKUP_ARCHIVE_FORMAT, directory)
    dated_backup_url = gcs_url_for_backup_file(
        backup_bucket_name, corpus.engine,
        corpus.project_qualified_target_name, timestamp)
    if not storage.copy_file_to(backup_archive_path, dated_backup_url):
      return None

    latest_backup_url = gcs_url_for_backup_file(
        backup_bucket_name, corpus.engine,
        corpus.project_qualified_target_name, LATEST_BACKUP_TIMESTAMP)
    if not storage.copy_blob(dated_backup_url, latest_backup_url):
      logs.log_error(
          'Failed to update latest corpus backup at "%s"' % latest_backup_url)
  except Exception as ex:
    logs.log_error(
        'backup_corpus failed: %s\n' % str(ex),
        backup_bucket_name=backup_bucket_name,
        directory=directory,
        backup_archive_path=backup_archive_path)
  finally:
    # Remove backup archive.
    shell.remove_file(backup_archive_path)

  return dated_backup_url
def get_next_end_hour():
  """Get the next end hour. If it's too early to compute data for the next end
  hour, raise TooEarlyException."""
  last_successful_hour = get_last_successful_hour_or_start_hour()
  if not last_successful_hour:
    # No crashes seen, too early to start building stats.
    raise TooEarlyException()

  next_end_hour = last_successful_hour + 1

  next_datetime = crash_stats.get_datetime(next_end_hour)
  if (utils.utcnow() - next_datetime) <= BIGQUERY_INSERTION_DELAY:
    raise TooEarlyException()

  return next_end_hour
def _check_last_get_build_time(self, projects, build_type):
  """Check that builds are up to date."""
  for project in projects:
    project_name = project['name']
    builds = project['history']
    if not builds:
      continue

    build = builds[0]
    time_since_last_build = utils.utcnow() - get_build_time(build)
    if time_since_last_build >= NO_BUILDS_THRESHOLD:
      # Something likely went wrong with the build infrastructure, so log an
      # error.
      logs.log_error(
          '%s has not been built in %s config for %d days.' %
          (project_name, build_type, time_since_last_build.days))
def _cross_pollinate_other_fuzzer_corpuses(self):
  """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
    project_qualified_name = (
        cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
    backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
    corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

    corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name, corpus_engine_name, project_qualified_name,
        corpus_backup_date)
    corpus_backup_local_filename = '%s-%s' % (
        project_qualified_name, os.path.basename(corpus_backup_url))
    corpus_backup_local_path = os.path.join(self.shared_corpus_path,
                                            corpus_backup_local_filename)

    if not storage.exists(corpus_backup_url, ignore_errors=True):
      # This can happen when a new fuzz target is checked in, or if we missed
      # capturing a backup for a particular day (for OSS-Fuzz this will result
      # in a 403 instead of a 404, since that GCS path belongs to another
      # project). So, just log a warning for debugging purposes only.
      logs.log_warn(
          'Corpus backup does not exist, ignoring: %s.' % corpus_backup_url)
      continue

    if not storage.copy_file_from(corpus_backup_url, corpus_backup_local_path):
      continue

    corpus_backup_output_directory = os.path.join(self.shared_corpus_path,
                                                  project_qualified_name)
    shell.create_directory(corpus_backup_output_directory)
    result = archive.unpack(corpus_backup_local_path,
                            corpus_backup_output_directory)
    shell.remove_file(corpus_backup_local_path)

    if result:
      logs.log(
          'Corpus backup url %s successfully unpacked into shared corpus.' %
          corpus_backup_url)
    else:
      logs.log_error(
          'Failed to unpack corpus backup from url %s.' % corpus_backup_url)
def build_results(fuzzer, jobs, group_by, date_start, date_end):
  """Wrapper around the caching wrappers for _build_results. Decides which of
  those wrappers to call based on how long the query should be cached for."""
  datetime_end = _parse_date(date_end)
  if not datetime_end:
    raise helpers.EarlyExitException('Missing end date.', 400)

  if datetime_end < utils.utcnow().date():
    logs.log('Building results for older stats %s %s %s %s %s.' %
             (fuzzer, jobs, group_by, date_start, date_end))
    return _build_old_results(fuzzer, jobs, group_by, date_start, date_end)

  logs.log('Building results for stats including today %s %s %s %s %s.' %
           (fuzzer, jobs, group_by, date_start, date_end))
  return _build_todays_results(fuzzer, jobs, group_by, date_start, date_end)
def execute_task(testcase_id, job_type):
  """Run analyze task."""
  # Reset redzones.
  environment.reset_current_memory_tool_options(redzone_size=128)

  # Unset window location size and position properties so as to use default.
  environment.set_value('WINDOW_ARG', '')

  # Locate the testcase associated with the id.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  metadata = data_types.TestcaseUploadMetadata.query(
      data_types.TestcaseUploadMetadata.testcase_id == int(testcase_id)).get()
  if not metadata:
    logs.log_error(
        'Testcase %s has no associated upload metadata.' % testcase_id)
    testcase.key.delete()
    return

  is_lsan_enabled = environment.get_value('LSAN')
  if is_lsan_enabled:
    # Creates empty local blacklist so all leaks will be visible to uploader.
    leak_blacklist.create_empty_local_blacklist()

  # Store the bot name and timestamp in upload metadata.
  bot_name = environment.get_value('BOT_NAME')
  metadata.bot_name = bot_name
  metadata.timestamp = datetime.datetime.utcnow()
  metadata.put()

  # Adjust the test timeout, if user has provided one.
  if metadata.timeout:
    environment.set_value('TEST_TIMEOUT', metadata.timeout)

  # Adjust the number of retries, if user has provided one.
  if metadata.retries is not None:
    environment.set_value('CRASH_RETRIES', metadata.retries)

  # Set up testcase and get absolute testcase path.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
  if not file_list:
    return

  # Set up build.
  setup_build(testcase)

  # Check if we have an application path. If not, our build failed to set up
  # correctly.
  if not build_manager.check_app_path():
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Build setup failed')

    if data_handler.is_first_retry_for_task(testcase):
      build_fail_wait = environment.get_value('FAIL_WAIT')
      tasks.add_task(
          'analyze', testcase_id, job_type, wait_time=build_fail_wait)
    else:
      data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                   'Build setup failed')
    return

  # Update initial testcase information.
  testcase.absolute_path = testcase_file_path
  testcase.job_type = job_type
  testcase.binary_flag = utils.is_binary_file(testcase_file_path)
  testcase.queue = tasks.default_queue()
  testcase.crash_state = ''

  # Set initial testcase metadata fields (e.g. build url, etc).
  data_handler.set_initial_testcase_metadata(testcase)

  # Update minimized arguments and use ones provided during user upload.
  if not testcase.minimized_arguments:
    minimized_arguments = environment.get_value('APP_ARGS') or ''
    additional_command_line_flags = testcase.get_metadata(
        'uploaded_additional_args')
    if additional_command_line_flags:
      minimized_arguments += ' %s' % additional_command_line_flags
    environment.set_value('APP_ARGS', minimized_arguments)
    testcase.minimized_arguments = minimized_arguments

  # Update other fields not set at upload time.
  testcase.crash_revision = environment.get_value('APP_REVISION')
  data_handler.set_initial_testcase_metadata(testcase)
  testcase.put()

  # Initialize some variables.
  gestures = testcase.gestures
  http_flag = testcase.http_flag
  test_timeout = environment.get_value('TEST_TIMEOUT')

  # Get the crash output.
  result = testcase_manager.test_for_crash_with_retries(
      testcase,
      testcase_file_path,
      test_timeout,
      http_flag=http_flag,
      compare_crash=False)

  # If we don't get a crash, try enabling http to see if we can get a crash.
  # Skip engine fuzzer jobs (e.g. libFuzzer, AFL) for which http testcase paths
  # are not applicable.
  if (not result.is_crash() and not http_flag and
      not environment.is_engine_fuzzer_job()):
    result_with_http = testcase_manager.test_for_crash_with_retries(
        testcase,
        testcase_file_path,
        test_timeout,
        http_flag=True,
        compare_crash=False)
    if result_with_http.is_crash():
      logs.log('Testcase needs http flag for crash.')
      http_flag = True
      result = result_with_http

  # Refresh our object.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  # Set application command line with the correct http flag.
  application_command_line = (
      testcase_manager.get_command_line_for_application(
          testcase_file_path, needs_http=http_flag))

  # Get the crash data.
  crashed = result.is_crash()
  crash_time = result.get_crash_time()
  state = result.get_symbolized_data()
  unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)

  # Get crash info object with minidump info. Also, re-generate unsymbolized
  # stacktrace if needed.
  crash_info, _ = (
      crash_uploader.get_crash_info_and_stacktrace(
          application_command_line, state.crash_stacktrace, gestures))
  if crash_info:
    testcase.minidump_keys = crash_info.store_minidump()

  if not crashed:
    # Could not reproduce the crash.
    log_message = (
        'Testcase didn\'t crash in %d seconds (with retries)' % test_timeout)
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.FINISHED, log_message)

    # In the general case, we will not attempt to symbolize if we do not detect
    # a crash. For user uploads, we should symbolize anyway to provide more
    # information about what might be happening.
    crash_stacktrace_output = utils.get_crash_stacktrace_output(
        application_command_line, state.crash_stacktrace,
        unsymbolized_crash_stacktrace)
    testcase.crash_stacktrace = data_handler.filter_stacktrace(
        crash_stacktrace_output)

    # For an unreproducible testcase, retry once on another bot to confirm
    # our results and in case this bot is in a bad state which we didn't catch
    # through our usual means.
    if data_handler.is_first_retry_for_task(testcase):
      testcase.status = 'Unreproducible, retrying'
      testcase.put()

      tasks.add_task('analyze', testcase_id, job_type)
      return

    data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                 'Unreproducible')

    # A non-reproducing testcase might still impact production branches.
    # Add the impact task to get that information.
    task_creation.create_impact_task_if_needed(testcase)
    return

  # Update testcase crash parameters.
  testcase.http_flag = http_flag
  testcase.crash_type = state.crash_type
  testcase.crash_address = state.crash_address
  testcase.crash_state = state.crash_state

  crash_stacktrace_output = utils.get_crash_stacktrace_output(
      application_command_line, state.crash_stacktrace,
      unsymbolized_crash_stacktrace)
  testcase.crash_stacktrace = data_handler.filter_stacktrace(
      crash_stacktrace_output)

  # Try to guess if the bug is security or not.
  security_flag = crash_analyzer.is_security_issue(
      state.crash_stacktrace, state.crash_type, state.crash_address)
  testcase.security_flag = security_flag

  # If it is, guess the severity.
  if security_flag:
    testcase.security_severity = severity_analyzer.get_security_severity(
        state.crash_type, state.crash_stacktrace, job_type, bool(gestures))

  log_message = ('Testcase crashed in %d seconds (r%d)' %
                 (crash_time, testcase.crash_revision))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.FINISHED,
                                       log_message)

  # See if we have to ignore this crash.
  if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
    data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                 'Irrelevant')
    return

  # Test for reproducibility.
  one_time_crasher_flag = not testcase_manager.test_for_reproducibility(
      testcase.fuzzer_name, testcase.actual_fuzzer_name(), testcase_file_path,
      state.crash_state, security_flag, test_timeout, http_flag, gestures)
  testcase.one_time_crasher_flag = one_time_crasher_flag

  # Check to see if this is a duplicate.
  data_handler.check_uploaded_testcase_duplicate(testcase, metadata)

  # Set testcase and metadata status if not set already.
  if testcase.status == 'Duplicate':
    # For testcases uploaded by bots (with quiet flag), don't create additional
    # tasks.
    if metadata.quiet_flag:
      data_handler.close_invalid_uploaded_testcase(testcase, metadata,
                                                   'Duplicate')
      return
  else:
    # New testcase.
    testcase.status = 'Processed'
    metadata.status = 'Confirmed'

    # Reset the timestamp as well, to respect
    # data_types.MIN_ELAPSED_TIME_SINCE_REPORT. Otherwise it may get filed by
    # the triage task prematurely without the grouper having a chance to run
    # on this testcase.
    testcase.timestamp = utils.utcnow()

    # Add new leaks to global blacklist to avoid detecting duplicates.
    # Only add if testcase has a direct leak crash and if it's reproducible.
    if is_lsan_enabled:
      leak_blacklist.add_crash_to_global_blacklist_if_needed(testcase)

  # Update the testcase values.
  testcase.put()

  # Update the upload metadata.
  metadata.security_flag = security_flag
  metadata.put()

  _add_default_issue_metadata(testcase)

  # Create tasks to
  # 1. Minimize testcase (minimize).
  # 2. Find regression range (regression).
  # 3. Find testcase impact on production branches (impact).
  # 4. Check whether testcase is fixed (progression).
  # 5. Get second stacktrace from another job in case of
  #    one-time crashes (stack).
  task_creation.create_tasks(testcase)
def _new_fuzzer_formatter(query_format, dataset):
  """Prepare a query to check for new fuzzers from the past week."""
  now = utils.utcnow().date()
  cutoff_time = now - datetime.timedelta(days=7)
  return query_format.format(dataset=dataset, cutoff_time=cutoff_time)
def current_date():
  """Return the current UTC date in ISO format."""
  return utils.utcnow().date().isoformat()
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage-minimized corpus, there is no need to
  # upload test case samples for them.
  if fuzzer_name in fuzzing.ENGINES:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True
  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = storage.get_cloud_storage_file_path(bucket_name, blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list).encode('utf-8'),
                            list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated testcases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. It matches the
    # |resources-| prefix that a fuzzer can use to indicate resources. Also, it
    # matches the resources directory that Chromium web_tests use for
    # dependencies.
    runner.rsync(
        data_directory, gcs_base_url, exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))