def get_build_urls_list(bucket_path, reverse=True):
  """Returns a sorted list of build urls from a bucket path."""
  if not bucket_path:
    return []

  base_url = os.path.dirname(bucket_path)
  if environment.is_running_on_app_engine():
    build_urls = list(storage.list_blobs(base_url))
  else:
    keys_directory = environment.get_value('BUILD_URLS_DIR')
    keys_filename = '%s.list' % utils.string_hash(bucket_path)
    keys_file_path = os.path.join(keys_directory, keys_filename)

    # Within a single task, the locally cached keys file should be re-used.
    # Otherwise, we would waste a lot of network bandwidth fetching the same
    # set of urls repeatedly (especially for regression and progression
    # testing).
    if not os.path.exists(keys_file_path):
      # Get the url list by reading the GCS bucket.
      with open(keys_file_path, 'w') as f:
        for path in storage.list_blobs(base_url):
          f.write(path + '\n')

    content = utils.read_data_from_file(keys_file_path, eval_data=False)
    if not content:
      return []

    build_urls = content.splitlines()

  return _sort_build_urls_by_revision(build_urls, bucket_path, reverse)
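# The keys-file caching above is easiest to see in isolation. Below is a
# minimal, hypothetical sketch of the same cache-then-read pattern using only
# the standard library (storage.list_blobs, utils.read_data_from_file, and the
# BUILD_URLS_DIR handling are ClusterFuzz internals; _cached_listing and
# fetch_fn are illustrative names, not part of the codebase).
import hashlib
import os


def _cached_listing(cache_directory, cache_key, fetch_fn):
  """Return fetch_fn() results, caching them in a file derived from the key."""
  cache_filename = (
      hashlib.sha1(cache_key.encode('utf-8')).hexdigest() + '.list')
  cache_file_path = os.path.join(cache_directory, cache_filename)

  # Only pay the (potentially expensive) fetch cost on a cache miss.
  if not os.path.exists(cache_file_path):
    with open(cache_file_path, 'w') as f:
      for item in fetch_fn():
        f.write(item + '\n')

  with open(cache_file_path) as f:
    return f.read().splitlines()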
def _service_account_id(project):
  """Return the service account ID for a project."""
  # From
  # cloud.google.com/iam/reference/rest/v1/projects.serviceAccounts/create:
  #
  # The account id that is used to generate the service account email address
  # and a stable unique id. It is unique within a project, must be 6-30
  # characters long, and match the regular expression [a-z]([-a-z0-9]*[a-z0-9])
  # to comply with RFC1035.
  account_id = _ACCOUNT_PREFIX + project.replace('_', '-')
  if not account_id[-1].isalnum():
    # Must end in '[a-z0-9]'.
    account_id += '0'

  if len(account_id) < _MIN_LEN:
    # Must be at least |_MIN_LEN| characters long.
    account_id = account_id.ljust(_MIN_LEN, '0')

  # Use a hash prefix as the service account name if the project name is too
  # long.
  if len(account_id) > _MAX_LEN:
    account_id = _ACCOUNT_PREFIX + utils.string_hash(project)[:_HASH_PREFIX_LEN]

  assert _MIN_LEN <= len(account_id) <= _MAX_LEN
  return account_id
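# For illustration, a few worked inputs under assumed constants; the real
# module defines its own values, so treat these as hypothetical:
#
#   _ACCOUNT_PREFIX = 'bot-', _MIN_LEN = 6, _MAX_LEN = 30, _HASH_PREFIX_LEN = 26
#
#   'my_proj'  -> 'bot-my-proj'          (underscores become hyphens)
#   'proj-'    -> 'bot-proj-0'           (padded so the ID ends in [a-z0-9])
#   'a'        -> 'bot-a0'               (right-padded with '0' to _MIN_LEN)
#   'x' * 40   -> 'bot-' + 26-char hash  (hash prefix when the name exceeds
#                                         _MAX_LEN)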
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  # Only consider test cases in the output directory. We might upload too much
  # if we search the data directory as well, or have missing resources.
  # TODO(mbarbella): Support resources in data bundles.
  testcase_list = [
      os.path.relpath(testcase, testcase_directory)
      for testcase in testcase_list
      if testcase.startswith(testcase_directory)
  ]
  if not testcase_list:
    return

  # Bail out if this batch of test cases is too large.
  directory_size = shell.get_directory_size(testcase_directory)
  if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Upload each batch of test cases to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
    return

  runner.rsync(testcase_directory, gcs_base_url)
  logs.log('Synced {count} test cases to {gcs_url}'.format(
      count=len(testcase_list), gcs_url=gcs_base_url))
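# The daily quota check above works because every earlier batch wrote a
# newline-separated list file alongside its test cases. A minimal sketch of
# that counting step, with _count_uploaded_testcases as a hypothetical helper
# standing in for the storage.list_blobs/read_data loop:
def _count_uploaded_testcases(list_file_contents):
  """Sum the line counts of the per-batch test case list files."""
  return sum(len(data.splitlines()) for data in list_file_contents if data)


# For example, two earlier batches of 3 and 2 test cases (plus one unreadable
# list file) count as 5 uploads against TESTCASES_PER_DAY:
assert _count_uploaded_testcases(['a\nb\nc', 'd\ne', '']) == 5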
def _get_build_directory(bucket_path, job_name):
  """Return the build directory based on bucket path and job name."""
  builds_directory = environment.get_value('BUILDS_DIR')

  # If we have a bucket path, jobs that share it should share the same build
  # directory.
  if bucket_path:
    path = _remove_scheme(bucket_path).lstrip('/')
    bucket_path, file_pattern = path.rsplit('/', 1)
    bucket_path = bucket_path.replace('/', '_')

    # Remove build-type substrings to force similar build types into the same
    # directory.
    file_pattern = utils.remove_sub_strings(file_pattern,
                                            BUILD_TYPE_SUBSTRINGS)

    file_pattern_hash = utils.string_hash(file_pattern)
    job_directory = '%s_%s' % (bucket_path, file_pattern_hash)
  else:
    job_directory = job_name

  return os.path.join(builds_directory, job_directory)
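# A worked example of the naming scheme, assuming 'asan' and 'release' are
# among BUILD_TYPE_SUBSTRINGS (the hash below is illustrative, not a real
# utils.string_hash value):
#
#   bucket_path = 'gs://my-bucket/linux/asan-release-([0-9]+).zip'
#   after scheme removal -> 'my-bucket/linux/asan-release-([0-9]+).zip'
#   directory part       -> 'my-bucket_linux'   (slashes -> underscores)
#   file_pattern         -> '--([0-9]+).zip'    (build-type substrings stripped
#                                                so similar build types share a
#                                                directory)
#   job_directory        -> 'my-bucket_linux_<hash of file_pattern>'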
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Store a file in the nfs cache if available."""
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If the NFS server is not available due to heavy load, skip the storage
  # operation altogether as we would fail to store the file.
  if not os.path.exists(os.path.join(nfs_root, '.')):  # '.' iterates the mount.
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing an update, we need to remove the current cached file
    # and its metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files.
    cached_files_list = []
    for cached_filename in os.listdir(cache_directory):
      if cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION):
        continue
      cached_file_path = os.path.join(cache_directory, cached_filename)
      cached_files_list.append(cached_file_path)

    mtime = lambda f: os.stat(f).st_mtime
    last_used_cached_files_list = list(
        sorted(cached_files_list, key=mtime, reverse=True))
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Acquire a lock to store this file. Try only once: if another bot has
  # already started to store it, we don't need to do it ourselves. Just bail
  # out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name, max_hold_seconds=CACHE_LOCK_TIMEOUT, retries=1, by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
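# The eviction step above is a small LRU-by-mtime policy. A minimal sketch,
# with _select_files_to_evict as a hypothetical helper; note the original
# keeps only MAX_CACHED_FILES_PER_DIRECTORY - 1 existing files because the
# new file is about to be added, bringing the directory back to the limit.
import os


def _select_files_to_evict(cached_file_paths, keep_count):
  """Return the cached files to delete, keeping the most recently used."""
  by_recency = sorted(
      cached_file_paths, key=lambda f: os.stat(f).st_mtime, reverse=True)
  return by_recency[keep_count:]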
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage-minimized corpus, there is no need
  # to upload test case samples for them.
  if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True

  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of test cases to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list), list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated test cases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. This matches
    # the |resources-| prefix that a fuzzer can use to indicate resources, and
    # also the resources directory that Chromium web_tests use for
    # dependencies.
    runner.rsync(data_directory, gcs_base_url,
                 exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))
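# The inverted-looking exclusion patterns above rely on rsync's exclusion
# filter dropping any path that matches the regex: a negative lookahead
# therefore excludes everything that does *not* contain the keyword, so only
# matching files get synced. A quick standalone check of the regex behavior
# (assuming anchored re.match-style semantics for the filter):
import re

_exclude_non_resources = re.compile('(?!.*resource)')
assert _exclude_non_resources.match('tests/page.html')           # excluded
assert not _exclude_non_resources.match('tests/resources/a.js')  # synced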
def run_testcase_and_return_result_in_queue(crash_queue,
                                            thread_index,
                                            file_path,
                                            gestures,
                                            env_copy,
                                            upload_output=False):
  """Run a single testcase and return crash results in the crash queue."""
  # Since this is running in its own process, initialize the log handler
  # again. This is needed on Windows, where instances are not shared across
  # child processes. See:
  # https://stackoverflow.com/questions/34724643/python-logging-with-multiprocessing-root-logger-different-in-windows
  logs.configure('run_testcase', {
      'testcase_path': file_path,
  })
  try:
    # Run the testcase and check whether a crash occurred or not.
    return_code, crash_time, output = run_testcase(thread_index, file_path,
                                                   gestures, env_copy)

    # Pull the testcase directory to the host to get any stats files.
    if environment.is_trusted_host():
      from bot.untrusted_runner import file_host
      file_host.pull_testcases_from_worker()

    # Analyze the crash.
    crash_output = _get_crash_output(output)
    crash_result = CrashResult(return_code, crash_time, crash_output)

    # To provide consistency between stats and logs, use the timestamp taken
    # from stats when uploading logs and the testcase.
    if upload_output:
      log_time = _get_testcase_time(file_path)

    if crash_result.is_crash():
      # Initialize the resource list with the testcase path.
      resource_list = [file_path]
      resource_list += get_resource_paths(crash_output)

      # Store the crash stack file in the crash stacktrace directory, using
      # the hash of the testcase path as the filename.
      crash_stacks_directory = environment.get_value('CRASH_STACKTRACES_DIR')
      stack_file_path = os.path.join(crash_stacks_directory,
                                     utils.string_hash(file_path))
      utils.write_data_to_file(crash_output, stack_file_path)

      # Put crash/no-crash results in the crash queue.
      crash_queue.put(
          Crash(
              file_path=file_path,
              crash_time=crash_time,
              return_code=return_code,
              resource_list=resource_list,
              gestures=gestures,
              stack_file_path=stack_file_path))

      # Don't upload uninteresting testcases (no crash) or testcases with no
      # log to correlate them with (not upload_output).
      if upload_output:
        upload_testcase(file_path, log_time)

    if upload_output:
      # Include full output for uploaded logs (crash output, merge output,
      # etc.).
      crash_result_full = CrashResult(return_code, crash_time, output)
      log = prepare_log_for_upload(crash_result_full.get_stacktrace(),
                                   return_code)
      upload_log(log, log_time)
  except Exception:
    logs.log_error('Exception occurred while running '
                   'run_testcase_and_return_result_in_queue.')
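# A hypothetical driver sketch showing how the function above might be
# consumed: each testcase runs in its own process for isolation (hangs and
# state corruption stay contained), and results come back over a shared
# queue. The helper name and arguments here are placeholders, not the actual
# caller in the codebase.
import multiprocessing
import os


def _run_one_testcase_in_subprocess(file_path):
  """Run a single testcase in a child process and collect any crashes."""
  crash_queue = multiprocessing.Queue()
  process = multiprocessing.Process(
      target=run_testcase_and_return_result_in_queue,
      args=(crash_queue, 0, file_path, [], os.environ.copy()))
  process.start()
  process.join()

  crashes = []
  while not crash_queue.empty():
    crashes.append(crash_queue.get())
  return crashes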