def _channel_connectivity_changed(connectivity):
  """Callback for channel connectivity changes."""
  try:
    with _host_state.channel_condition:
      if connectivity == grpc.ChannelConnectivity.READY:
        if _check_state():
          logs.log('Connected to worker.')
          _host_state.channel_state = ChannelState.READY
        else:
          _host_state.channel_state = ChannelState.INCONSISTENT

        _host_state.channel_condition.notify_all()
        return

      _host_state.channel_state = ChannelState.NOT_READY

    if connectivity == grpc.ChannelConnectivity.SHUTDOWN:
      if _host_state.expect_shutdown:
        # We requested a shutdown to update the source.
        logs.log('Worker shutting down.')
        return

      raise untrusted.HostException('Unrecoverable error.')
  except AttributeError:
    # Python sets all globals to None on shutdown. Ignore.
    logs.log('Shutting down.')
    return

  if connectivity == grpc.ChannelConnectivity.TRANSIENT_FAILURE:
    logs.log_warn('Transient failure detected on worker channel.')

  if connectivity == grpc.ChannelConnectivity.CONNECTING:
    logs.log('Reconnecting to worker.')

def reset_usb():
  """Reset USB bus for a device serial."""
  if environment.is_android_cuttlefish() or environment.is_android_emulator():
    # Nothing to do here.
    return True

  # App Engine does not let us import this.
  import fcntl

  # We need to get the latest device path since it could have changed after
  # reboots or adb root restarts.
  try:
    device_path = get_device_path()
  except IOError:
    # We may reach this state if the device is no longer available.
    device_path = None

  if not device_path:
    # Try pulling from cache (if available).
    device_path = environment.get_value('DEVICE_PATH')

  if not device_path:
    logs.log_warn('No device path found, unable to reset usb.')
    return False

  try:
    with open(device_path, 'w') as f:
      fcntl.ioctl(f, USBDEVFS_RESET)
  except:
    logs.log_warn('Failed to reset usb.')
    return False

  # Wait for usb to recover.
  wait_for_device(recover=False)
  return True

def send(to_email, subject, html_content):
  """Send email."""
  sendgrid_api_key = db_config.get_value('sendgrid_api_key')
  if not sendgrid_api_key:
    logs.log_warn('Skipping email as SendGrid API key is not set in config.')
    return

  from_email = db_config.get_value('sendgrid_sender')
  if not from_email:
    logs.log_warn('Skipping email as SendGrid sender is not set in config.')
    return

  message = Mail(
      from_email=From(str(from_email)),
      to_emails=To(str(to_email)),
      subject=Subject(subject),
      html_content=HtmlContent(str(html_content)))

  try:
    sg = SendGridAPIClient(sendgrid_api_key)
    response = sg.send(message)
    logs.log(
        'Sent email to %s.' % to_email,
        status_code=response.status_code,
        body=response.body,
        headers=response.headers)
  except Exception:
    logs.log_error('Failed to send email to %s.' % to_email)

def get_value(key, default_value=None, constructor=None):
  """Get the value for a key."""
  value_path = get_value_file_path(key)
  if not os.path.exists(value_path):
    return default_value

  try:
    with open(value_path, 'rb') as f:
      value_str = f.read()
  except IOError:
    logs.log_error('Failed to read %s from persistent cache.' % key)
    return default_value

  try:
    value = json_utils.loads(value_str)
  except Exception:
    logs.log_warn('Non-serializable value read from cache key %s: "%s"' %
                  (key, value_str))
    return default_value

  if constructor:
    try:
      value = constructor(value)
    except Exception:
      logs.log_warn('Failed to construct value "%s" using %s '
                    'and key "%s" in persistent cache. '
                    'Using default value %s.' %
                    (value, constructor, key, default_value))
      return default_value

  return value

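# A minimal usage sketch for the cache getter above. The key name below is
# hypothetical (not one used by the original module); |constructor| rebuilds a
# richer value from the JSON-decoded data, and |default_value| is returned on
# any read, parse, or construction failure.

last_sync_timestamp = get_value(
    'last_corpus_sync_time',  # hypothetical cache key
    default_value=0.0,
    constructor=float)  # rebuild a float from the decoded JSON value
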
def _make_corpus_backup_public(target, corpus_fuzzer_name_override,
                               corpus_backup_bucket_name):
  """Identifies old corpus backups and makes them public."""
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
      corpus_backup_bucket_name, corpus_fuzzer_name_override or target.engine,
      target.project_qualified_name(), corpus_backup_date)

  if not storage.get(corpus_backup_url):
    logs.log_warn('Failed to find corpus backup %s.' % corpus_backup_url)
    return

  if not _set_public_acl_if_needed(corpus_backup_url):
    return

  filename = (
      corpus_manager.PUBLIC_BACKUP_TIMESTAMP + os.extsep +
      corpus_manager.BACKUP_ARCHIVE_FORMAT)
  public_url = os.path.join(os.path.dirname(corpus_backup_url), filename)

  if not storage.copy_blob(corpus_backup_url, public_url):
    logs.log_error(
        'Failed to overwrite %s with the latest public corpus backup.' %
        public_url)
    return

  if not _set_public_acl_if_needed(public_url):
    return

  logs.log('Corpus backup %s is now marked public.' % corpus_backup_url)

def file_issue(testcase):
  """File an issue to the GitHub repo of the project."""
  if not _filing_enabled(testcase):
    return

  if testcase.github_repo_id and testcase.github_issue_num:
    logs.log('Issue already filed under '
             f'issue number {testcase.github_issue_num} in '
             f'repo {testcase.github_repo_id}.')
    return

  access_token = _get_access_token()

  repo = _get_repo(testcase, access_token)
  if not repo:
    logs.log('Unable to file issues to the main repo of the project.')
    return

  if not repo.has_issues:
    logs.log_warn('Unable to file issues to the main repo: '
                  'repo has disabled issues.')
    return

  issue = _post_issue(repo, testcase)
  _update_testcase_properties(testcase, repo, issue)

def copy_directory_from_worker(worker_directory, host_directory, replace=False):
  """Recursively copy a directory from the worker.

  Directories are created as needed. Unless |replace| is True, files already
  in |host_directory| will remain after this call."""
  if replace and os.path.exists(host_directory):
    shutil.rmtree(host_directory, ignore_errors=True)
    os.mkdir(host_directory)

  for worker_file_path in list_files(worker_directory, recursive=True):
    relative_worker_file_path = os.path.relpath(worker_file_path,
                                                worker_directory)
    host_file_path = os.path.join(host_directory, relative_worker_file_path)

    # Be careful with the path provided by the worker here. We want to make
    # sure we're only writing files to |host_directory| and not outside it.
    if not is_directory_parent(host_file_path, host_directory):
      logs.log_warn('copy_directory_from_worker: Attempt to escape |host_dir|.')
      return False

    host_file_directory = os.path.dirname(host_file_path)
    if not os.path.exists(host_file_directory):
      os.makedirs(host_file_directory)

    if not copy_file_from_worker(worker_file_path, host_file_path):
      logs.log_warn('Failed to copy %s from worker.' % worker_file_path)
      return False

  return True

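# The containment check above goes through is_directory_parent(), which is not
# shown in this listing. A minimal sketch of such a check, assuming it only
# needs to reject worker-supplied paths that resolve outside the host
# directory (the real helper may differ):

import os


def _is_path_inside(child_path, parent_directory):
  """Returns True if |child_path| resolves under |parent_directory|,
  guarding against '..' components and symlink escapes."""
  child_real = os.path.realpath(child_path)
  parent_real = os.path.realpath(parent_directory)
  return os.path.commonpath([child_real, parent_real]) == parent_real


# Example: _is_path_inside('/host/dir/a/../../etc/passwd', '/host/dir')
# evaluates to False, so such a path would be rejected.
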
def set_bucket_iam_policy(client, bucket_name, iam_policy):
  """Set bucket IAM policy."""
  filtered_iam_policy = copy.deepcopy(iam_policy)

  # Bindings returned by getIamPolicy can have duplicates. Remove them,
  # otherwise the setIamPolicy operation fails.
  for binding in filtered_iam_policy['bindings']:
    binding['members'] = sorted(list(set(binding['members'])))

  # Filtering members can leave a binding with no members. Remove such
  # bindings, otherwise the setIamPolicy operation fails.
  filtered_iam_policy['bindings'] = [
      b for b in filtered_iam_policy['bindings'] if b['members']
  ]

  try:
    return client.buckets().setIamPolicy(
        bucket=bucket_name, body=filtered_iam_policy).execute()
  except HttpError as e:
    error_reason = _get_error_reason(e)
    if error_reason == 'Invalid argument':
      # Expected error for non-Google emails or groups. Warn about these.
      logs.log_warn(
          'Invalid Google email or group being added to bucket %s.' %
          bucket_name)
    elif error_reason and 'is of type "group"' in error_reason:
      logs.log_warn(
          'Failed to set IAM policy for %s bucket for a group: %s.' %
          (bucket_name, error_reason))
    else:
      logs.log_error('Failed to set IAM policies for bucket %s.' % bucket_name)

  return None

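# A small self-contained illustration of the two clean-up steps above, using a
# made-up policy dict (not real project data): duplicate members are dropped,
# then bindings left with no members are removed.

sample_policy = {
    'bindings': [
        {'role': 'roles/storage.objectViewer',
         'members': ['user:a@example.com', 'user:a@example.com']},
        {'role': 'roles/storage.objectAdmin', 'members': []},
    ]
}

for sample_binding in sample_policy['bindings']:
  sample_binding['members'] = sorted(set(sample_binding['members']))

sample_policy['bindings'] = [
    b for b in sample_policy['bindings'] if b['members']
]
# sample_policy now contains a single binding with one member.
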
def execute_task(*_):
  """Execute the report uploads."""
  logs.log('Uploading pending reports.')

  # Get metadata for reports requiring upload.
  reports_metadata = ndb_utils.get_all_from_query(
      data_types.ReportMetadata.query(
          ndb_utils.is_false(data_types.ReportMetadata.is_uploaded)))
  reports_metadata = list(reports_metadata)
  if not reports_metadata:
    logs.log('No reports that need upload found.')
    return

  environment.set_value('UPLOAD_MODE', 'prod')

  # Otherwise, upload corresponding reports.
  logs.log('Uploading reports for testcases: %s' %
           str([report.testcase_id for report in reports_metadata]))

  report_metadata_to_delete = []
  for report_metadata in reports_metadata:
    # Convert metadata back into actual report.
    crash_info = crash_uploader.crash_report_info_from_metadata(
        report_metadata)
    testcase_id = report_metadata.testcase_id

    try:
      _ = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      logs.log_warn('Could not find testcase %s.' % testcase_id)
      report_metadata_to_delete.append(report_metadata.key)
      continue

    # Upload the report and update the corresponding testcase info.
    logs.log('Processing testcase %s for crash upload.' % testcase_id)
    crash_report_id = crash_info.upload()
    if crash_report_id is None:
      logs.log_error(
          'Crash upload for testcase %s failed, retry later.' % testcase_id)
      continue

    # Update the report metadata to indicate successful upload.
    report_metadata.crash_report_id = crash_report_id
    report_metadata.is_uploaded = True
    report_metadata.put()

    logs.log('Uploaded testcase %s to crash, got back report id %s.' %
             (testcase_id, crash_report_id))
    time.sleep(1)

  # Delete report metadata entries where testcase does not exist anymore or
  # upload is not supported.
  if report_metadata_to_delete:
    ndb_utils.delete_multi(report_metadata_to_delete)

  # Log done with uploads.
  # Deletion happens in batches in cleanup_task, so that in case of error there
  # is some buffer for looking at stored ReportMetadata in the meantime.
  logs.log('Finished uploading crash reports.')

def _process_project(project, bucket):
  """Collects coverage information for all fuzz targets in the given project
  and the total stats for the project."""
  project_name = _basename(project)
  logs.log('Processing coverage for %s project.' % project_name)

  report_path = storage.get_cloud_storage_file_path(bucket, project)
  report_info = _read_json(report_path)
  if not report_info:
    logs.log_warn('Skipping code coverage for %s project.' % project_name)
    return

  # Iterate through report_info['fuzzer_stats_dir'] and prepare
  # CoverageInformation entities for individual fuzz targets.
  entities = []
  for fuzzer in storage.list_blobs(
      report_info['fuzzer_stats_dir'], recursive=False):
    entities.append(
        _process_fuzzer_stats(fuzzer, report_info, project_name, bucket))

  logs.log('Processed coverage for %d targets in %s project.' %
           (len(entities), project_name))

  # Prepare CoverageInformation entity for the total project stats.
  entities.append(_process_project_stats(report_info, project_name))

  ndb_utils.put_multi(entities)

def find_windows_for_process(process_id):
  """Return visible windows belonging to a process."""
  pids = utils.get_process_ids(process_id)
  if not pids:
    return []

  visible_windows = []
  for pid in pids:
    app = application.Application()
    try:
      app.connect(process=pid)
    except:
      logs.log_warn('Unable to connect to process.')
      continue

    try:
      windows = app.windows()
    except:
      logs.log_warn('Unable to get application windows.')
      continue

    for window in windows:
      try:
        window.type_keys('')
      except:
        continue

      visible_windows.append(window)

  return visible_windows

def get_current_user():
  """Get the current logged in user, or None."""
  if environment.is_local_development():
    return User('user@localhost')

  current_request = request_cache.get_current_request()
  if local_config.AuthConfig().get('enable_loas'):
    loas_user = current_request.headers.get('X-AppEngine-LOAS-Peer-Username')
    if loas_user:
      return User(loas_user + '@google.com')

  iap_email = get_iap_email(current_request)
  if iap_email:
    return User(iap_email)

  cache_backing = request_cache.get_cache_backing()
  oauth_email = getattr(cache_backing, '_oauth_email', None)
  if oauth_email:
    return User(oauth_email)

  cached_email = getattr(cache_backing, '_cached_email', None)
  if cached_email:
    return User(cached_email)

  session_cookie = get_session_cookie()
  if not session_cookie:
    return None

  try:
    decoded_claims = decode_claims(get_session_cookie())
  except AuthError:
    logs.log_warn('Invalid session cookie.')
    return None

  allowed_firebase_providers = local_config.ProjectConfig().get(
      'firebase.auth_providers', ['google.com'])
  firebase_info = decoded_claims.get('firebase', {})
  sign_in_provider = firebase_info.get('sign_in_provider')

  if sign_in_provider not in allowed_firebase_providers:
    logs.log_error(f'Firebase provider {sign_in_provider} is not enabled.')
    return None

  # Per https://docs.github.com/en/authentication/
  # keeping-your-account-and-data-secure/authorizing-oauth-apps
  # GitHub requires emails to be verified before an OAuth app can be
  # authorized, so we make an exception.
  if (not decoded_claims.get('email_verified') and
      sign_in_provider != 'github.com'):
    return None

  email = decoded_claims.get('email')
  if not email:
    return None

  # We cache the email for this request if we've validated the user to make
  # subsequent get_current_user() calls fast.
  setattr(cache_backing, '_cached_email', email)
  return User(email)

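# For reference, a decoded session-cookie claims dict of roughly the shape the
# checks above inspect. The values are made up; the exact payload depends on
# Firebase Auth and the configured sign-in providers.

_example_decoded_claims = {
    'email': 'dev@example.com',            # hypothetical address
    'email_verified': True,
    'firebase': {
        'sign_in_provider': 'google.com',  # must be in firebase.auth_providers
    },
}
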
def _get_mutator_plugins_bucket_url():
  """Returns the url of the mutator plugins' cloud storage bucket."""
  mutator_plugins_bucket = environment.get_value('MUTATOR_PLUGINS_BUCKET')
  if not mutator_plugins_bucket:
    logs.log_warn('MUTATOR_PLUGINS_BUCKET is not set in project config, '
                  'skipping custom mutator strategy.')
    return None

  return 'gs://%s' % mutator_plugins_bucket

def generate_weighted_strategy_pool(strategy_list, use_generator, engine_name):
  """Generate a strategy pool based on a probability distribution from
  multi-armed bandit experimentation."""
  # If weighted strategy selection is enabled, there will be a distribution
  # stored in the environment.
  distribution = environment.get_value('STRATEGY_SELECTION_DISTRIBUTION')
  selection_method = environment.get_value(
      'STRATEGY_SELECTION_METHOD', default_value='default')

  # Otherwise, if weighted strategy selection is not enabled (strategy
  # selection method is default) or if we cannot query properly, generate a
  # strategy pool according to default parameters. We pass the combined list
  # of multi-armed bandit strategies and manual strategies for consideration
  # in the default strategy selection process.
  if not distribution or selection_method == 'default':
    return generate_default_strategy_pool(strategy_list, use_generator)

  # Change the distribution to a list of named tuples rather than a list of
  # dictionaries so that we can use the random_weighted_choice function.
  # Filter out probability entries from other engines.
  distribution_tuples = [
      StrategyCombination(
          strategy_name=elem['strategy_name'],
          probability=elem['probability'])
      for elem in distribution
      if elem['engine'] == engine_name
  ]

  if not distribution_tuples:
    logs.log_warn('Tried to generate a weighted strategy pool, but do not have '
                  'strategy probabilities for %s fuzzing engine.' % engine_name)
    return generate_default_strategy_pool(strategy_list, use_generator)

  strategy_selection = utils.random_weighted_choice(distribution_tuples,
                                                    'probability')
  strategy_name = strategy_selection.strategy_name
  chosen_strategies = strategy_name.split(',')

  pool = StrategyPool()
  for strategy_tuple in strategy_list:
    if strategy_tuple.name in chosen_strategies:
      pool.add_strategy(strategy_tuple)

  # We consider certain strategies separately as those are only supported by a
  # small number of fuzz targets and should be used heavily when available.
  for value in [
      strategy_entry for strategy_entry in strategy_list
      if strategy_entry.manually_enable
  ]:
    if do_strategy(value):
      pool.add_strategy(value)

  logs.log('Strategy pool was generated according to weighted distribution. '
           'Chosen strategies: ' + ', '.join(pool.strategy_names))
  return pool

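# The weighted pick above goes through utils.random_weighted_choice(), which is
# not shown in this listing. A minimal stand-in with the same intent, assuming
# each entry carries a numeric weight attribute (the real helper may behave
# differently), followed by a made-up distribution for a single engine:

import collections
import random

StrategyCombinationExample = collections.namedtuple(
    'StrategyCombinationExample', ['strategy_name', 'probability'])


def _random_weighted_choice_sketch(items, weight_attribute='probability'):
  """Picks one item with probability proportional to its weight attribute."""
  weights = [getattr(item, weight_attribute) for item in items]
  return random.choices(items, weights=weights, k=1)[0]


_example_distribution = [
    StrategyCombinationExample('corpus_mutations_radamsa,value_profile', 0.3),
    StrategyCombinationExample('fork', 0.7),
]
_chosen = _random_weighted_choice_sketch(_example_distribution)
# _chosen.strategy_name is then split on ',' to select the pool entries.
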
def _mark_errored(testcase, revision, error):
  """Mark testcase as errored out."""
  message = 'Received error from external infra, marking testcase as NA.'
  logs.log_warn(message, error=error, testcase_id=testcase.key.id())

  testcase.fixed = 'NA'
  testcase.open = False
  data_handler.update_progression_completion_metadata(
      testcase, revision, message=message)

def handle_update(testcase, revision, stacktrace, error):
  """Handle update."""
  logs.log('Got external update for testcase.', testcase_id=testcase.key.id())
  if error:
    _mark_errored(testcase, revision, error)
    return

  last_tested_revision = (
      testcase.get_metadata('last_tested_revision') or testcase.crash_revision)

  if revision < last_tested_revision:
    logs.log_warn(f'Revision {revision} less than previously tested '
                  f'revision {last_tested_revision}.')
    return

  fuzz_target = testcase.get_fuzz_target()
  if fuzz_target:
    fuzz_target_name = fuzz_target.binary

    # Record use of fuzz target to avoid garbage collection (since fuzz_task
    # does not run).
    data_handler.record_fuzz_target(fuzz_target.engine, fuzz_target.binary,
                                    testcase.job_type)
  else:
    fuzz_target_name = None

  state = stack_analyzer.get_crash_data(
      stacktrace,
      fuzz_target=fuzz_target_name,
      symbolize_flag=False,
      already_symbolized=True,
      detect_ooms_and_hangs=True)
  crash_comparer = CrashComparer(state.crash_state, testcase.crash_state)

  if not crash_comparer.is_similar():
    logs.log(f'State no longer similar ('
             f'testcase_id={testcase.key.id()}, '
             f'old_state={testcase.crash_state}, '
             f'new_state={state.crash_state})')
    _mark_as_fixed(testcase, revision)
    return

  is_security = crash_analyzer.is_security_issue(
      state.crash_stacktrace, state.crash_type, state.crash_address)

  if is_security != testcase.security_flag:
    logs.log(f'Security flag for {testcase.key.id()} no longer matches.')
    _mark_as_fixed(testcase, revision)
    return

  logs.log(f'{testcase.key.id()} still crashes.')
  testcase.last_tested_crash_stacktrace = stacktrace
  data_handler.update_progression_completion_metadata(
      testcase, revision, is_crash=True)

def get_component_range_list(start_revision,
                             end_revision,
                             job_type,
                             platform_id=None):
  """Gets revision variable ranges for a changeset range."""
  start_component_revisions_dict = get_component_revisions_dict(
      start_revision, job_type, platform_id=platform_id)

  if start_revision == end_revision:
    end_component_revisions_dict = start_component_revisions_dict
  else:
    end_component_revisions_dict = get_component_revisions_dict(
        end_revision, job_type, platform_id=platform_id)

  if (start_component_revisions_dict is None or
      end_component_revisions_dict is None):
    return []

  component_revisions = []
  keys = get_components_list(end_component_revisions_dict, job_type)
  for key in keys:
    if not start_component_revisions_dict:
      # 0 start revision, can only show link text.
      end_component_display_revision = _get_display_revision(
          end_component_revisions_dict[key])
      component_name = end_component_revisions_dict[key]['name']
      component_revisions.append({
          'component': component_name,
          'link_text': '0:%s' % end_component_display_revision
      })
      continue

    if key not in start_component_revisions_dict:
      logs.log_warn('Key %s not found in start revision %s for job %s.' %
                    (key, start_revision, job_type))
      continue

    start_component_revision_dict = start_component_revisions_dict[key]
    end_component_revision_dict = end_component_revisions_dict[key]

    component_revisions.append({
        'component': start_component_revision_dict['name'],
        'link_text': _get_link_text(start_component_revision_dict,
                                    end_component_revision_dict),
        'link_url': _get_link_url(start_component_revision_dict,
                                  end_component_revision_dict)
    })

  return component_revisions

def configure(force_enable=False):
  """Configure airplane mode and wifi on device."""
  # The reproduce tool shouldn't inherit wifi settings from jobs.
  if environment.get_value('REPRODUCE_TOOL'):
    return

  # Airplane mode should be disabled in all cases. This can get inadvertently
  # turned on via gestures.
  disable_airplane_mode()

  # Need to disable wifi before changing configuration.
  disable()

  # Check if wifi needs to be enabled. If not, then no need to modify the
  # supplicant file.
  wifi_enabled = force_enable or environment.get_value('WIFI', True)
  if not wifi_enabled:
    # No more work to do, we already disabled it at start.
    return

  enable()

  # Wait 2 seconds to allow the wifi to be enabled.
  time.sleep(2)

  # Install helper apk to configure wifi.
  wifi_util_apk_path = os.path.join(
      environment.get_platform_resources_directory(), 'wifi_util.apk')
  if not app.is_installed(WIFI_UTIL_PACKAGE_NAME):
    app.install(wifi_util_apk_path)

  # Get ssid and password from admin configuration.
  if environment.is_android_cuttlefish():
    wifi_ssid = 'VirtWifi'
    wifi_password = ''
  else:
    config = db_config.get()
    if not config.wifi_ssid:
      logs.log('No wifi ssid is set, skipping wifi config.')
      return

    wifi_ssid = config.wifi_ssid
    wifi_password = config.wifi_password or ''

  connect_wifi_command = (
      'am instrument -e method connectToNetwork -e ssid {ssid} ')
  if wifi_password:
    connect_wifi_command += '-e psk {password} '
  connect_wifi_command += '-w {call_path}'

  output = adb.run_shell_command(
      connect_wifi_command.format(
          ssid=quote(wifi_ssid),
          password=quote(wifi_password),
          call_path=WIFI_UTIL_CALL_PATH))
  if 'result=true' not in output:
    logs.log_warn('Failed to connect to wifi.', output=output)

def _merge_new_units(self, target_path, corpus_dir, new_corpus_dir,
                     fuzz_corpus_dirs, arguments, stat_overrides):
  """Merge new units."""
  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_corpus_dir)
  if not new_units_added:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')
    return

  # If this times out, it's possible that we will miss some units. However,
  # if we're taking >10 minutes to load/merge the corpus something is going
  # very wrong and we probably don't want to make things worse by adding units
  # anyway.
  merge_corpus = self._create_merge_corpus_dir()

  merge_dirs = fuzz_corpus_dirs[:]

  # Merge the new units with the initial corpus.
  if corpus_dir not in merge_dirs:
    merge_dirs.append(corpus_dir)

  old_corpus_len = shell.get_directory_file_count(corpus_dir)

  new_units_added = 0
  try:
    result = self._minimize_corpus_two_step(
        target_path=target_path,
        arguments=arguments,
        existing_corpus_dirs=merge_dirs,
        new_corpus_dir=new_corpus_dir,
        output_corpus_dir=merge_corpus,
        reproducers_dir=None,
        max_time=engine_common.get_merge_timeout(
            libfuzzer.DEFAULT_MERGE_TIMEOUT))

    libfuzzer.move_mergeable_units(merge_corpus, corpus_dir)
    new_corpus_len = shell.get_directory_file_count(corpus_dir)
    new_units_added = new_corpus_len - old_corpus_len

    stat_overrides.update(result.stats)
  except (MergeError, TimeoutError) as e:
    logs.log_warn('Merge failed.', error=repr(e))

  stat_overrides['new_units_added'] = new_units_added

  # Record the stats to make them easily searchable in stackdriver.
  logs.log('Stats calculated.', stats=stat_overrides)
  if new_units_added:
    logs.log(f'New units added to corpus: {new_units_added}.')
  else:
    logs.log('No new units found.')

def _read_json(url):
  """Returns a JSON object loaded from the given GCS url."""
  data = storage.read_data(url)

  result = None
  try:
    result = json.loads(data)
  except Exception as e:
    logs.log_warn('Empty or malformed code coverage JSON (%s): %s.' %
                  (url, str(e)))

  return result

def execute_command(cmd, timeout=None, log_error=True,
                    on_cuttlefish_host=False):
  """Spawns a subprocess to run the given shell command."""
  if on_cuttlefish_host and environment.is_android_cuttlefish():
    # Auto accept key fingerprint for ssh command.
    cmd = ('ssh -o StrictHostKeyChecking=no '
           f'{get_cuttlefish_ssh_target()} "{cmd}"')

  so = []
  # pylint: disable=consider-using-with
  output_dest = tempfile.TemporaryFile()
  # pylint: disable=subprocess-popen-preexec-fn,consider-using-with
  pipe = subprocess.Popen(
      cmd,
      executable='/bin/bash',
      stdout=output_dest,
      stderr=subprocess.STDOUT,
      shell=True,
      preexec_fn=lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL),
      bufsize=0)

  def run():
    """Thread target function that waits for subprocess to complete."""
    # Initialize so the error log below does not hit an unbound local if
    # reading stdout fails.
    output = None
    try:
      pipe.communicate()
      output_dest.seek(0)
      output = output_dest.read()
      output_dest.close()
      if output:
        so.append(output)
    except OSError as _:
      logs.log_warn('Failed to retrieve stdout from: %s' % cmd)

    if pipe.returncode:
      if log_error:
        logs.log_warn(
            '%s returned %d error code.' % (cmd, pipe.returncode),
            output=output)

  thread = threading.Thread(target=run)
  thread.start()
  thread.join(timeout)

  if thread.is_alive():
    logs.log_warn('Command %s timed out. Killing process.' % cmd)
    try:
      pipe.kill()
    except OSError:
      # Can't kill a dead process.
      pass

    return None

  bytes_output = b''.join(so)
  return bytes_output.strip().decode('utf-8', errors='ignore')

def _handle_rsync_result(gsutil_result, max_errors):
  """Handle rsync result."""
  if gsutil_result.return_code == 0:
    sync_succeeded = True
  else:
    logs.log_warn(
        'gsutil rsync got non-zero:\n'
        'Command: %s\n'
        'Output: %s\n' % (gsutil_result.command, gsutil_result.output))
    sync_succeeded = _rsync_errors_below_threshold(gsutil_result, max_errors)

  return sync_succeeded and not gsutil_result.timed_out

def terminate_process(process_id, kill=False):
  """Terminates a process by its process id."""
  try:
    process = psutil.Process(process_id)
    if kill:
      process.kill()
    else:
      process.terminate()
  except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
    logs.log_warn('Failed to terminate process.')

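# A common companion pattern, shown here as a sketch and not part of the
# original module: terminate gracefully first, then escalate to kill if the
# process has not exited within a grace period. psutil.Process.wait() and
# psutil.TimeoutExpired are standard psutil APIs.

import psutil


def terminate_then_kill(process_id, grace_seconds=5):
  """Try a graceful terminate, escalate to kill after |grace_seconds|."""
  try:
    process = psutil.Process(process_id)
    process.terminate()
    try:
      process.wait(timeout=grace_seconds)
    except psutil.TimeoutExpired:
      # Still running after the grace period, force kill.
      process.kill()
  except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
    pass
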
def delete(self, remote_path):
  """Delete a remote file."""
  client = _storage_client()
  bucket_name, path = get_bucket_name_and_path(remote_path)

  try:
    bucket = client.bucket(bucket_name)
    bucket.delete_blob(path)
  except google.cloud.exceptions.GoogleCloudError:
    logs.log_warn('Failed to delete cloud storage file %s.' % remote_path)
    raise

  return True

def _do_heartbeat():
  """Heartbeat thread."""
  # grpc stubs and channels should be thread-safe.
  heartbeat_stub = heartbeat_pb2_grpc.HeartbeatStub(_host_state.channel)
  while True:
    try:
      heartbeat_stub.Beat(
          heartbeat_pb2.HeartbeatRequest(),
          timeout=config.HEARTBEAT_TIMEOUT_SECONDS)
    except grpc.RpcError as e:
      logs.log_warn('worker heartbeat failed: ' + repr(e))

    time.sleep(config.HEARTBEAT_INTERVAL_SECONDS)

def is_fuzz_target_local(file_path, file_handle=None):
  """Returns whether |file_path| is a fuzz target binary (local path)."""
  # TODO(hzawawy): Handle syzkaller case.
  filename, file_extension = os.path.splitext(os.path.basename(file_path))

  if not VALID_TARGET_NAME_REGEX.match(filename):
    # Check fuzz target has a valid name (without any special chars).
    return False

  if BLOCKLISTED_TARGET_NAME_REGEX.match(filename):
    # Check fuzz target does not have an explicitly disallowed name (e.g.
    # binaries used for jazzer-based targets).
    return False

  if file_extension not in ALLOWED_FUZZ_TARGET_EXTENSIONS:
    # Ignore files with disallowed extensions (to prevent opening e.g. .zips).
    return False

  if not file_handle and not os.path.exists(file_path):
    # Ignore non-existent files for cases when we don't have a file handle.
    return False

  if filename.endswith('_fuzzer'):
    return True

  # TODO(aarya): Remove this optimization if it does not show up significant
  # savings in profiling results.
  fuzz_target_name_regex = environment.get_value('FUZZER_NAME_REGEX')
  if fuzz_target_name_regex:
    return bool(re.match(fuzz_target_name_regex, filename))

  if os.path.exists(file_path) and not stat.S_ISREG(
      os.stat(file_path).st_mode):
    # Don't read special files (e.g. /dev/urandom).
    logs.log_warn('Tried to read from non-regular file: %s.' % file_path)
    return False

  # Use already provided file handle or open the file.
  local_file_handle = file_handle or open(file_path, 'rb')

  # TODO(metzman): Bound this call so we don't read forever if something went
  # wrong.
  result = utils.search_bytes_in_file(FUZZ_TARGET_SEARCH_BYTES,
                                      local_file_handle)

  if not file_handle:
    # If this local file handle is owned by our function, close it now.
    # Otherwise, it is the caller's responsibility.
    local_file_handle.close()

  return result

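# utils.search_bytes_in_file() is not shown in this listing. Addressing the
# TODO above, a bounded sketch of such a search, assuming it only needs to
# report whether a byte pattern occurs within the first few megabytes of the
# file; reads are chunked so a match can straddle chunk boundaries. The limits
# below are made up.

_MAX_SEARCH_BYTES = 16 * 1024 * 1024  # hypothetical read limit
_CHUNK_SIZE = 1024 * 1024


def _search_bytes_in_file_bounded(search_bytes, file_handle):
  """Returns True if |search_bytes| appears in the first _MAX_SEARCH_BYTES."""
  overlap = b''
  bytes_read = 0
  while bytes_read < _MAX_SEARCH_BYTES:
    chunk = file_handle.read(_CHUNK_SIZE)
    if not chunk:
      return False
    if search_bytes in overlap + chunk:
      return True
    # Keep a tail so a match split across two chunks is still found.
    overlap = chunk[-(len(search_bytes) - 1):] if len(search_bytes) > 1 else b''
    bytes_read += len(chunk)
  return False
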
def generate_new_testcase_mutations_using_radamsa(
    corpus_directory, new_testcase_mutations_directory, generation_timeout):
  """Generate new testcase mutations based on Radamsa."""
  radamsa_path = get_radamsa_path()
  if not radamsa_path:
    # Mutations using radamsa are not supported on current platform, bail out.
    return

  radamsa_runner = new_process.ProcessRunner(radamsa_path)
  files_list = shell.get_files_list(corpus_directory)
  filtered_files_list = [
      f for f in files_list if os.path.getsize(f) <= CORPUS_INPUT_SIZE_LIMIT
  ]
  if not filtered_files_list:
    # No mutations to do on an empty corpus or one with very large files.
    return

  old_corpus_size = shell.get_directory_file_count(
      new_testcase_mutations_directory)
  expected_completion_time = time.time() + generation_timeout

  for i in range(RADAMSA_MUTATIONS):
    original_file_path = random_choice(filtered_files_list)
    original_filename = os.path.basename(original_file_path)
    output_path = os.path.join(
        new_testcase_mutations_directory,
        get_radamsa_output_filename(original_filename, i))

    result = radamsa_runner.run_and_wait(
        ['-o', output_path, original_file_path], timeout=RADAMSA_TIMEOUT)

    if (os.path.exists(output_path) and
        os.path.getsize(output_path) > CORPUS_INPUT_SIZE_LIMIT):
      # Skip large files to avoid further mutations and impact fuzzing
      # efficiency.
      shell.remove_file(output_path)
    elif result.return_code or result.timed_out:
      logs.log_warn('Radamsa failed to mutate or timed out.',
                    output=result.output)

    # Check if we exceeded our timeout. If yes, do no more mutations and break.
    if time.time() > expected_completion_time:
      break

  new_corpus_size = shell.get_directory_file_count(
      new_testcase_mutations_directory)
  logs.log('Added %d tests using Radamsa mutations.' %
           (new_corpus_size - old_corpus_size))

def copy_file_from(self, remote_path, local_path):
  """Copy file from a remote path to a local path."""
  client = _storage_client()
  bucket_name, path = get_bucket_name_and_path(remote_path)

  try:
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(path, chunk_size=self._chunk_size())
    blob.download_to_filename(local_path)
  except google.cloud.exceptions.GoogleCloudError:
    logs.log_warn('Failed to copy cloud storage file %s to local file %s.' %
                  (remote_path, local_path))
    raise

  return True

def read_data(self, remote_path):
  """Read the data of a remote file."""
  bucket_name, path = get_bucket_name_and_path(remote_path)

  client = _storage_client()
  try:
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(path, chunk_size=self._chunk_size())
    return blob.download_as_string()
  except google.cloud.exceptions.GoogleCloudError as e:
    if e.code == 404:
      return None

    logs.log_warn('Failed to read cloud storage file %s.' % remote_path)
    raise

def clear_data_directories_on_low_disk_space():
  """Clear all data directories on low disk space.

  This should ideally never happen, but when it does, we do this to keep the
  bot working in a sane state."""
  free_disk_space = get_free_disk_space()
  if free_disk_space is None:
    # Can't determine free disk space, bail out.
    return

  if free_disk_space >= _low_disk_space_threshold():
    return

  logs.log_warn(
      'Low disk space detected, clearing all data directories to free up '
      'space.')
  clear_data_directories()