Example 1
    def run_and_wait(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Overridden run_and_wait which always decodes the output."""
        result = ProcessRunner.run_and_wait(self, *args, **kwargs)
        if result.output is not None:
            result.output = utils.decode_to_unicode(result.output)

        return result
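All of the examples on this page exercise ClusterFuzz's internal utils.decode_to_unicode helper. Its implementation is not shown here; as a rough sketch (an assumption, not the real helper), it behaves like a bytes-to-str decoder that passes other values through:

def decode_to_unicode_sketch(obj):
    """Hypothetical stand-in for utils.decode_to_unicode: decode bytes to
    str, replacing invalid sequences, and pass anything else through."""
    if isinstance(obj, bytes):
        return obj.decode('utf-8', errors='replace')
    return obj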
Example 2
    def communicate(self, input=None):  # pylint: disable=redefined-builtin
        """subprocess.Popen.communicate."""
        stdout = b''
        stderr = b''

        if self._interactive:
            if input:
                self._popen.stdin.write(input)

            while True:
                line = self._popen.stdout.readline()
                if not line:
                    break

                if self._stdout_file:
                    self._stdout_file.write(line)
                else:
                    stdout += line

                sys.stdout.write(utils.decode_to_unicode(line))

            self._popen.wait()
        else:
            stdout, stderr = self._popen.communicate(input)

        if not self._max_stdout_len:
            return stdout, stderr

        # Read back the captured stdout, truncated to the limit; the with
        # block closes the backing file handle when done.
        with self._stdout_file:
            return utils.read_from_handle_truncated(
                self._stdout_file, self._max_stdout_len), stderr
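Example 2 also relies on utils.read_from_handle_truncated to cap how much captured output is returned. For illustration only (the real helper may differ), a truncating reader can keep the head and tail of the stream around a marker:

def read_truncated(handle, max_len):
    """Illustrative sketch, not the real helper: read a binary file handle,
    keeping the first and last halves around a truncation marker whenever
    the content exceeds max_len bytes."""
    handle.seek(0, 2)  # seek to the end to learn the total size
    size = handle.tell()
    handle.seek(0)
    if size <= max_len:
        return handle.read()
    half = max_len // 2
    head = handle.read(half)
    handle.seek(size - half)
    return head + b'\n...truncated...\n' + handle.read()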
Example 3
def _find_sanitizer_stacktrace(reproducers_dir):
    """Find the sanitizer stacktrace from the reproducers dir."""
    for stacktrace_path in glob.glob(
            os.path.join(reproducers_dir, _HF_SANITIZER_LOG_PREFIX + '*')):
        with open(stacktrace_path, 'rb') as f:
            return utils.decode_to_unicode(f.read())

    return None
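The pattern in Example 3 (glob for a known filename prefix and return the first match, decoded) is easy to exercise in isolation. A self-contained demo, using a hypothetical 'HF_SANITIZER_LOG.' prefix in place of the real _HF_SANITIZER_LOG_PREFIX constant:

import glob
import os
import tempfile

def find_first_log(directory, prefix):
    """Return the decoded contents of the first file matching prefix*."""
    for path in glob.glob(os.path.join(directory, prefix + '*')):
        with open(path, 'rb') as f:
            return f.read().decode('utf-8', errors='replace')
    return None

with tempfile.TemporaryDirectory() as tmp:
    with open(os.path.join(tmp, 'HF_SANITIZER_LOG.1234'), 'wb') as f:
        f.write(b'ERROR: AddressSanitizer: heap-use-after-free')
    print(find_first_log(tmp, 'HF_SANITIZER_LOG.'))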
Example 4
def undercoat_api_command(*args):
  """Make an API call to the undercoat binary."""
  logs.log(f'Running undercoat command {args}')
  bundle_dir = environment.get_value('FUCHSIA_RESOURCES_DIR')
  undercoat_path = os.path.join(bundle_dir, 'undercoat', 'undercoat')
  undercoat = new_process.ProcessRunner(undercoat_path, args)
  # The undercoat log is sent to stderr, which we capture to a tempfile
  with tempfile.TemporaryFile() as undercoat_log:
    result = undercoat.run_and_wait(
        stderr=undercoat_log, extra_env={'TMPDIR': get_temp_dir()})
    result.output = utils.decode_to_unicode(result.output)

    if result.return_code != 0:
      # Dump the undercoat log to assist in debugging
      log_data = utils.read_from_handle_truncated(undercoat_log, 1024 * 1024)
      logs.log_warn('Log output from undercoat: ' +
                    utils.decode_to_unicode(log_data))

      # The API error message is returned on stdout
      raise UndercoatError(
          'Error running undercoat command %s: %s' % (args, result.output))

  return result
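Example 4's trick of routing a subprocess's stderr into a tempfile.TemporaryFile and reading it back only on failure works with the standard library alone. A minimal sketch of the same pattern, independent of the undercoat binary and assuming a POSIX shell:

import subprocess
import tempfile

with tempfile.TemporaryFile() as log:
    result = subprocess.run(['sh', '-c', 'echo some-diagnostic >&2'],
                            stdout=subprocess.PIPE, stderr=log)
    log.seek(0)  # rewind before reading the captured stderr
    print(result.returncode, log.read().decode('utf-8', errors='replace'))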
Example 5
  def process_stacktrace(self, unsymbolized_crash_stacktrace):
    self.frame_no = 0
    symbolized_crash_stacktrace = u''
    unsymbolized_crash_stacktrace_lines = \
      unsymbolized_crash_stacktrace.splitlines()
    if lkl.is_lkl_stack_trace(unsymbolized_crash_stacktrace):
      line_parser = self._lkl_line_parser
      self.lkl_binary_name = lkl.get_lkl_binary_name(
          unsymbolized_crash_stacktrace_lines)
      # This should never happen, but if it does, just return the
      # unsymbolized stack since we can't symbolize anything anyway.
      if not self.lkl_binary_name:
        return unsymbolized_crash_stacktrace
    else:
      line_parser = self._line_parser

    for line in unsymbolized_crash_stacktrace_lines:
      self.current_line = utils.decode_to_unicode(line.rstrip())
      frameno_str, addr, binary, offset, arch = line_parser(line)
      if not binary or not offset:
        symbolized_crash_stacktrace += u'%s\n' % self.current_line
        continue

      if frameno_str == '0':
        # Assume that frame #0 is the first frame of new stack trace.
        self.frame_no = 0
      original_binary = binary
      if self.binary_path_filter:
        binary = self.binary_path_filter(binary)
      symbolized_line = self.symbolize_address(addr, binary, offset, arch)
      if not symbolized_line:
        if original_binary != binary:
          symbolized_line = self.symbolize_address(addr, original_binary,
                                                   offset, arch)

      if not symbolized_line:
        symbolized_crash_stacktrace += u'%s\n' % self.current_line
      else:
        for symbolized_frame in symbolized_line:
          symbolized_crash_stacktrace += u'%s\n' % (
              '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
          self.frame_no += 1

    # Close any left-over open pipes. (|pipes| is assumed to be a
    # module-level list of symbolizer subprocesses.)
    for pipe in pipes:
      pipe.stdin.close()
      pipe.stdout.close()
      pipe.kill()

    return symbolized_crash_stacktrace
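The renumbering logic in Example 5 (reset the counter whenever the parser reports frame #0, then prefix each emitted frame with a running number) can be shown in miniature. Here frames is a list of (frameno_str, text) tuples, a simplified stand-in for the parser output above:

def renumber_frames(frames):
    """Prefix frames with a running index, resetting at each '#0' marker."""
    out = []
    frame_no = 0
    for frameno_str, text in frames:
        if frameno_str == '0':  # first frame of a new stack trace
            frame_no = 0
        out.append('    #%d %s' % (frame_no, text))
        frame_no += 1
    return out

print('\n'.join(renumber_frames([('0', 'main'), ('1', 'foo'), ('0', 'handler')])))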
Example 6
    def __init__(self,
                 return_code,
                 crash_time,
                 output,
                 unexpected_crash=False):
        self.return_code = return_code
        self.crash_time = crash_time
        self.output = utils.decode_to_unicode(
            output) if output else 'No output!'
        # For crashes against an expected state, this indicates whether there
        # was a crash that didn't match that state.
        self.unexpected_crash = unexpected_crash

        self._symbolized_crash_data = None
        self._unsymbolized_crash_data = None
Example 7
def _get_stats_from_log(log_path,
                        strategies=None,
                        arguments=None,
                        stats_overrides=None):
    """Calculate stats for the given log the same way as the engine does."""
    if strategies is None:
        strategies = []
    if arguments is None:
        arguments = []

    log_lines = utils.decode_to_unicode(
        utils.read_data_from_file(log_path, eval_data=False)).splitlines()
    stats = libfuzzer.parse_log_stats(log_lines)
    stats.update(
        performance_stats.parse_performance_features(log_lines, strategies,
                                                     arguments))
    if stats_overrides:
        stats.update(stats_overrides)

    return stats
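libfuzzer.parse_log_stats is not shown here, but libFuzzer itself prints status lines such as '#2 INITED cov: 3 ft: 3 corp: 1/1b exec/s: 0'. An illustrative parser for a couple of those fields (an assumption about the handled format, not the real implementation) could look like:

import re

_STAT_RE = re.compile(r'cov: (?P<cov>\d+).* corp: (?P<corp>\d+)')

def parse_stats(log_lines):
    """Pull the last-seen coverage and corpus counts from libFuzzer-style
    status lines (illustrative only)."""
    stats = {}
    for line in log_lines:
        match = _STAT_RE.search(line)
        if match:
            stats['cov'] = int(match.group('cov'))
            stats['corp'] = int(match.group('corp'))
    return stats

print(parse_stats(['#2 INITED cov: 3 ft: 3 corp: 1/1b exec/s: 0']))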
Example 8
def get_crash_info_and_stacktrace(application_command_line, crash_stacktrace,
                                  gestures):
    """Return crash minidump location and updated crash stacktrace."""
    app_name_lower = environment.get_value('APP_NAME').lower()
    retry_limit = environment.get_value('FAIL_RETRIES')
    using_android = environment.is_android()
    using_chrome = 'chrome' in app_name_lower or 'chromium' in app_name_lower
    warmup_timeout = environment.get_value('WARMUP_TIMEOUT', 90)

    # Minidump generation is only applicable to the Chrome application.
    # FIXME: Support minidump generation on platforms other than Android.
    if not using_chrome or not using_android:
        return None, crash_stacktrace

    # Get the crash info from stacktrace.
    crash_info = get_crash_info(crash_stacktrace)

    # If we lost the minidump file, we need to recreate it.
    # Note that because of the way crash_info is generated now, if we have a
    # non-None crash_info, we should also have its minidump path; we insert
    # the check to safeguard against possibly constructing the crash_info in
    # other ways in the future that might potentially lose the minidump path.
    if not crash_info or not crash_info.minidump_info.path:
        for _ in range(retry_limit):
            _, _, output = (process_handler.run_process(
                application_command_line,
                timeout=warmup_timeout,
                gestures=gestures))

            crash_info = get_crash_info(output)
            if crash_info and crash_info.minidump_info.path:
                crash_stacktrace = utils.decode_to_unicode(output)
                break

        if not crash_info or not crash_info.minidump_info.path:
            # We could not regenerate a minidump for this crash.
            logs.log('Unable to regenerate a minidump for this crash.')

    return crash_info, crash_stacktrace
Example 9
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
    """Execute ML RNN generator to produce new inputs.

  This method should be called inside the launcher to generate a number of
  new inputs based on the ML RNN model.

  It will fetch the ML model from the GCS bucket specified in the
  environment variable `CORPUS_BUCKET`. The script to run the model resides
  in the folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g. libpng_read_fuzzer. It indicates
        the subdirectory in the GCS bucket where models are stored.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
    if environment.platform() != 'LINUX':
        logs.log('Unsupported platform for ML RNN generation, skipping.')
        return

    # Validate corpus folder.
    file_count = shell.get_directory_file_count(input_directory)
    if not file_count:
        logs.log('Corpus is empty. Skip generation.')
        return

    # Number of inputs that already exist in the output directory; they were
    # possibly generated by other generators.
    old_corpus_units = shell.get_directory_file_count(output_directory)
    old_corpus_bytes = shell.get_directory_size(output_directory)

    # Get model path.
    model_path = prepare_model_directory(fuzzer_name)
    if not model_path:
        return

    result = run(input_directory, output_directory, model_path,
                 generation_timeout)

    # The generation process exited abnormally, and not because of a timeout,
    # meaning an error occurred during execution.
    if result.return_code and not result.timed_out:
        if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
            logs.log_warn(
                'ML RNN generation for fuzzer %s aborted due to small corpus.'
                % fuzzer_name)
        else:
            logs.log_error(
                'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
                (fuzzer_name, result.return_code),
                output=utils.decode_to_unicode(result.output))
        return

    # A timeout is not an error if new units were generated.
    if result.timed_out:
        logs.log_warn('ML RNN generation for fuzzer %s timed out.' %
                      fuzzer_name)

    new_corpus_units = (shell.get_directory_file_count(output_directory) -
                        old_corpus_units)
    new_corpus_bytes = (shell.get_directory_size(output_directory) -
                        old_corpus_bytes)
    if new_corpus_units:
        logs.log(
            'Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
            (new_corpus_units, new_corpus_bytes, fuzzer_name))
    else:
        logs.log_error('ML RNN generator did not produce any inputs for %s' %
                       fuzzer_name,
                       output=utils.decode_to_unicode(result.output))
Example 10
def parse_mime_to_crash_report_info(local_minidump_mime_path):
    """Read the (local) minidump MIME file into a CrashReportInfo object."""
    # Get the minidump name and path.
    minidump_path_match = re.match(r'(.*)\.mime', local_minidump_mime_path)
    if minidump_path_match is None:
        logs.log_error('Minidump filename in unexpected format: \'%s\'.' %
                       local_minidump_mime_path)
        return None
    minidump_path = '%s.dmp' % minidump_path_match.group(1).strip()

    # Reformat the minidump MIME to include the boundary.
    with open(local_minidump_mime_path, 'rb') as minidump_mime_file_content:
        # The boundary is the first line, minus its two leading dashes.
        boundary = minidump_mime_file_content.readline().strip()[2:]
        minidump_mime_bytes = (
            b'Content-Type: multipart/form-data; boundary=\"%s\"\r\n--%s\r\n' %
            (boundary, boundary))
        minidump_mime_bytes += minidump_mime_file_content.read()

    minidump_mime_contents = email.message_from_bytes(minidump_mime_bytes)

    # Parse the MIME contents, extracting the parameters needed for upload.
    mime_key_values = {}
    for mime_part in minidump_mime_contents.get_payload():
        if isinstance(mime_part, str):
            mime_part = utils.decode_to_unicode(mime_part)
            logs.log_error('Unexpected str mime_part from mime path %s: %s' %
                           (local_minidump_mime_path, mime_part))
            continue
        part_descriptor = list(mime_part.values())
        key_tokens = part_descriptor[0].split('; ')
        key_match = re.match(r'name="(.*)".*', key_tokens[1])

        # Extract from the MIME part the key-value pairs used by report uploading.
        if key_match is not None:
            report_key = key_match.group(1)
            report_value = mime_part.get_payload(decode=True)
            if report_key == MINIDUMP_FILE_KEY:
                utils.write_data_to_file(report_value, minidump_path)
            else:
                # Take care of aliases.
                if report_key in ('prod', 'buildTargetId'):
                    report_key = PRODUCT_KEY
                elif report_key == 'ver':
                    report_key = VERSION_KEY

                # Save the key-value pair.
                mime_key_values[report_key] = report_value

    # Pull out product and version explicitly since these are required
    # for upload.
    product, version = None, None
    if PRODUCT_KEY in mime_key_values:
        product = mime_key_values.pop(PRODUCT_KEY).decode('utf-8')
    else:
        logs.log_error('Could not find \'%s\' or an alias in mime_key_values.'
                       % PRODUCT_KEY)
    if VERSION_KEY in mime_key_values:
        version = mime_key_values.pop(VERSION_KEY).decode('utf-8')
    else:
        logs.log_error('Could not find \'%s\' or an alias in mime_key_values.'
                       % VERSION_KEY)

    # If missing, return None and log keys that do exist; otherwise, construct
    # CrashReportInfo and return.
    if product is None or version is None:
        logs.log_error('mime_key_values dict keys:\n%s' %
                       str(list(mime_key_values.keys())))
        return None

    return CrashReportInfo(minidump_path=minidump_path,
                           product=product,
                           version=version,
                           optional_params=mime_key_values)
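The reconstruction step in Example 10 (prepend a Content-Type header that names the boundary so email.message_from_bytes can split the parts) can be demonstrated end to end with a hand-built multipart body:

import email

boundary = b'----boundary'
body = (b'Content-Type: multipart/form-data; boundary="' + boundary +
        b'"\r\n\r\n--' + boundary + b'\r\n'
        b'Content-Disposition: form-data; name="prod"\r\n\r\n'
        b'SomeProduct\r\n'
        b'--' + boundary + b'--\r\n')

message = email.message_from_bytes(body)
for part in message.get_payload():
    print(part.get('Content-Disposition'), '->', part.get_payload(decode=True))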
Example 11
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
    """Executes a process with a given command line and other parameters."""
    if environment.is_trusted_host() and testcase_run:
        from clusterfuzz._internal.bot.untrusted_runner import remote_process_host
        return remote_process_host.run_process(cmdline,
                                               current_working_directory,
                                               timeout, need_shell, gestures,
                                               env_copy, testcase_run,
                                               ignore_children)

    if gestures is None:
        gestures = []

    if env_copy:
        os.environ.update(env_copy)

    # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
    # do certain operations before fuzzer setup (e.g. bad build check).
    launcher = environment.get_value('LAUNCHER_PATH')

    # This is used when running scripts natively on Linux rather than on the
    # device, e.g. running a fuzzer to generate testcases, or a launcher
    # script.
    plt = environment.platform()
    runs_on_device = environment.is_android(plt) or plt == 'FUCHSIA'
    if runs_on_device and (not testcase_run or launcher):
        plt = 'LINUX'

    is_android = environment.is_android(plt)

    # Lower the testcase timeout slightly to leave time for crash analysis.
    timeout -= CRASH_ANALYSIS_TIME

    # LeakSanitizer hack - give time for stdout/stderr processing.
    lsan = environment.get_value('LSAN', False)
    if lsan:
        timeout -= LSAN_ANALYSIS_TIME

    # Initialize variables.
    adb_output = None
    process_output = ''
    process_status = None
    return_code = 0
    process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
    start_time = time.time()
    watch_for_process_exit = (environment.get_value('WATCH_FOR_PROCESS_EXIT')
                              if is_android else True)
    window_list = []

    # Get gesture start time from last element in gesture list.
    gestures = copy.deepcopy(gestures)
    if gestures and gestures[-1].startswith('Trigger'):
        gesture_start_time = int(gestures[-1].split(':')[1])
        gestures.pop()
    else:
        gesture_start_time = timeout // 2

    if is_android:
        # Clear the log upfront.
        android.logger.clear_log()

        # Run the app.
        adb_output = android.adb.run_command(cmdline, timeout=timeout)
    else:
        cmd = shell.get_command(cmdline)

        process_output = mozprocess.processhandler.StoreOutput()
        process_status = ProcessStatus()
        try:
            process_handle = mozprocess.ProcessHandlerMixin(
                cmd,
                args=None,
                cwd=current_working_directory,
                shell=need_shell,
                processOutputLine=[process_output],
                onFinish=[process_status],
                ignore_children=ignore_children)
            start_process(process_handle)
        except:
            logs.log_error('Exception occurred when running command: %s.' %
                           cmdline)
            return None, None, ''

    while True:
        time.sleep(process_poll_interval)

        # Run the gestures at gesture_start_time, or retry if we didn't find
        # any windows on the last attempt.
        if (gestures and time.time() - start_time >= gesture_start_time
                and not window_list):
            # If we didn't find any windows, increment the gesture start time
            # so that the next check happens one second later.
            gesture_start_time += 1

            if plt == 'LINUX':
                linux.gestures.run_gestures(gestures, process_handle.pid,
                                            process_status, start_time,
                                            timeout, window_list)
            elif plt == 'WINDOWS':
                windows.gestures.run_gestures(gestures, process_handle.pid,
                                              process_status, start_time,
                                              timeout, window_list)
            elif is_android:
                android.gestures.run_gestures(gestures, start_time, timeout)

                # TODO(mbarbella): We add a fake window here to prevent gestures on
                # Android from getting executed more than once.
                window_list = ['FAKE']

        if time.time() - start_time >= timeout:
            break

        # Collect the process output.
        output = (android.logger.log_output()
                  if is_android else b'\n'.join(process_output.output))
        output = utils.decode_to_unicode(output)
        if crash_analyzer.is_memory_tool_crash(output):
            break

        # Check if we need to bail out on process exit.
        if watch_for_process_exit:
            # If |watch_for_process_exit| is set, then we already completed running
            # our app launch command. So, we can bail out.
            if is_android:
                break

            # On desktop, we bail out as soon as the process finishes.
            if process_status and process_status.finished:
                # Wait for process shutdown and set return code.
                process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
                break

    # Process output based on platform.
    if is_android:
        # Get the current log output. If the device is rebooting, logcat
        # automatically waits for it to come back online.
        time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
        output = android.logger.log_output()

        if android.constants.LOW_MEMORY_REGEX.search(output):
            # If the device is low on memory, force a reboot and bail out to
            # prevent it from getting into a frozen state.
            logs.log('Device is low on memory, rebooting.', output=output)
            android.adb.hard_reset()
            android.adb.wait_for_device()

        elif android.adb.time_since_last_reboot() < time.time() - start_time:
            # A reboot has happened; append the log output from before the
            # reboot and the kernel log contents to the output.
            log_before_last_reboot = android.logger.log_output_before_last_reboot(
            )
            kernel_log = android.adb.get_kernel_log_content()
            output = '%s%s%s%s%s' % (
                log_before_last_reboot,
                utils.get_line_seperator('Device rebooted'), output,
                utils.get_line_seperator('Kernel Log'), kernel_log)
            # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
            # in ~0.15 sec and is needed especially between runs for kernel crashes.
            android.adb.run_as_root()
            android.settings.change_se_linux_to_permissive_mode()
            return_code = 1

        # Add output from adb to the front.
        if adb_output:
            output = '%s\n\n%s' % (adb_output, output)

        # Kill the application if it is still running. We do this at the end to
        # prevent this from adding noise to the logcat output.
        task_name = environment.get_value('TASK_NAME')
        child_process_termination_pattern = environment.get_value(
            'CHILD_PROCESS_TERMINATION_PATTERN')
        if task_name == 'fuzz' and child_process_termination_pattern:
            # In some cases, we do not want to terminate the application after each
            # run to avoid long startup times (e.g. for chrome). Terminate processes
            # matching a particular pattern for light cleanup in this case.
            android.adb.kill_processes_and_children_matching_name(
                child_process_termination_pattern)
        else:
            # There is no special termination behavior. Simply stop the application.
            android.app.stop()

    else:
        # Get the return code in case the process has finished already.
        # If the process hasn't finished, return_code will be None which is what
        # callers expect unless the output indicates a crash.
        return_code = process_handle.poll()

        # If the process is still running, then terminate it.
        if not process_status.finished:
            launcher_with_interpreter = shell.get_execute_command(
                launcher) if launcher else None
            if (launcher_with_interpreter
                    and cmdline.startswith(launcher_with_interpreter)):
                # If this was a launcher script, we KILL all child processes created
                # except for APP_NAME.
                # It is expected that, if the launcher script terminated normally, it
                # cleans up all the child processes it created itself.
                terminate_root_and_child_processes(process_handle.pid)
            else:
                try:
                    # kill() here actually sends SIGTERM on posix.
                    process_handle.kill()
                except:
                    pass

        if lsan:
            time.sleep(LSAN_ANALYSIS_TIME)

        output = b'\n'.join(process_output.output)
        output = utils.decode_to_unicode(output)

        # X server hack for when the maximum number of clients is reached.
        if ('Maximum number of clients reached' in output
                or 'Unable to get connection to X server' in output):
            logs.log_error('Unable to connect to X server, exiting.')
            os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
            sys.exit(0)

    if testcase_run and (crash_analyzer.is_memory_tool_crash(output)
                         or crash_analyzer.is_check_failure_crash(output)):
        return_code = 1

    # If a crash is found, then we add the memory state as well.
    if return_code and is_android:
        ps_output = android.adb.get_ps_output()
        if ps_output:
            output += utils.get_line_seperator('Memory Statistics')
            output += ps_output

    if return_code:
        logs.log_warn('Process (%s) ended with exit code (%s).' %
                      (repr(cmdline), str(return_code)),
                      output=output)

    return return_code, round(time.time() - start_time, 1), output
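At its core, Example 11's main loop is a poll-sleep pattern: sleep for a fixed interval, compare elapsed time against the (already reduced) timeout, and bail out early once the process finishes or the output looks like a crash. Stripped of gestures, Android handling, and crash analysis, the skeleton looks like this (a simplified sketch, not the ClusterFuzz implementation):

import subprocess
import time

def run_with_poll_loop(cmd, timeout, poll_interval=0.5):
    """Start a process and poll until it exits or the timeout elapses."""
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    start = time.time()
    while time.time() - start < timeout:
        time.sleep(poll_interval)
        if proc.poll() is not None:  # process finished on its own
            break
    else:
        proc.kill()  # still running at the deadline; terminate it
    output, _ = proc.communicate()
    return proc.returncode, round(time.time() - start, 1), output

print(run_with_poll_loop(['echo', 'done'], timeout=5))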
Example 12
def execute_task(full_fuzzer_name, job_type):
    """Execute ML RNN training task.

  The task is training RNN model by default. If more models are developed,
  arguments can be modified to specify which model to use.

  Args:
    full_fuzzer_name: Fully qualified name of the fuzzer, e.g.
        libpng_read_fuzzer.
    job_type: Job type, e.g. libfuzzer_chrome_asan.
  """
    del job_type

    # Look up the fuzz target for the given fuzzer name.
    fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
    if not fuzz_target:
        logs.log_warn(
            f'Fuzzer not found: {full_fuzzer_name}, skip RNN training.')
        return
    fuzzer_name = fuzz_target.project_qualified_name()

    # Directory to place training files, such as logs, models, corpus.
    # Use |FUZZ_INPUTS_DISK| since it is not size constrained.
    temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

    # Get corpus.
    corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
    shell.remove_directory(corpus_directory, recreate=True)

    logs.log('Downloading corpus backup for %s.' % fuzzer_name)

    if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
        logs.log_error('Failed to download corpus backup for %s.' %
                       fuzzer_name)
        return

    # Get the directory to save models.
    model_directory = get_model_files_directory(temp_directory, fuzzer_name)
    shell.remove_directory(model_directory, recreate=True)

    # Get the directory to save training logs.
    log_directory = get_model_log_directory(temp_directory, fuzzer_name)
    shell.remove_directory(log_directory, recreate=True)

    result = train_rnn(corpus_directory, model_directory, log_directory)

    # The training process exited abnormally, and not because of a timeout,
    # meaning an error occurred during execution.
    if result.return_code and not result.timed_out:
        if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
            logs.log_warn(
                'ML RNN training task for fuzzer %s aborted due to small corpus.'
                % fuzzer_name)
        else:
            logs.log_error(
                'ML RNN training task for fuzzer %s failed with ExitCode = %d.'
                % (fuzzer_name, result.return_code),
                output=utils.decode_to_unicode(result.output))
        return

    # A timeout may be caused by a large training corpus, but intermediate
    # models are saved frequently and can still be uploaded.
    if result.timed_out:
        logs.log_warn('ML RNN training task for %s timed out.' % fuzzer_name)

    upload_model_to_gcs(model_directory, fuzzer_name)