Example #1
def _wait_for_operation(response, project, zone):
  """Wait for the given operation to complete."""
  if 'status' in response and response['status'] == 'DONE':
    return

  if 'kind' not in response or response['kind'] != 'compute#operation':
    logs.log_error('Compute API response is not an operation.')
    return

  api = _get_api()
  operation = response['name']
  start_time = datetime.datetime.utcnow()

  while not dates.time_has_expired(start_time, seconds=OPERATION_TIMEOUT):
    operation_func = api.zoneOperations().get(
        operation=operation, project=project, zone=zone)
    response = _execute_api_call_with_retries(operation_func)

    if 'status' not in response:
      logs.log_error('Invalid compute engine operation %s.' % str(operation))
      return

    if response['status'] == 'DONE':
      return

    time.sleep(POLL_INTERVAL)

  logs.log_error('Compute engine operation %s timed out.' % str(operation))
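
All of these examples hinge on dates.time_has_expired. As a point of
reference, here is a minimal sketch of what such a helper might look like,
assuming it measures elapsed time from a UTC timestamp (the actual
ClusterFuzz implementation may differ):

import datetime

def time_has_expired(timestamp, days=0, hours=0, minutes=0, seconds=0):
  """Return True if the given duration has elapsed since |timestamp| (UTC)."""
  duration = datetime.timedelta(
      days=days, hours=hours, minutes=minutes, seconds=seconds)
  return datetime.datetime.utcnow() - timestamp > duration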
Example #2
    def check_public_testcase(self, blob_info, testcase):
        """Check public testcase."""
        if blob_info.key() != testcase.minimized_keys:
            return False

        if not testcase.bug_information:
            return False

        issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
            testcase)
        issue = issue_tracker.get_issue(testcase.bug_information)
        if not issue:
            return False

        # If the issue is explicitly marked as view restricted to committers only
        # (OSS-Fuzz only), then don't allow public download.
        if 'restrict-view-commit' in issue.labels:
            return False

        # For OSS-Fuzz, delay the disclosure of the reproducer by 30 days.
        # If the deadline has already passed, the reproducer was already made
        # public, so exclude that case.
        if (utils.is_oss_fuzz() and 'deadline-exceeded' not in issue.labels
                and issue.closed_time and not dates.time_has_expired(
                    issue.closed_time, days=_OSS_FUZZ_REPRODUCER_DELAY)):
            return False

        return True
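
The 30-day disclosure delay above can be isolated into a small predicate. A
hedged sketch, assuming _OSS_FUZZ_REPRODUCER_DELAY is a day count (its use
with days= suggests so); the constant's value here is illustrative:

import datetime

_OSS_FUZZ_REPRODUCER_DELAY = 30  # Days; illustrative value.

def reproducer_embargoed(closed_time):
  """Return True while a closed issue's reproducer should stay private."""
  elapsed = datetime.datetime.utcnow() - closed_time
  return elapsed < datetime.timedelta(days=_OSS_FUZZ_REPRODUCER_DELAY)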
Example #3
def wait_until_good_state():
  """Check the battery and make sure it is charged beyond the minimum level
  and within temperature thresholds."""
  # Battery levels are not applicable on cuttlefish (GCE-hosted) or
  # automotive devices.
  if environment.is_android_cuttlefish() or settings.is_automotive():
    return

  # Make sure device is online.
  adb.wait_for_device()

  # Skip battery check if done recently.
  last_battery_check_time = persistent_cache.get_value(
      LAST_BATTERY_CHECK_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if last_battery_check_time and not dates.time_has_expired(
      last_battery_check_time, seconds=BATTERY_CHECK_INTERVAL):
    return

  # Initialize variables.
  battery_level_threshold = environment.get_value('LOW_BATTERY_LEVEL_THRESHOLD',
                                                  LOW_BATTERY_LEVEL_THRESHOLD)
  battery_temperature_threshold = environment.get_value(
      'MAX_BATTERY_TEMPERATURE_THRESHOLD', MAX_BATTERY_TEMPERATURE_THRESHOLD)
  device_restarted = False

  while True:
    battery_information = get_battery_level_and_temperature()
    if battery_information is None:
      logs.log_error('Failed to get battery information, skipping check.')
      return

    battery_level = battery_information['level']
    battery_temperature = battery_information['temperature']
    logs.log('Battery information: level (%d%%), temperature (%.1f celsius).' %
             (battery_level, battery_temperature))
    if (battery_level >= battery_level_threshold and
        battery_temperature <= battery_temperature_threshold):
      persistent_cache.set_value(LAST_BATTERY_CHECK_TIME_KEY, time.time())
      return

    logs.log('Battery in bad state, putting device in sleep mode.')

    if not device_restarted:
      adb.reboot()
      device_restarted = True

    # Change thresholds to the expected levels (only for the ones that were
    # not met).
    if battery_level < battery_level_threshold:
      battery_level_threshold = EXPECTED_BATTERY_LEVEL
    if battery_temperature > battery_temperature_threshold:
      battery_temperature_threshold = EXPECTED_BATTERY_TEMPERATURE

    # Stopping shell should help with shutting off a lot of services that would
    # otherwise use up the battery. However, we need to turn it back on to get
    # battery status information.
    adb.stop_shell()
    time.sleep(BATTERY_CHARGE_INTERVAL)
    adb.start_shell()
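
The throttle at the top of wait_until_good_state is a recurring pattern on
this page: store the last-run time in a cache and skip the work if the
interval has not yet expired. A self-contained sketch, with an in-memory
dict standing in for persistent_cache:

import datetime

_check_cache = {}  # Stand-in for persistent_cache.

def check_is_due(key, interval_seconds):
  """Return True (and record the time) if |interval_seconds| have elapsed."""
  now = datetime.datetime.utcnow()
  last_time = _check_cache.get(key)
  if last_time and (now - last_time).total_seconds() < interval_seconds:
    return False
  _check_cache[key] = now
  return True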
Example #4
def update_tests_if_needed():
    """Updates layout tests every day."""
    data_directory = environment.get_value('FUZZ_DATA')
    error_occurred = False
    expected_task_duration = 60 * 60  # 1 hour.
    retry_limit = environment.get_value('FAIL_RETRIES')
    temp_archive = os.path.join(data_directory, 'temp.zip')
    tests_url = environment.get_value('WEB_TESTS_URL')

    # Check if we have a valid tests url.
    if not tests_url:
        return

    # Layout test updates are usually disabled to speed up local testing.
    if environment.get_value('LOCAL_DEVELOPMENT'):
        return

    # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
    if not environment.get_value('UPDATE_WEB_TESTS'):
        return

    last_modified_time = persistent_cache.get_value(
        TESTS_LAST_UPDATE_KEY, constructor=datetime.datetime.utcfromtimestamp)
    if (last_modified_time is not None
            and not dates.time_has_expired(last_modified_time,
                                           days=TESTS_UPDATE_INTERVAL_DAYS)):
        return

    logs.log('Updating layout tests.')
    tasks.track_task_start(tasks.Task('update_tests', '', ''),
                           expected_task_duration)

    # Download and unpack the tests archive.
    for _ in range(retry_limit):
        try:
            shell.remove_directory(data_directory, recreate=True)
            storage.copy_file_from(tests_url, temp_archive)
            archive.unpack(temp_archive, data_directory, trusted=True)
            shell.remove_file(temp_archive)
            error_occurred = False
            break
        except Exception:
            logs.log_error(
                'Could not retrieve and unpack layout tests archive. Retrying.'
            )
            error_occurred = True

    if not error_occurred:
        persistent_cache.set_value(TESTS_LAST_UPDATE_KEY,
                                   time.time(),
                                   persist_across_reboots=True)

    tasks.track_task_end()
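
The download-and-unpack loop above follows a generic retry shape: attempt up
to retry_limit times, stop on the first success, and only record success if
the final attempt did not fail. A distilled sketch, with a callable standing
in for the storage and archive steps:

def run_with_retries(action, retry_limit):
  """Run |action| up to |retry_limit| times; return True on first success."""
  for _ in range(retry_limit):
    try:
      action()
      return True
    except Exception:
      pass  # A real caller would log the error here, as the example does.
  return False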
Example #5
def beat(previous_state, log_filename):
    """Run a cycle of heartbeat checks to ensure bot is running."""
    # Handle the case when the run_bot.py script is stuck. If so, kill its
    # process.
    task_end_time = tasks.get_task_end_time()
    if psutil and task_end_time and dates.time_has_expired(
            task_end_time, seconds=tasks.TASK_COMPLETION_BUFFER):

        # Get absolute path to |run_bot| script. We use this to identify unique
        # instances of bot running on a particular host.
        startup_scripts_directory = environment.get_startup_scripts_directory()
        bot_file_path = os.path.join(startup_scripts_directory, 'run_bot')

        for process in psutil.process_iter():
            try:
                command_line = ' '.join(process.cmdline())
            except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
                continue

            # Find the process running the main bot script.
            if bot_file_path not in command_line:
                continue

            process_id = process.pid
            logs.log('Killing stale bot (pid %d) which seems to be stuck.' %
                     process_id)
            try:
                process_handler.terminate_root_and_child_processes(process_id)
            except Exception:
                logs.log_error('Failed to terminate stale bot processes.')

        # Minor cleanup to avoid disk space issues on bot restart.
        process_handler.terminate_stale_application_instances()
        shell.clear_temp_directory()
        shell.clear_testcase_directories()

        # The stale processes should now be killed, so delete the stale task.
        tasks.track_task_end()

    # Figure out when the log file was last modified.
    try:
        current_state = str(os.path.getmtime(log_filename))
    except Exception:
        current_state = None

    # Only update the heartbeat if the log file was modified.
    if current_state and current_state != previous_state:
        # Try updating the heartbeat. If an error occurs, just return None
        # so the caller can retry later.
        if not data_handler.update_heartbeat():
            return None
        # Heartbeat is successfully updated.

    return current_state
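
A caller would typically drive beat() in a loop, threading the returned
state back into the next call. The wrapper below is illustrative; only
beat() itself comes from the example above:

import time

def run_heartbeat_loop(log_filename, wait_seconds=60):
  """Call beat() periodically, carrying its state between iterations."""
  previous_state = None
  while True:
    previous_state = beat(previous_state, log_filename)
    time.sleep(wait_seconds)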
Example #6
def add_test_accounts_if_needed():
    """Add test account to work with GmsCore, etc."""
    last_test_account_check_time = persistent_cache.get_value(
        constants.LAST_TEST_ACCOUNT_CHECK_KEY,
        constructor=datetime.datetime.utcfromtimestamp)
    needs_test_account_update = (last_test_account_check_time is None
                                 or dates.time_has_expired(
                                     last_test_account_check_time,
                                     seconds=ADD_TEST_ACCOUNT_CHECK_INTERVAL))
    if not needs_test_account_update:
        return

    config = db_config.get()
    if not config:
        return

    test_account_email = config.test_account_email
    test_account_password = config.test_account_password
    if not test_account_email or not test_account_password:
        return

    adb.run_as_root()
    wifi.configure(force_enable=True)

    if not app.is_installed(ADD_TEST_ACCOUNT_PKG_NAME):
        logs.log('Installing helper apk for adding test account.')
        android_directory = environment.get_platform_resources_directory()
        add_test_account_apk_path = os.path.join(android_directory,
                                                 ADD_TEST_ACCOUNT_APK_NAME)
        app.install(add_test_account_apk_path)

    logs.log('Trying to add test account.')
    output = adb.run_shell_command(
        'am instrument -e account %s -e password %s -w %s' %
        (test_account_email, test_account_password,
         ADD_TEST_ACCOUNT_CALL_PATH),
        timeout=ADD_TEST_ACCOUNT_TIMEOUT)
    if not output or test_account_email not in output:
        logs.log('Failed to add test account, probably due to wifi issues.')
        return

    logs.log('Test account added successfully.')
    persistent_cache.set_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY,
                               time.time())
Example #7
def _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    """Return true if the data bundle is up to date, false otherwise."""
    sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)

    if environment.is_trusted_host() and data_bundle.sync_to_worker:
        from clusterfuzz._internal.bot.untrusted_runner import file_host
        worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
        shell.remove_file(sync_file_path)
        file_host.copy_file_from_worker(worker_sync_file_path, sync_file_path)

    if not os.path.exists(sync_file_path):
        return False

    last_sync_time = datetime.datetime.utcfromtimestamp(
        utils.read_data_from_file(sync_file_path))

    # Check if we recently synced.
    if not dates.time_has_expired(
            last_sync_time, seconds=_DATA_BUNDLE_SYNC_INTERVAL_IN_SECONDS):
        return True

    # For search index data bundles, we don't sync them from the bucket.
    # Instead, we rely on the fuzzer to generate testcases periodically.
    if _is_search_index_data_bundle(data_bundle.name):
        return False

    # Check when the bucket URL was last updated. If there are no new updates,
    # there is no need to update the directory.
    bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)
    last_updated_time = storage.last_updated(bucket_url)
    if last_updated_time and last_sync_time > last_updated_time:
        logs.log('Data bundle %s has no new content from last sync.' %
                 data_bundle.name)
        return True

    return False
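
The sync file above is assumed to hold a single Unix timestamp, written
after a successful sync and read back with utcfromtimestamp. A hedged sketch
of that read/write pair (the real helpers may behave differently):

import datetime
import time

def write_sync_file(sync_file_path):
  """Record the current Unix time as the last successful sync."""
  with open(sync_file_path, 'w') as handle:
    handle.write(str(time.time()))

def read_sync_time(sync_file_path):
  """Return the last sync time as a UTC datetime."""
  with open(sync_file_path) as handle:
    return datetime.datetime.utcfromtimestamp(float(handle.read()))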
Example #8
  def get(self):
    """Handle a get request."""
    try:
      grouper.group_testcases()
    except Exception:
      logs.log_error('Error occurred while grouping test cases.')
      return

    # Free up memory after group task run.
    utils.python_gc()

    # Get a list of jobs excluded from bug filing.
    excluded_jobs = _get_excluded_jobs()

    # Get a list of all jobs. This is used to filter testcases whose jobs have
    # been removed.
    all_jobs = data_handler.get_all_job_type_names()

    for testcase_id in data_handler.get_open_testcase_id_iterator():
      try:
        testcase = data_handler.get_testcase_by_id(testcase_id)
      except errors.InvalidTestcaseError:
        # Already deleted.
        continue

      # Skip if testcase's job is removed.
      if testcase.job_type not in all_jobs:
        continue

      # Skip if testcase's job is in exclusions list.
      if testcase.job_type in excluded_jobs:
        continue

      # Skip if we are running progression task at this time.
      if testcase.get_metadata('progression_pending'):
        continue

      # If the testcase has a bug filed already, no triage is needed.
      if _is_bug_filed(testcase):
        continue

      # Check if the crash is important, i.e. it is either a reproducible crash
      # or an unreproducible crash happening frequently.
      if not _is_crash_important(testcase):
        continue

      # Require that all tasks like minimization, regression testing, etc.
      # have finished.
      if not data_handler.critical_tasks_completed(testcase):
        continue

      # For a testcase that is not part of a group, wait additional time to
      # make sure it gets grouped.
      # The grouper runs prior to this step in the same cron, but there is a
      # window of time where new testcases can come in after the grouper starts.
      # This delay needs to be longer than the maximum time the grouper can take
      # to account for that.
      # FIXME: In future, grouping might be dependent on regression range, so we
      # would have to add an additional wait time.
      if not testcase.group_id and not dates.time_has_expired(
          testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
        continue

      # If this project does not have an associated issue tracker, we cannot
      # file this crash anywhere.
      issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
          testcase)
      if not issue_tracker:
        issue_filer.notify_issue_update(testcase, 'new')
        continue

      # If there are similar issues to this test case already filed or recently
      # closed, skip filing a duplicate bug.
      if _check_and_update_similar_bug(testcase, issue_tracker):
        continue

      # Clean up old triage messages that would be not applicable now.
      testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False)

      # File the bug first and then create filed bug metadata.
      try:
        issue_filer.file_issue(testcase, issue_tracker)
      except Exception as e:
        logs.log_error('Failed to file issue for testcase %d.' % testcase_id)
        _add_triage_message(testcase,
                            f'Failed to file issue due to exception: {str(e)}')

        continue

      _create_filed_bug_metadata(testcase)
      issue_filer.notify_issue_update(testcase, 'new')

      logs.log('Filed new issue %s for testcase %d.' %
               (testcase.bug_information, testcase_id))
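
The grouping delay near the end of the loop reads as a standalone predicate:
an ungrouped testcase must age past a minimum window before triage. The
threshold below is illustrative; the real value lives in
data_types.MIN_ELAPSED_TIME_SINCE_REPORT:

import datetime

MIN_ELAPSED_TIME_SINCE_REPORT = 4  # Hours; illustrative value.

def old_enough_for_triage(group_id, timestamp):
  """Return True if a testcase is grouped or past the grouping window."""
  if group_id:
    return True
  elapsed = datetime.datetime.utcnow() - timestamp
  return elapsed > datetime.timedelta(hours=MIN_ELAPSED_TIME_SINCE_REPORT)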
Example #9
def _check_and_update_similar_bug(testcase, issue_tracker):
  """Get list of similar open issues and ones that were recently closed."""
  # Get similar testcases from the same group.
  similar_testcases_from_group = []
  if testcase.group_id:
    group_query = data_types.Testcase.query(
        data_types.Testcase.group_id == testcase.group_id)
    similar_testcases_from_group = ndb_utils.get_all_from_query(
        group_query, batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)

  # Get testcases with the same crash params. These might not be in a group
  # if they were just fixed.
  same_crash_params_query = data_types.Testcase.query(
      data_types.Testcase.crash_type == testcase.crash_type,
      data_types.Testcase.crash_state == testcase.crash_state,
      data_types.Testcase.security_flag == testcase.security_flag,
      data_types.Testcase.project_name == testcase.project_name,
      data_types.Testcase.status == 'Processed')

  similar_testcases_from_query = ndb_utils.get_all_from_query(
      same_crash_params_query,
      batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)
  for similar_testcase in itertools.chain(similar_testcases_from_group,
                                          similar_testcases_from_query):
    # Exclude ourselves from the comparison.
    if similar_testcase.key.id() == testcase.key.id():
      continue

    # Exclude similar testcases without bug information.
    if not similar_testcase.bug_information:
      continue

    # Get the issue object given its ID.
    issue = issue_tracker.get_issue(similar_testcase.bug_information)
    if not issue:
      continue

    # If the reproducible issue is not verified yet, the bug is still valid
    # and might be caused by the latest builds not being available. In that
    # case, don't file a new bug yet.
    if similar_testcase.open and not similar_testcase.one_time_crasher_flag:
      return True

    # If the issue is still open, no need to file a duplicate bug.
    if issue.is_open:
      return True

    # If the issue indicates that this crash needs to be ignored, no need to
    # file another one.
    policy = issue_tracker_policy.get(issue_tracker.project)
    ignore_label = policy.label('ignore')
    if ignore_label in issue.labels:
      _add_triage_message(
          testcase,
          ('Skipping filing a bug since similar testcase ({testcase_id}) in '
           'issue ({issue_id}) is blacklisted with the {ignore_label} label.'
          ).format(
              testcase_id=similar_testcase.key.id(),
              issue_id=issue.id,
              ignore_label=ignore_label))
      return True

    # If the issue was recently closed, wait a certain time period to make
    # sure our fixed verification has completed.
    if (issue.closed_time and not dates.time_has_expired(
        issue.closed_time, hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED)):
      _add_triage_message(
          testcase,
          ('Delaying filing a bug since similar testcase '
           '({testcase_id}) in issue ({issue_id}) was just fixed.').format(
               testcase_id=similar_testcase.key.id(), issue_id=issue.id))
      return True

  return False
Example #10
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state."""
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out
    # and will probably get re-imaged, e.g. by frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  last_flash_time = persistent_cache.get_value(
      constants.LAST_FLASH_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=FLASH_INTERVAL)
  if not needs_flash:
    return

  is_google_device = settings.is_google_device()
  if is_google_device is None:
    logs.log_error('Unable to query device. Reimaging failed.')
    adb.bad_state_reached()

  elif not is_google_device:
    # We can't reimage these, skip.
    logs.log('Non-Google device found, skipping reimage.')
    return

  # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
  # are set. If not, we don't have enough data for reimaging and hence
  # we bail out.
  branch = environment.get_value('BUILD_BRANCH')
  target = environment.get_value('BUILD_TARGET')
  if not target:
    # We default to userdebug configuration.
    build_params = settings.get_build_parameters()
    if build_params:
      target = build_params.get('target') + '-userdebug'

      # Cache target in environment. This is also useful for cases when the
      # device is bricked and we don't have this information available.
      environment.set_value('BUILD_TARGET', target)

  if not branch or not target:
    logs.log_warn(
        'BUILD_BRANCH and BUILD_TARGET are not set, skipping reimage.')
    return

  image_directory = environment.get_value('IMAGES_DIR')
  build_info = fetch_artifact.get_latest_artifact_info(branch, target)
  if not build_info:
    logs.log_error('Unable to fetch information on latest build artifact for '
                   'branch %s and target %s.' % (branch, target))
    return

  if environment.is_android_cuttlefish():
    download_latest_build(build_info, FLASH_CUTTLEFISH_REGEXES, image_directory)
    adb.recreate_cuttlefish_device()
    adb.connect_to_cuttlefish_device()
  else:
    download_latest_build(build_info, FLASH_IMAGE_REGEXES, image_directory)
    # We do one device flash at a time on one host, otherwise we run into
    # failures and the device gets stuck in a bad state.
    flash_lock_key_name = 'flash:%s' % socket.gethostname()
    if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
      logs.log_error('Failed to acquire lock for reimaging, exiting.')
      return

    logs.log('Reimaging started.')
    logs.log('Rebooting into bootloader mode.')
    for _ in range(FLASH_RETRIES):
      adb.run_as_root()
      adb.run_command(['reboot-bootloader'])
      time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
      adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
      adb.run_fastboot_command(['-w', 'reboot-bootloader'])

      for partition, partition_image_filename in FLASH_IMAGE_FILES:
        partition_image_file_path = os.path.join(image_directory,
                                                 partition_image_filename)
        adb.run_fastboot_command(
            ['flash', partition, partition_image_file_path])
        if partition in ['bootloader', 'radio']:
          adb.run_fastboot_command(['reboot-bootloader'])

      # Disable ramdump to avoid capturing ramdumps during kernel crashes;
      # capturing them locks up the device for several minutes during boot,
      # and we intend to analyze kernel crashes ourselves.
      adb.run_fastboot_command(['oem', 'ramdump', 'disable'])

      adb.run_fastboot_command('reboot')
      time.sleep(FLASH_REBOOT_WAIT)

      if adb.get_device_state() == 'device':
        break
      logs.log_error('Reimaging failed, retrying.')

    locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(constants.BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(constants.LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(constants.LAST_FLASH_TIME_KEY, time.time())
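
The flash loop above follows a reboot-and-poll skeleton: retry the flash
sequence until the device reports the 'device' state. A distilled sketch,
with callables standing in for the adb and fastboot steps:

import time

def flash_with_retries(flash_once, get_device_state, retries, reboot_wait):
  """Retry |flash_once| until |get_device_state| reports 'device'."""
  for _ in range(retries):
    flash_once()
    time.sleep(reboot_wait)
    if get_device_state() == 'device':
      return True
  return False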