def _wait_for_operation(response, project, zone):
  """Wait for the given operation to complete."""
  if 'status' in response and response['status'] == 'DONE':
    return

  if 'kind' not in response or response['kind'] != 'compute#operation':
    logs.log_error('Compute api response not an operation.')
    return

  api = _get_api()
  operation = response['name']
  start_time = datetime.datetime.utcnow()

  while not dates.time_has_expired(start_time, seconds=OPERATION_TIMEOUT):
    operation_func = api.zoneOperations().get(
        operation=operation, project=project, zone=zone)
    response = _execute_api_call_with_retries(operation_func)

    if 'status' not in response:
      logs.log_error('Invalid compute engine operation %s.' % str(operation))
      return

    if response['status'] == 'DONE':
      return

    time.sleep(POLL_INTERVAL)

  logs.log_error('Compute engine operation %s timed out.' % str(operation))
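# Nearly every function in this section throttles periodic work through
# dates.time_has_expired(). The helper itself is not shown here; the sketch
# below is an assumption about its behavior inferred from the call sites
# (keyword arguments days/hours/seconds and an optional compare_to), not the
# actual ClusterFuzz implementation.
import datetime


def time_has_expired(timestamp,
                     compare_to=None,
                     days=0,
                     hours=0,
                     minutes=0,
                     seconds=0):
  """Return True if |timestamp| is older than the given interval (assumed)."""
  if compare_to is None:
    compare_to = datetime.datetime.utcnow()

  total_seconds = (days * 86400) + (hours * 3600) + (minutes * 60) + seconds
  return (compare_to - timestamp).total_seconds() > total_seconds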
def delete_unreproducible_testcase_with_no_issue(testcase):
  """Delete an unreproducible testcase if it has no associated issue and has
  been open for a certain time interval."""
  # Make sure that this testcase is an unreproducible bug. If not, bail out.
  if not testcase.one_time_crasher_flag:
    return

  # Make sure that this testcase has no associated bug. If not, bail out.
  if testcase.bug_information:
    return

  # Make sure that the testcase is at least as old as
  # |UNREPRODUCIBLE_TESTCASE_NO_BUG_DEADLINE|, otherwise it will be seen in
  # crash stats anyway.
  if (testcase.timestamp and not dates.time_has_expired(
      testcase.timestamp,
      days=data_types.UNREPRODUCIBLE_TESTCASE_NO_BUG_DEADLINE)):
    return

  # Make sure that the testcase is not seen in crash stats for a certain time
  # interval.
  if get_crash_occurrence_platforms(
      testcase, data_types.UNREPRODUCIBLE_TESTCASE_NO_BUG_DEADLINE):
    return

  testcase.key.delete()
  logs.log(
      'Deleted unreproducible testcase %d with no issue.' % testcase.key.id())
def update_heartbeat(force_update=False):
  """Updates heartbeat with current timestamp and log data."""
  # Check if the heartbeat was recently updated. If yes, bail out.
  last_modified_time = persistent_cache.get_value(
      HEARTBEAT_LAST_UPDATE_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if (not force_update and last_modified_time and not dates.time_has_expired(
      last_modified_time, seconds=data_types.HEARTBEAT_WAIT_INTERVAL)):
    return 0

  bot_name = environment.get_value('BOT_NAME')
  current_time = datetime.datetime.utcnow()

  try:
    heartbeat = ndb.Key(data_types.Heartbeat, bot_name).get()
    if not heartbeat:
      heartbeat = data_types.Heartbeat()
      heartbeat.bot_name = bot_name

    heartbeat.key = ndb.Key(data_types.Heartbeat, bot_name)
    heartbeat.task_payload = tasks.get_task_payload()
    heartbeat.task_end_time = tasks.get_task_end_time()
    heartbeat.last_beat_time = current_time
    heartbeat.source_version = utils.current_source_version()
    heartbeat.put()

    persistent_cache.set_value(
        HEARTBEAT_LAST_UPDATE_KEY, time.time(), persist_across_reboots=True)
  except:
    logs.log_error('Unable to update heartbeat.')
    return 0

  return 1
def mark_testcase_as_closed_if_issue_is_closed(testcase, issue):
  """Mark testcase as closed if the associated issue is closed."""
  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is still open, no work needs to be done. Bail out.
  if issue.open:
    return

  # Make sure we passed our deadline based on issue closed timestamp.
  if (issue.closed and not dates.time_has_expired(
      issue.closed, days=data_types.CLOSE_TESTCASE_WITH_CLOSED_BUG_DEADLINE)):
    return

  # If the issue has an ignore label, don't close the testcase and bail out.
  # This helps to prevent new bugs from getting filed for legit WontFix cases.
  if issue.has_comment_with_label(data_types.ISSUE_IGNORE_LABEL):
    return

  testcase.open = False
  testcase.fixed = 'NA'
  testcase.put()
  logs.log('Closed testcase %d with issue closed.' % testcase.key.id())
def mark_testcase_as_closed_if_issue_is_closed(policy, testcase, issue):
  """Mark testcase as closed if the associated issue is closed."""
  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is still open, no work needs to be done. Bail out.
  if issue.is_open:
    return

  # Make sure we passed our deadline based on issue closed timestamp.
  if issue.closed_time and not dates.time_has_expired(
      issue.closed_time,
      days=data_types.CLOSE_TESTCASE_WITH_CLOSED_BUG_DEADLINE):
    return

  # If the issue has an ignore label, don't close the testcase and bail out.
  # This helps to prevent new bugs from getting filed for legit WontFix cases.
  if issue_tracker_utils.was_label_added(issue, policy.label("ignore")):
    return

  testcase.open = False
  testcase.fixed = "NA"
  testcase.put()
  logs.log("Closed testcase %d with issue closed." % testcase.key.id())
def wait_until_good_state():
  """Check battery and make sure it is charged beyond minimum level and
  temperature thresholds."""
  # Battery levels are not applicable on GCE.
  if adb.is_gce():
    return

  # Make sure device is online.
  adb.wait_for_device()

  # Skip battery check if done recently.
  last_battery_check_time = persistent_cache.get_value(
      LAST_BATTERY_CHECK_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if last_battery_check_time and not dates.time_has_expired(
      last_battery_check_time, seconds=BATTERY_CHECK_INTERVAL):
    return

  # Initialize variables.
  battery_level_threshold = environment.get_value(
      "LOW_BATTERY_LEVEL_THRESHOLD", LOW_BATTERY_LEVEL_THRESHOLD)
  battery_temperature_threshold = environment.get_value(
      "MAX_BATTERY_TEMPERATURE_THRESHOLD", MAX_BATTERY_TEMPERATURE_THRESHOLD)
  device_restarted = False

  while True:
    battery_information = get_battery_level_and_temperature()
    if battery_information is None:
      logs.log_error("Failed to get battery information, skipping check.")
      return

    battery_level = battery_information["level"]
    battery_temperature = battery_information["temperature"]
    logs.log("Battery information: level (%d%%), temperature (%.1f celsius)." %
             (battery_level, battery_temperature))
    if (battery_level >= battery_level_threshold and
        battery_temperature <= battery_temperature_threshold):
      persistent_cache.set_value(LAST_BATTERY_CHECK_TIME_KEY, time.time())
      return

    logs.log("Battery in bad battery state, putting device in sleep mode.")

    if not device_restarted:
      adb.reboot()
      device_restarted = True

    # Change thresholds to expected levels (only if they were below minimum
    # thresholds).
    if battery_level < battery_level_threshold:
      battery_level_threshold = EXPECTED_BATTERY_LEVEL
    if battery_temperature > battery_temperature_threshold:
      battery_temperature_threshold = EXPECTED_BATTERY_TEMPERATURE

    # Stopping shell should help with shutting off a lot of services that
    # would otherwise use up the battery. However, we need to turn it back on
    # to get battery status information.
    adb.stop_shell()
    time.sleep(BATTERY_CHARGE_INTERVAL)
    adb.start_shell()
def notify_closed_issue_if_testcase_is_open(testcase, issue):
  """Notify closed issue if associated testcase is still open after a certain
  time period."""
  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is still open, no work needs to be done. Bail out.
  if issue.is_open:
    return

  # If we have already passed our deadline based on issue closed timestamp,
  # no need to notify. We will close the testcase instead.
  if (issue.closed_time and not dates.time_has_expired(
      issue.closed_time,
      days=data_types.NOTIFY_CLOSED_BUG_WITH_OPEN_TESTCASE_DEADLINE)):
    return

  # Check if there is an ignore label on the issue already. If yes, bail out.
  if issue_tracker_utils.was_label_added(issue, data_types.ISSUE_IGNORE_LABEL):
    return

  # Check if we already added the notification comment. If yes, bail out.
  if issue_tracker_utils.was_label_added(
      issue, data_types.ISSUE_NEEDS_FEEDBACK_LABEL):
    return

  issue.labels.add(data_types.ISSUE_NEEDS_FEEDBACK_LABEL)
  if issue.status in ['Fixed', 'Verified']:
    issue_comment = (
        'ClusterFuzz testcase %d is still reproducing on tip-of-tree build '
        '(trunk).\n\nPlease re-test your fix against this testcase and if the '
        'fix was incorrect or incomplete, please re-open the bug. Otherwise, '
        'ignore this notification and add %s label.' %
        (testcase.key.id(), data_types.ISSUE_MISTRIAGED_LABEL))
  else:
    # Covers WontFix, Archived cases.
    issue_comment = (
        'ClusterFuzz testcase %d is still reproducing on tip-of-tree build '
        '(trunk).\n\nIf this testcase was not reproducible locally or '
        'unworkable, ignore this notification and we will file another '
        'bug soon with hopefully a better and workable testcase.\n\n'
        'Otherwise, if this is not intended to be fixed (e.g. this is an '
        'intentional crash), please add %s label to prevent future bug filing '
        'with similar crash stacktrace.' %
        (testcase.key.id(), data_types.ISSUE_IGNORE_LABEL))

  issue.save(new_comment=issue_comment, notify=True)
  logs.log('Notified closed issue for open testcase %d.' % testcase.key.id())
def beat(previous_state, log_filename):
  """Run a cycle of heartbeat checks to ensure bot is running."""
  # Handle case when run_bot.py script is stuck. If yes, kill its process.
  task_end_time = tasks.get_task_end_time()
  if psutil and task_end_time and dates.time_has_expired(
      task_end_time, seconds=tasks.TASK_COMPLETION_BUFFER):
    # Get absolute path to |run_bot| script. We use this to identify unique
    # instances of bot running on a particular host.
    startup_scripts_directory = environment.get_startup_scripts_directory()
    bot_file_path = os.path.join(startup_scripts_directory, 'run_bot')

    for process in psutil.process_iter():
      try:
        command_line = ' '.join(process.cmdline())
      except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
        sys.exc_clear()
        continue

      # Find the process running the main bot script.
      if bot_file_path not in command_line:
        continue

      process_id = process.pid
      logs.log(
          'Killing stale bot (pid %d) which seems to have stuck.' % process_id)
      try:
        process_handler.terminate_root_and_child_processes(process_id)
      except Exception:
        logs.log_error('Failed to terminate stale bot processes.')

    # Minor cleanup to avoid disk space issues on bot restart.
    process_handler.terminate_stale_application_instances()
    shell.clear_temp_directory()
    shell.clear_testcase_directories()

    # Concerned stale processes should be killed. Now, delete the stale task.
    tasks.track_task_end()

  # Figure out when the log file was last modified.
  try:
    current_state = str(os.path.getmtime(log_filename))
  except Exception:
    current_state = None

  logs.log('Old state %s, current state %s.' % (previous_state, current_state))

  # Only update the heartbeat if the log file was modified.
  if current_state and current_state != previous_state:
    # Try updating the heartbeat. If an error occurs, just wait and return
    # None.
    if not data_handler.update_heartbeat():
      return None
    # Heartbeat is successfully updated.

  return current_state
def update_tests_if_needed():
  """Updates layout tests every day."""
  data_directory = environment.get_value('FUZZ_DATA')
  error_occurred = False
  expected_task_duration = 60 * 60  # 1 hour.
  retry_limit = environment.get_value('FAIL_RETRIES')
  temp_archive = os.path.join(data_directory, 'temp.zip')
  tests_url = environment.get_value('WEB_TESTS_URL')

  # Check if we have a valid tests url.
  if not tests_url:
    return

  # Layout test updates are usually disabled to speed up local testing.
  if environment.get_value('LOCAL_DEVELOPMENT'):
    return

  # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
  if not environment.get_value('UPDATE_WEB_TESTS'):
    return

  last_modified_time = persistent_cache.get_value(
      LAYOUT_TEST_LAST_UPDATE_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if (last_modified_time is not None and not dates.time_has_expired(
      last_modified_time, days=LAYOUT_TEST_UPDATE_INTERVAL_DAYS)):
    return

  logs.log('Updating layout tests.')
  tasks.track_task_start(
      tasks.Task('update_tests', '', ''), expected_task_duration)

  # Download and unpack the tests archive.
  for _ in xrange(retry_limit):
    try:
      shell.remove_directory(data_directory, recreate=True)
      storage.copy_file_from(tests_url, temp_archive)
      archive.unpack(temp_archive, data_directory, trusted=True)
      shell.remove_file(temp_archive)
      error_occurred = False
      break
    except:
      logs.log_error(
          'Could not retrieve and unpack layout tests archive. Retrying.')
      error_occurred = True

  if not error_occurred:
    persistent_cache.set_value(
        LAYOUT_TEST_LAST_UPDATE_KEY, time.time(), persist_across_reboots=True)

  tasks.track_task_end()
def _associate_testcase_with_existing_issue_if_needed(testcase,
                                                      similar_testcase, issue):
  """Associate an interesting testcase with an existing issue which is already
  associated with an uninteresting testcase of similar crash signature if:

  1. The current testcase is interesting as it is:
     a. Fully reproducible AND
     b. No other reproducible testcase is open and attached to issue.
  2. Similar testcase attached to existing issue is uninteresting as it is:
     a. Either unreproducible (but filed since it occurs frequently) OR
     b. Got closed due to flakiness, but developer has re-opened the issue."""

  # Don't change existing bug mapping if any.
  if testcase.bug_information:
    return

  # If this testcase is not reproducible, no need to update bug mapping.
  if testcase.one_time_crasher_flag:
    return

  # If another reproducible testcase is open and attached to this issue, then
  # no need to update bug mapping.
  if data_types.Testcase.query(
      data_types.Testcase.bug_information == str(issue.id),
      ndb_utils.is_true(data_types.Testcase.open),
      ndb_utils.is_false(data_types.Testcase.one_time_crasher_flag)).get():
    return

  # If the similar testcase is reproducible, make sure that it was not closed
  # recently. If it was, we might not have verified this testcase either, so
  # give this testcase time to close as well.
  if not similar_testcase.open and not similar_testcase.one_time_crasher_flag:
    closed_time = similar_testcase.get_metadata('closed_time')
    if not closed_time:
      return
    if not dates.time_has_expired(
        closed_time, hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED):
      return

  testcase_id = testcase.key.id()
  report_url = data_handler.TESTCASE_REPORT_URL.format(
      domain=data_handler.get_domain(), testcase_id=testcase_id)
  comment = ('ClusterFuzz found another reproducible variant for this '
             'bug on {job_type} job: {report_url}.').format(
                 job_type=testcase.job_type, report_url=report_url)
  issue.save(new_comment=comment, notify=True)

  testcase = data_handler.get_testcase_by_id(testcase_id)
  testcase.bug_information = str(issue.id)
  testcase.group_bug_information = 0
  testcase.put()
def remove_unused_builds():
  """Remove any builds that are no longer in use by this bot."""
  builds_directory = environment.get_value('BUILDS_DIR')
  last_checked_time = persistent_cache.get_value(
      LAST_UNUSED_BUILD_CHECK_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if (last_checked_time is not None and
      not dates.time_has_expired(last_checked_time, days=1)):
    return

  # Initialize the map with all of our build directories.
  build_in_use_map = {}
  for build_directory in os.listdir(builds_directory):
    absolute_build_directory = os.path.join(builds_directory, build_directory)
    if os.path.isdir(absolute_build_directory):
      build_in_use_map[absolute_build_directory] = False

  # Platforms for jobs may come from the queue override, but use the default
  # if no override is present.
  job_platform = environment.get_platform_group()
  jobs_for_platform = ndb_utils.get_all_from_query(
      data_types.Job.query(data_types.Job.platform == job_platform))

  for job in jobs_for_platform:
    job_environment = job.get_environment()

    # Do not attempt to process any incomplete job definitions.
    if not job_environment:
      continue

    for key, value in job_environment.iteritems():
      if 'BUILD_BUCKET_PATH' in key:
        bucket_path = value
      elif key == 'CUSTOM_BINARY' and value != 'False':
        bucket_path = None
      else:
        continue

      # If we made it to this point, this build is potentially in use.
      build_directory = _get_build_directory(bucket_path, job.name)
      if build_directory in build_in_use_map:
        build_in_use_map[build_directory] = True

  for build_directory, in_use in build_in_use_map.iteritems():
    if in_use:
      continue

    # Remove the build.
    logs.log('Removing unused build directory: %s' % build_directory)
    shell.remove_directory(build_directory)

  persistent_cache.set_value(LAST_UNUSED_BUILD_CHECK_KEY, time.time())
def _try_update_status():
  """Try to update the task status metadata.

  Note: |task_name|, |status|, |bot_name| and |expiry_interval| are free
  variables taken from the enclosing scope.
  """
  task_status = get_task_status(task_name, create_if_needed=True)

  # If another bot is already working on this task, bail out with error.
  if (status == data_types.TaskState.STARTED and
      task_status.status == data_types.TaskState.STARTED and
      not dates.time_has_expired(
          task_status.time, seconds=expiry_interval - 1)):
    return False

  task_status.bot_name = bot_name
  task_status.status = status
  task_status.time = utils.utcnow()
  task_status.put()
  return True
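# _try_update_status above is a nested helper, so task_name, status, bot_name
# and expiry_interval come from its enclosing function. A hypothetical sketch
# of such an enclosing wrapper is shown below (names and transaction handling
# are assumptions, not the actual ClusterFuzz code): it runs the closure
# inside a datastore transaction so two bots cannot both claim the same task.
def update_task_status(task_name, status, expiry_interval):
  """Transactionally update the status entity for |task_name| (sketch)."""
  bot_name = environment.get_value('BOT_NAME')

  def _try_update_status():
    ...  # Inner body elided; see the definition above.

  try:
    # Run the compare-and-set inside a single transaction with no retries, so
    # a concurrent bot observes either the old or the new status atomically.
    return ndb.transaction(_try_update_status, retries=0)
  except Exception:
    logs.log_error('Unable to update task status for %s.' % task_name)
    return False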
def add_test_accounts_if_needed():
  """Add test account to work with GmsCore, etc."""
  last_test_account_check_time = persistent_cache.get_value(
      constants.LAST_TEST_ACCOUNT_CHECK_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  needs_test_account_update = (
      last_test_account_check_time is None or dates.time_has_expired(
          last_test_account_check_time,
          seconds=ADD_TEST_ACCOUNT_CHECK_INTERVAL))
  if not needs_test_account_update:
    return

  config = db_config.get()
  if not config:
    return

  test_account_email = config.test_account_email
  test_account_password = config.test_account_password
  if not test_account_email or not test_account_password:
    return

  adb.run_as_root()
  wifi.configure(force_enable=True)

  if not app.is_installed(ADD_TEST_ACCOUNT_PKG_NAME):
    logs.log('Installing helper apk for adding test account.')
    android_directory = environment.get_platform_resources_directory()
    add_test_account_apk_path = os.path.join(android_directory,
                                             ADD_TEST_ACCOUNT_APK_NAME)
    app.install(add_test_account_apk_path)

  logs.log('Trying to add test account.')
  output = adb.run_shell_command(
      'am instrument -e account %s -e password %s -w %s' %
      (test_account_email, test_account_password, ADD_TEST_ACCOUNT_CALL_PATH),
      timeout=ADD_TEST_ACCOUNT_TIMEOUT)
  if not output or test_account_email not in output:
    logs.log('Failed to add test account, probably due to wifi issues.')
    return

  logs.log('Test account added successfully.')
  persistent_cache.set_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY,
                             time.time())
def bot_run_timed_out():
  """Return true if our run timed out."""
  run_timeout = environment.get_value('RUN_TIMEOUT')
  if not run_timeout:
    return False

  # Check that we have a valid start time from our heartbeat.
  bot_name = environment.get_value('BOT_NAME')
  heartbeat = data_types.Heartbeat.query(
      data_types.Heartbeat.bot_name == bot_name).get()
  if not heartbeat or not heartbeat.start_time:
    return False

  # Actual run timeout takes off the duration for one task.
  average_task_duration = environment.get_value('AVERAGE_TASK_DURATION', 0)
  actual_run_timeout = run_timeout - average_task_duration

  return dates.time_has_expired(
      heartbeat.start_time, seconds=actual_run_timeout)
def mark_duplicate_testcase_as_closed_with_no_issue(testcase):
  """Closes a duplicate testcase if it has no associated issue and has been
  open for a certain time interval."""
  # Make sure that this testcase is a duplicate bug. If not, bail out.
  if testcase.status != 'Duplicate':
    return

  # Make sure that this testcase has no associated bug. If not, bail out.
  if testcase.bug_information:
    return

  # Make sure that the testcase has been open for a certain time interval. We
  # do a null timestamp check since some older testcases could be missing it.
  if (testcase.timestamp and not dates.time_has_expired(
      testcase.timestamp, days=data_types.DUPLICATE_TESTCASE_NO_BUG_DEADLINE)):
    return

  testcase.fixed = 'NA'
  testcase.open = False
  testcase.put()
  logs.log('Closed duplicate testcase %d with no issue.' % testcase.key.id())
def _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
  """Return true if the data bundle is up to date, false otherwise."""
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)

  if environment.is_trusted_host() and data_bundle.sync_to_worker:
    from bot.untrusted_runner import file_host
    worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
    shell.remove_file(sync_file_path)
    file_host.copy_file_from_worker(worker_sync_file_path, sync_file_path)

  if not os.path.exists(sync_file_path):
    return False

  last_sync_time = datetime.datetime.utcfromtimestamp(
      utils.read_data_from_file(sync_file_path))

  # Check if we recently synced.
  if not dates.time_has_expired(
      last_sync_time, seconds=_DATA_BUNDLE_SYNC_INTERVAL_IN_SECONDS):
    return True

  # For search index data bundle, we don't sync them from bucket. Instead, we
  # rely on the fuzzer to generate testcases periodically.
  if _is_search_index_data_bundle(data_bundle.name):
    return False

  # Check when the bucket url was last updated. If there are no new updates,
  # there is no need to update the directory.
  bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)
  last_updated_time = storage.last_updated(bucket_url)
  if last_updated_time and last_sync_time > last_updated_time:
    logs.log("Data bundle %s has no new content from last sync." %
             data_bundle.name)
    return True

  return False
def is_similar_bug_open_or_recently_closed(testcase, issue_tracker_manager):
  """Get list of similar open issues and ones that were recently closed."""
  # Get similar testcases from the same group.
  similar_testcases_from_group = []
  if testcase.group_id:
    group_query = data_types.Testcase.query(
        data_types.Testcase.group_id == testcase.group_id)
    similar_testcases_from_group = ndb_utils.get_all_from_query(
        group_query, batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT / 2)

  # Get testcases with the same crash params. These might not be in a group
  # if they were just fixed.
  same_crash_params_query = data_types.Testcase.query(
      data_types.Testcase.crash_type == testcase.crash_type,
      data_types.Testcase.crash_state == testcase.crash_state,
      data_types.Testcase.security_flag == testcase.security_flag,
      data_types.Testcase.project_name == testcase.project_name,
      data_types.Testcase.status == 'Processed')
  similar_testcases_from_query = ndb_utils.get_all_from_query(
      same_crash_params_query,
      batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT / 2)

  for similar_testcase in itertools.chain(similar_testcases_from_group,
                                          similar_testcases_from_query):
    # Exclude ourself from comparison.
    if similar_testcase.key.id() == testcase.key.id():
      continue

    # Exclude similar testcases without bug information.
    if not similar_testcase.bug_information:
      continue

    # Get the issue object given its ID.
    issue = issue_tracker_manager.get_issue(similar_testcase.bug_information)
    if not issue:
      continue

    # If the reproducible issue is not verified yet, the bug is still valid
    # and might be caused by non-availability of latest builds. In that case,
    # don't file a new bug yet.
    if similar_testcase.open and not similar_testcase.one_time_crasher_flag:
      return True

    # If the issue is still open, no need to file a duplicate bug.
    if issue.open:
      return True

    # If the issue indicates that this crash needs to be ignored, no need to
    # file another one.
    if issue.has_label(data_types.ISSUE_IGNORE_LABEL):
      return True

    # If the issue was recently closed, wait a certain time period to make
    # sure our fixed verification has completed.
    if (issue.closed and not dates.time_has_expired(
        issue.closed,
        compare_to=datetime.datetime.utcnow(),
        hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED)):
      return True

  return False
def mark_unreproducible_testcase_and_issue_as_closed_after_deadline(
    testcase, issue):
  """Closes an unreproducible testcase and its associated issue after a
  certain time period."""
  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # Make sure that this testcase is an unreproducible bug. If not, bail out.
  if not testcase.one_time_crasher_flag:
    return

  # Make sure that this testcase has an associated bug. If not, bail out.
  if not testcase.bug_information:
    return

  # If this testcase was manually uploaded, don't change issue state as our
  # reproduction result might be incorrect.
  if testcase.uploader_email:
    return

  # Make sure that there is an associated bug and it is in open state.
  if not issue or not issue.open:
    return

  # Check if there are any reproducible open testcases associated with this
  # bug. If yes, return.
  similar_testcase = data_types.Testcase.query(
      data_types.Testcase.bug_information == testcase.bug_information,
      ndb_utils.is_true(data_types.Testcase.open),
      ndb_utils.is_false(data_types.Testcase.one_time_crasher_flag)).get()
  if similar_testcase:
    return

  # Make sure that the testcase is at least as old as
  # |UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE|, otherwise it will be seen in
  # crash stats anyway.
  if (testcase.timestamp and not dates.time_has_expired(
      testcase.timestamp,
      days=data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE)):
    return

  # Handle testcase that turned from reproducible to unreproducible. Account
  # for the recent progression task run time.
  last_tested_crash_time = testcase.get_metadata('last_tested_crash_time')
  if (last_tested_crash_time and not dates.time_has_expired(
      last_tested_crash_time,
      days=data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE)):
    return

  # Make sure that there is no crash seen in the deadline period.
  if get_crash_occurrence_platforms(
      testcase, data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE):
    return

  # As a last check, do the expensive call of actually checking all issue
  # comments to make sure we didn't get called out on issue mistriage.
  if issue.has_comment_with_label(data_types.ISSUE_MISTRIAGED_LABEL):
    return

  # Close associated issue and testcase.
  comment = ('ClusterFuzz testcase %d is flaky and no longer crashes, '
             'so closing issue.' % testcase.key.id())
  if utils.is_oss_fuzz():
    comment += OSS_FUZZ_INCORRECT_COMMENT
  else:
    comment += INTERNAL_INCORRECT_COMMENT
  comment += ' and re-open the issue.'

  issue.comment = comment
  issue.status = 'WontFix'
  issue.open = False
  issue.save(send_email=True)

  testcase.fixed = 'NA'
  testcase.open = False
  testcase.put()

  logs.log('Closed unreproducible testcase %d and associated issue.' %
           testcase.key.id())
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state."""
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out and
    # will probably get re-imaged, e.g. by frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  last_flash_time = persistent_cache.get_value(
      LAST_FLASH_TIME_KEY, constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=adb.FLASH_INTERVAL)
  if not needs_flash:
    return

  build_info = {}
  if adb.is_gce():
    adb.recreate_gce_device()
  else:
    # Physical device.
    is_google_device = google_device()
    if is_google_device is None:
      logs.log_error('Unable to query device. Reimaging failed.')
      adb.bad_state_reached()
    elif not is_google_device:
      # We can't reimage these, skip.
      logs.log('Non-Google device found, skipping reimage.')
      return
    else:
      # For Google devices.
      # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
      # are set. If not, we don't have enough data for reimaging and hence
      # we bail out.
      branch = environment.get_value('BUILD_BRANCH')
      target = environment.get_value('BUILD_TARGET')
      if not target:
        # We default to userdebug configuration.
        build_params = get_build_parameters()
        if build_params:
          target = build_params.get('target') + '-userdebug'

          # Cache target in environment. This is also useful for cases when
          # the device is bricked and we don't have this information
          # available.
          environment.set_value('BUILD_TARGET', target)

      if not branch or not target:
        logs.log_warn(
            'BUILD_BRANCH and BUILD_TARGET are not set, skipping reimage.')
        return

      # Download the latest build artifact for this branch and target.
      build_info = fetch_artifact.get_latest_artifact_info(branch, target)
      if not build_info:
        logs.log_error('Unable to fetch information on latest build artifact '
                       'for branch %s and target %s.' % (branch, target))
        return

      # Check if our local build matches the latest build. If not, we will
      # download it.
      build_id = build_info['bid']
      target = build_info['target']
      image_directory = environment.get_value('IMAGES_DIR')
      last_build_info = persistent_cache.get_value(LAST_FLASH_BUILD_KEY)
      if not last_build_info or last_build_info['bid'] != build_id:
        # Clean up the images directory first.
        shell.remove_directory(image_directory, recreate=True)

        # We have a new build, download the build artifacts for it.
        for image_regex in FLASH_IMAGE_REGEXES:
          image_file_path = fetch_artifact.get(build_id, target, image_regex,
                                               image_directory)
          if not image_file_path:
            logs.log_error('Failed to download image artifact %s for '
                           'branch %s and target %s.' %
                           (image_file_path, branch, target))
            return
          if image_file_path.endswith('.zip'):
            archive.unpack(image_file_path, image_directory)

      # We do one device flash at a time on one host, otherwise we run into
      # failures and the device being stuck in a bad state.
      flash_lock_key_name = 'flash:%s' % socket.gethostname()
      if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
        logs.log_error('Failed to acquire lock for reimaging, exiting.')
        return

      logs.log('Reimaging started.')
      logs.log('Rebooting into bootloader mode.')
      for _ in xrange(FLASH_RETRIES):
        adb.run_as_root()
        adb.run_adb_command(['reboot-bootloader'])
        time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
        adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
        adb.run_fastboot_command(['-w', 'reboot-bootloader'])

        for partition, partition_image_filename in FLASH_IMAGE_FILES:
          partition_image_file_path = os.path.join(image_directory,
                                                   partition_image_filename)
          adb.run_fastboot_command(
              ['flash', partition, partition_image_file_path])
          if partition in ['bootloader', 'radio']:
            adb.run_fastboot_command(['reboot-bootloader'])

        adb.run_fastboot_command('reboot')
        time.sleep(FLASH_REBOOT_WAIT)

        if adb.get_device_state() == 'device':
          break
        logs.log_error('Reimaging failed, retrying.')

      locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(LAST_FLASH_TIME_KEY, time.time())
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state."""
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out and
    # will probably get re-imaged, e.g. by frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  last_flash_time = persistent_cache.get_value(
      constants.LAST_FLASH_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=FLASH_INTERVAL)
  if not needs_flash:
    return

  is_google_device = settings.is_google_device()
  if is_google_device is None:
    logs.log_error('Unable to query device. Reimaging failed.')
    adb.bad_state_reached()
  elif not is_google_device:
    # We can't reimage these, skip.
    logs.log('Non-Google device found, skipping reimage.')
    return

  # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
  # are set. If not, we don't have enough data for reimaging and hence
  # we bail out.
  branch = environment.get_value('BUILD_BRANCH')
  target = environment.get_value('BUILD_TARGET')
  if not target:
    # We default to userdebug configuration.
    build_params = settings.get_build_parameters()
    if build_params:
      target = build_params.get('target') + '-userdebug'

      # Cache target in environment. This is also useful for cases when the
      # device is bricked and we don't have this information available.
      environment.set_value('BUILD_TARGET', target)

  if not branch or not target:
    logs.log_warn(
        'BUILD_BRANCH and BUILD_TARGET are not set, skipping reimage.')
    return

  image_directory = environment.get_value('IMAGES_DIR')
  build_info = fetch_artifact.get_latest_artifact_info(branch, target)
  if not build_info:
    logs.log_error('Unable to fetch information on latest build artifact for '
                   'branch %s and target %s.' % (branch, target))
    return

  if environment.is_android_cuttlefish():
    download_latest_build(build_info, FLASH_CUTTLEFISH_REGEXES,
                          image_directory)
    adb.recreate_cuttlefish_device()
    adb.connect_to_cuttlefish_device()
  else:
    download_latest_build(build_info, FLASH_IMAGE_REGEXES, image_directory)

    # We do one device flash at a time on one host, otherwise we run into
    # failures and the device being stuck in a bad state.
    flash_lock_key_name = 'flash:%s' % socket.gethostname()
    if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
      logs.log_error('Failed to acquire lock for reimaging, exiting.')
      return

    logs.log('Reimaging started.')
    logs.log('Rebooting into bootloader mode.')
    for _ in range(FLASH_RETRIES):
      adb.run_as_root()
      adb.run_command(['reboot-bootloader'])
      time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
      adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
      adb.run_fastboot_command(['-w', 'reboot-bootloader'])

      for partition, partition_image_filename in FLASH_IMAGE_FILES:
        partition_image_file_path = os.path.join(image_directory,
                                                 partition_image_filename)
        adb.run_fastboot_command(
            ['flash', partition, partition_image_file_path])
        if partition in ['bootloader', 'radio']:
          adb.run_fastboot_command(['reboot-bootloader'])

      # Disable ramdump to avoid capturing ramdumps during kernel crashes.
      # This causes device lockup of several minutes during boot and we intend
      # to analyze them ourselves.
      adb.run_fastboot_command(['oem', 'ramdump', 'disable'])

      adb.run_fastboot_command('reboot')
      time.sleep(FLASH_REBOOT_WAIT)

      if adb.get_device_state() == 'device':
        break
      logs.log_error('Reimaging failed, retrying.')

    locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(constants.BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(constants.LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(constants.LAST_FLASH_TIME_KEY, time.time())
def _check_and_update_similar_bug(testcase, issue_tracker):
  """Get list of similar open issues and ones that were recently closed."""
  # Get similar testcases from the same group.
  similar_testcases_from_group = []
  if testcase.group_id:
    group_query = data_types.Testcase.query(
        data_types.Testcase.group_id == testcase.group_id)
    similar_testcases_from_group = ndb_utils.get_all_from_query(
        group_query, batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)

  # Get testcases with the same crash params. These might not be in a group
  # if they were just fixed.
  same_crash_params_query = data_types.Testcase.query(
      data_types.Testcase.crash_type == testcase.crash_type,
      data_types.Testcase.crash_state == testcase.crash_state,
      data_types.Testcase.security_flag == testcase.security_flag,
      data_types.Testcase.project_name == testcase.project_name,
      data_types.Testcase.status == 'Processed')
  similar_testcases_from_query = ndb_utils.get_all_from_query(
      same_crash_params_query,
      batch_size=data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2)

  for similar_testcase in itertools.chain(similar_testcases_from_group,
                                          similar_testcases_from_query):
    # Exclude ourself from comparison.
    if similar_testcase.key.id() == testcase.key.id():
      continue

    # Exclude similar testcases without bug information.
    if not similar_testcase.bug_information:
      continue

    # Get the issue object given its ID.
    issue = issue_tracker.get_issue(similar_testcase.bug_information)
    if not issue:
      continue

    # If the reproducible issue is not verified yet, the bug is still valid
    # and might be caused by non-availability of latest builds. In that case,
    # don't file a new bug yet.
    if similar_testcase.open and not similar_testcase.one_time_crasher_flag:
      return True

    # If the issue is still open, no need to file a duplicate bug.
    if issue.is_open:
      return True

    # If the issue indicates that this crash needs to be ignored, no need to
    # file another one.
    policy = issue_tracker_policy.get(issue_tracker.project)
    ignore_label = policy.label('ignore')
    if ignore_label in issue.labels:
      _add_triage_message(testcase, (
          'Skipping filing a bug since similar testcase ({testcase_id}) in '
          'issue ({issue_id}) is blacklisted with {ignore_label} label.'
      ).format(
          testcase_id=similar_testcase.key.id(),
          issue_id=issue.id,
          ignore_label=ignore_label))
      return True

    # If the issue was recently closed, wait a certain time period to make
    # sure our fixed verification has completed.
    if (issue.closed_time and not dates.time_has_expired(
        issue.closed_time, hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED)):
      _add_triage_message(
          testcase,
          ('Delaying filing a bug since similar testcase ({testcase_id}) in '
           'issue ({issue_id}) was just fixed.').format(
               testcase_id=similar_testcase.key.id(), issue_id=issue.id))
      return True

  return False
def get(self):
  """Handle a get request."""
  try:
    grouper.group_testcases()
  except:
    logs.log_error('Error occurred while grouping test cases.')
    return

  # Free up memory after group task run.
  utils.python_gc()

  # Get a list of jobs excluded from bug filing.
  excluded_jobs = _get_excluded_jobs()

  # Get a list of all jobs. This is used to filter testcases whose jobs have
  # been removed.
  all_jobs = data_handler.get_all_job_type_names()

  for testcase_id in data_handler.get_open_testcase_id_iterator():
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    # Skip if testcase's job is removed.
    if testcase.job_type not in all_jobs:
      continue

    # Skip if testcase's job is in exclusions list.
    if testcase.job_type in excluded_jobs:
      continue

    # Skip if we are running progression task at this time.
    if testcase.get_metadata('progression_pending'):
      continue

    # If the testcase has a bug filed already, no triage is needed.
    if _is_bug_filed(testcase):
      continue

    # Check if the crash is important, i.e. it is either a reproducible crash
    # or an unreproducible crash happening frequently.
    if not _is_crash_important(testcase):
      continue

    # Require that all tasks like minimization, regression testing, etc. have
    # finished.
    if not data_handler.critical_tasks_completed(testcase):
      continue

    # For testcases that are not part of a group, wait an additional time
    # until the group task completes.
    # FIXME: In future, grouping might be dependent on regression range, so we
    # would have to add an additional wait time.
    if not testcase.group_id and not dates.time_has_expired(
        testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
      continue

    # If this project does not have an associated issue tracker, we cannot
    # file this crash anywhere.
    issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
        testcase)
    if not issue_tracker:
      continue

    # If there are similar issues to this test case already filed or recently
    # closed, skip filing a duplicate bug.
    if _check_and_update_similar_bug(testcase, issue_tracker):
      continue

    # Clean up old triage messages that would not be applicable now.
    testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False)

    # File the bug first and then create filed bug metadata.
    try:
      issue_filer.file_issue(testcase, issue_tracker)
    except Exception:
      logs.log_error('Failed to file issue for testcase %d.' % testcase_id)
      continue

    _create_filed_bug_metadata(testcase)
    logs.log('Filed new issue %s for testcase %d.' %
             (testcase.bug_information, testcase_id))
def notify_closed_issue_if_testcase_is_open(policy, testcase, issue):
  """Notify closed issue if associated testcase is still open after a certain
  time period."""
  needs_feedback_label = policy.label('needs_feedback')
  if not needs_feedback_label:
    return

  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is still open, no work needs to be done. Bail out.
  if issue.is_open:
    return

  # If we have already passed our deadline based on issue closed timestamp,
  # no need to notify. We will close the testcase instead.
  if (issue.closed_time and not dates.time_has_expired(
      issue.closed_time,
      days=data_types.NOTIFY_CLOSED_BUG_WITH_OPEN_TESTCASE_DEADLINE)):
    return

  # Check if there is an ignore label on the issue already. If yes, bail out.
  if issue_tracker_utils.was_label_added(issue, policy.label('ignore')):
    return

  # Check if we already added the notification comment. If yes, bail out.
  if issue_tracker_utils.was_label_added(issue, needs_feedback_label):
    return

  issue.labels.add(needs_feedback_label)
  if issue.status in [policy.status('fixed'), policy.status('verified')]:
    issue_comment = (
        'ClusterFuzz testcase {id} is still reproducing on tip-of-tree build '
        '(trunk).\n\nPlease re-test your fix against this testcase and if the '
        'fix was incorrect or incomplete, please re-open the bug.'
    ).format(id=testcase.key.id())

    wrong_label = policy.label('wrong')
    if wrong_label:
      issue_comment += (
          ' Otherwise, ignore this notification and add the '
          '{label_text}.').format(
              label_text=issue.issue_tracker.label_text(wrong_label))
  else:
    # Covers WontFix, Archived cases.
    issue_comment = (
        'ClusterFuzz testcase {id} is still reproducing on tip-of-tree build '
        '(trunk).\n\nIf this testcase was not reproducible locally or '
        'unworkable, ignore this notification and we will file another '
        'bug soon with hopefully a better and workable testcase.\n\n').format(
            id=testcase.key.id())

    ignore_label = policy.label('ignore')
    if ignore_label:
      issue_comment += (
          'Otherwise, if this is not intended to be fixed (e.g. this is an '
          'intentional crash), please add the {label_text} to '
          'prevent future bug filing with similar crash stacktrace.').format(
              label_text=issue.issue_tracker.label_text(ignore_label))

  issue.save(new_comment=issue_comment, notify=True)
  logs.log('Notified closed issue for open testcase %d.' % testcase.key.id())