def test_group_exceed_max_testcases(self):
  """Test that a group auto-shrinks when it exceeds the maximum number of
  testcases."""
  for i in range(1, 31):
    testcase = test_utils.create_generic_testcase()
    testcase.crash_type = 'Heap-buffer-overflow'
    testcase.crash_state = 'abcdefgh' + str(i)
    testcase.project_name = 'project'
    testcase.one_time_crasher_flag = False

    # Attach actual issues to some testcases.
    if i in [3, 4, 5]:
      testcase.bug_information = '123'

    # Make some testcases unreproducible.
    if i in [1, 2, 3]:
      testcase.one_time_crasher_flag = True

    testcase.put()

  unrelated_testcase = test_utils.create_generic_testcase()

  grouper.group_testcases()

  testcase_ids = list(data_handler.get_open_testcase_id_iterator())

  # [1, 2] get removed since they are unreproducible testcases.
  # [3] is not removed since it has a bug attached (even though
  # unreproducible).
  # [6, 7, 8] are removed to account for max group size. Even though they
  # are reproducible, they are the ones with the least weight.
  expected_testcase_ids = [3, 4, 5] + list(range(
      9, 31)) + [unrelated_testcase.key.id()]
  self.assertEqual(expected_testcase_ids, testcase_ids)
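# A minimal, self-contained sketch of the shrink ordering the test above
# encodes, not the actual grouper implementation: when a group exceeds the
# maximum size, unreproducible testcases without an attached bug are evicted
# first, then the lowest-weight reproducible ones. The dict layout and the
# |max_size| parameter are assumptions for illustration only.


def shrink_group_sketch(testcases, max_size):
  """Return the testcases to keep, evicting the most expendable ones first."""

  def eviction_key(tc):
    # Sort so that the most expendable testcases come first: unreproducible
    # ones without a bug attached, then those with the lowest weight.
    expendable = tc['one_time_crasher_flag'] and not tc['bug_information']
    return (not expendable, tc['weight'])

  ordered = sorted(testcases, key=eviction_key)
  evict_count = max(0, len(testcases) - max_size)
  return ordered[evict_count:]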
def get(self):
  """Handle a GET request."""
  # pylint: disable=unexpected-keyword-arg

  # Memoize all project and job names.
  _ = data_handler.get_all_project_names(__memoize_force__=True)
  _ = data_handler.get_all_job_type_names(__memoize_force__=True)

  # Memoize both variants of get_all_fuzzer_names_including_children.
  _ = data_handler.get_all_fuzzer_names_including_children(
      include_parents=True, __memoize_force__=True)
  _ = data_handler.get_all_fuzzer_names_including_children(
      __memoize_force__=True)

  # Memoize expensive testcase attribute calls.
  for testcase_id in data_handler.get_open_testcase_id_iterator():
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    blobs.get_blob_size(testcase.fuzzed_keys)
    blobs.get_blob_size(testcase.minimized_keys)

  self.response.headers['Content-Type'] = 'text/plain'
  self.response.out.write('OK')
  self.response.set_status(200)
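# A minimal sketch, assuming the memoization behavior the handler above relies
# on: the decorated function caches its result per argument tuple, and passing
# __memoize_force__=True recomputes and refreshes the cached value. This is
# illustrative only, not the actual memoize module.

import functools


def memoize_sketch(func):
  cache = {}

  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    force = kwargs.pop('__memoize_force__', False)
    key = (args, tuple(sorted(kwargs.items())))
    if force or key not in cache:
      cache[key] = func(*args, **kwargs)
    return cache[key]

  return wrapper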
def test_same_crash_same_security(self):
  """Test that crashes with the same crash states and same security flags get
  de-duplicated, with one of them removed."""
  for index, t in enumerate(self.testcases):
    t.security_flag = True
    t.crash_state = 'abc\ndef'
    t.timestamp = datetime.datetime.utcfromtimestamp(index)
    t.put()

  grouper.group_testcases()

  testcases = []
  for testcase_id in data_handler.get_open_testcase_id_iterator():
    testcases.append(data_handler.get_testcase_by_id(testcase_id))

  self.assertEqual(len(testcases), 1)
  self.assertEqual(testcases[0].group_id, 0)
  self.assertTrue(testcases[0].is_leader)
def test_unminimized(self):
  """Test that an unminimized testcase is not processed for grouping."""
  self.testcases[0].security_flag = True
  self.testcases[0].crash_state = 'abc\ndef'
  self.testcases[0].crash_type = 'Heap-buffer-overflow\nREAD {*}'
  self.testcases[0].minimized_keys = None
  self.testcases[1].security_flag = True
  self.testcases[1].crash_state = 'abc\ndef'
  self.testcases[1].crash_type = 'Heap-buffer-overflow\nREAD 3'
  for t in self.testcases:
    t.put()

  grouper.group_testcases()

  testcases = []
  for testcase_id in data_handler.get_open_testcase_id_iterator():
    testcases.append(data_handler.get_testcase_by_id(testcase_id))

  self.assertEqual(len(testcases), 2)
  self.assertEqual(testcases[0].group_id, 0)
  self.assertFalse(testcases[0].is_leader)
  self.assertEqual(testcases[1].group_id, 0)
  self.assertTrue(testcases[1].is_leader)
def test_same_unique_crash_type_with_same_state(self):
  """Test that crashes with the same unique crash type and same state get
  de-duplicated, with one of them removed."""
  self.testcases[0].security_flag = False
  self.testcases[0].crash_type = 'Timeout'
  self.testcases[0].crash_state = 'abcde'
  self.testcases[0].timestamp = datetime.datetime.utcfromtimestamp(0)
  self.testcases[1].security_flag = False
  self.testcases[1].crash_type = 'Timeout'
  self.testcases[1].crash_state = 'abcde'
  self.testcases[1].timestamp = datetime.datetime.utcfromtimestamp(1)
  for t in self.testcases:
    t.put()

  grouper.group_testcases()

  testcases = []
  for testcase_id in data_handler.get_open_testcase_id_iterator():
    testcases.append(data_handler.get_testcase_by_id(testcase_id))

  self.assertEqual(len(testcases), 1)
  self.assertEqual(testcases[0].group_id, 0)
  self.assertTrue(testcases[0].is_leader)
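# A hedged sketch of the rule the test above exercises: some crash types (the
# list below is an assumption, not the real constant) are too generic to group
# on *similar* crash states, but two crashes of such a type with an
# *identical* state are still exact duplicates, so one of them is removed.

UNIQUE_CRASH_TYPES_SKETCH = [
    'Hang', 'Out-of-memory', 'Stack-overflow', 'Timeout'
]


def is_exact_duplicate_sketch(tc1, tc2):
  """Exact de-duplication applies regardless of crash type uniqueness."""
  return (tc1.crash_type == tc2.crash_type and
          tc1.crash_state == tc2.crash_state and
          tc1.security_flag == tc2.security_flag)


def may_group_on_similar_state_sketch(tc1, tc2):
  """Similar-state grouping is disallowed for unique crash types."""
  return (tc1.crash_type not in UNIQUE_CRASH_TYPES_SKETCH and
          tc2.crash_type not in UNIQUE_CRASH_TYPES_SKETCH)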
def get(self):
  """Handle a GET request."""
  try:
    grouper.group_testcases()
  except:
    logs.log_error('Error occurred while grouping test cases.')
    return

  # Free up memory after the group task run.
  utils.python_gc()

  # Get a list of jobs excluded from bug filing.
  excluded_jobs = _get_excluded_jobs()

  # Get a list of all jobs. This is used to filter out testcases whose jobs
  # have been removed.
  all_jobs = data_handler.get_all_job_type_names()

  for testcase_id in data_handler.get_open_testcase_id_iterator():
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    # Skip if the testcase's job has been removed.
    if testcase.job_type not in all_jobs:
      continue

    # Skip if the testcase's job is in the exclusions list.
    if testcase.job_type in excluded_jobs:
      continue

    # Skip if the progression task is currently running for this testcase.
    if testcase.get_metadata('progression_pending'):
      continue

    # If the testcase already has a bug filed, no triage is needed.
    if _is_bug_filed(testcase):
      continue

    # Check if the crash is important, i.e. it is either a reproducible crash
    # or an unreproducible crash happening frequently.
    if not _is_crash_important(testcase):
      continue

    # Require that all tasks like minimization, regression testing, etc. have
    # finished.
    if not data_handler.critical_tasks_completed(testcase):
      continue

    # For testcases that are not part of a group, wait an additional period
    # until the group task completes.
    # FIXME: In future, grouping might be dependent on the regression range,
    # so we would have to add an additional wait time.
    if not testcase.group_id and not dates.time_has_expired(
        testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
      continue

    # If this project does not have an associated issue tracker, we cannot
    # file this crash anywhere.
    issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
        testcase)
    if not issue_tracker:
      continue

    # If similar issues to this testcase are already filed or were recently
    # closed, skip filing a duplicate bug.
    if _check_and_update_similar_bug(testcase, issue_tracker):
      continue

    # Clean up old triage messages that are no longer applicable.
    testcase.delete_metadata(TRIAGE_MESSAGE_KEY, update_testcase=False)

    # File the bug first and then create the filed bug metadata.
    try:
      issue_filer.file_issue(testcase, issue_tracker)
    except Exception:
      logs.log_error('Failed to file issue for testcase %d.' % testcase_id)
      continue

    _create_filed_bug_metadata(testcase)
    logs.log('Filed new issue %s for testcase %d.' %
             (testcase.bug_information, testcase_id))
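# A hedged sketch of the shape of the _is_crash_important() check used above:
# a reproducible crash is always important, while an unreproducible one only
# qualifies if it keeps occurring. The |recent_crash_count| attribute and the
# threshold below are hypothetical stand-ins for the real crash-stats query.

UNREPRODUCIBLE_CRASH_COUNT_THRESHOLD_SKETCH = 10


def is_crash_important_sketch(testcase):
  """Return whether a crash is worth filing a bug for."""
  if not testcase.one_time_crasher_flag:
    # Reproducible crashes are always important.
    return True

  # Unreproducible crashes are only important when they occur frequently.
  recent_crash_count = getattr(testcase, 'recent_crash_count', 0)
  return recent_crash_count >= UNREPRODUCIBLE_CRASH_COUNT_THRESHOLD_SKETCH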
def group_testcases():
  """Group testcases based on rules like same bug numbers, similar crash
  states, etc."""
  testcase_map = {}
  cached_issue_map = {}

  for testcase_id in data_handler.get_open_testcase_id_iterator():
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    # Remove duplicates early on to avoid large groups.
    if (not testcase.bug_information and not testcase.uploader_email and
        has_testcase_with_same_params(testcase, testcase_map)):
      logs.log('Deleting duplicate testcase %d.' % testcase_id)
      testcase.key.delete()
      continue

    # Wait for minimization to finish, as it might change crash params such
    # as crash type and may mark the testcase as duplicate / closed.
    if not testcase.minimized_keys:
      continue

    # Store needed testcase attributes into |testcase_map|.
    testcase_map[testcase_id] = TestcaseAttributes()
    testcase_attributes = testcase_map[testcase_id]
    for attribute_name in FORWARDED_ATTRIBUTES:
      setattr(testcase_attributes, attribute_name,
              getattr(testcase, attribute_name))

    # Store original issue mappings in the testcase attributes.
    if testcase.bug_information:
      issue_id = int(testcase.bug_information)
      project_name = testcase.project_name

      if (project_name in cached_issue_map and
          issue_id in cached_issue_map[project_name]):
        testcase_attributes.issue_id = (
            cached_issue_map[project_name][issue_id])
      else:
        issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
            testcase)
        if not issue_tracker:
          continue

        # Determine the original issue id by traversing the list of
        # duplicates.
        try:
          issue = issue_tracker.get_original_issue(issue_id)
          original_issue_id = issue.id
        except:
          # If we are unable to access the issue, then we can't determine
          # the original issue id. Assume that it is the same as the issue
          # id.
          logs.log_error(
              'Unable to determine original issue for %d.' % issue_id)
          original_issue_id = issue_id

        if project_name not in cached_issue_map:
          cached_issue_map[project_name] = {}
        cached_issue_map[project_name][issue_id] = original_issue_id
        cached_issue_map[project_name][original_issue_id] = original_issue_id
        testcase_attributes.issue_id = original_issue_id

  # No longer needed. Free up some memory.
  cached_issue_map.clear()

  group_testcases_with_similar_states(testcase_map)
  group_testcases_with_same_issues(testcase_map)
  group_leader.choose(testcase_map)

  # TODO(aarya): Replace with an optimized implementation using a dirty flag.
  # Update the group mapping in the testcase object.
  for testcase_id in data_handler.get_open_testcase_id_iterator():
    if testcase_id not in testcase_map:
      # A new testcase that was just created. Skip for now; it will be
      # grouped in the next iteration of the group task.
      continue

    # If we are part of a group, calculate the number of testcases in that
    # group and the lowest issue id among issues associated with testcases
    # in that group.
    updated_group_id = testcase_map[testcase_id].group_id
    updated_is_leader = testcase_map[testcase_id].is_leader
    updated_group_id_count = 0
    updated_group_bug_information = 0
    if updated_group_id:
      for other_testcase in six.itervalues(testcase_map):
        if other_testcase.group_id != updated_group_id:
          continue
        updated_group_id_count += 1

        # Update the group issue id to be the lowest issue id in the entire
        # group.
        if other_testcase.issue_id is None:
          continue
        if (not updated_group_bug_information or
            updated_group_bug_information > other_testcase.issue_id):
          updated_group_bug_information = other_testcase.issue_id

    # If this group id is used by only one testcase, then remove it.
    if updated_group_id_count == 1:
      data_handler.delete_group(updated_group_id, update_testcases=False)
      updated_group_id = 0
      updated_group_bug_information = 0
      updated_is_leader = True

    # If this group has more than the maximum allowed testcases, log an error
    # so that the sheriff can later debug what caused this. Usually, this is
    # either a bug in the grouping logic or an ever-changing crash signature
    # (e.g. slightly different crash types or crash states). We cannot bail
    # out, as otherwise we would not group the testcase, leading to a spam of
    # newly filed bugs.
    if updated_group_id_count > GROUP_MAX_TESTCASE_LIMIT:
      logs.log_error(
          'Group %d exceeds maximum allowed testcases.' % updated_group_id)

    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    is_changed = (
        (testcase.group_id != updated_group_id) or
        (testcase.group_bug_information != updated_group_bug_information) or
        (testcase.is_leader != updated_is_leader))
    if not is_changed:
      # If nothing has changed, there is no more work to do. It's faster
      # this way.
      continue

    testcase.group_bug_information = updated_group_bug_information
    testcase.group_id = updated_group_id
    testcase.is_leader = updated_is_leader
    testcase.put()
    logs.log(
        'Updated testcase %d group to %d.' % (testcase_id, updated_group_id))
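# A minimal sketch of the has_testcase_with_same_params() duplicate check
# used in group_testcases() above, assuming "same params" means an identical
# (crash_type, crash_state, security_flag, project_name, job_type) tuple; the
# real implementation may compare a different attribute set.


def has_testcase_with_same_params_sketch(testcase, testcase_map):
  """Return whether |testcase_map| already holds an equivalent testcase."""
  for other in testcase_map.values():
    if (testcase.crash_type == other.crash_type and
        testcase.crash_state == other.crash_state and
        testcase.security_flag == other.security_flag and
        testcase.project_name == other.project_name and
        testcase.job_type == other.job_type):
      return True
  return False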