def test_group_all_by_variant_task(self):
    """
    Tests that summarize_by() correctly accumulates all unique combinations of
    (variant, task).
    """
    report = test_failures.Report(self.ENTRIES)
    # Components given as explicit strings rather than a Report.* constant.
    summed_entries = report.summarize_by(["variant", "task"])

    expected = [
        self.ENTRY._replace(
            task="jsCore",
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 5),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test=test_failures.Wildcard("tests"),
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 10),
            num_pass=4,
            num_fail=1,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 17),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def transition_from_unreliable_to_reliable(self, config, initial_tags):
    """
    Tests that update_tags() untags a formerly unreliable combination after it has become
    reliable again.
    """
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    tests = ["jstests/core/all.js"]
    entry = self.ENTRY
    # A mostly-passing history: a single failure among passes that cover the
    # task, variant, and distro components.
    report = test_failures.Report([
        entry._replace(num_pass=1, num_fail=0),
        entry._replace(num_pass=1, num_fail=0, task="jsCore"),
        entry._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=1, num_fail=0, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    # Every unreliable tag should have been removed.
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), collections.OrderedDict())
def test_group_9days_by_test(self):
    """
    Tests that summarize_by() correctly accumulates by multiple days, including time periods
    greater than 1 week.
    """
    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST,
                                         time_period=datetime.timedelta(days=9))

    expected = [
        self.ENTRY._replace(
            task=test_failures.Wildcard("tasks"),
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 11),
            num_pass=3,
            num_fail=2,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 12),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 11),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_remain_reliable(self):
    """
    Tests that update_tags() preserves the absence of tags for reliable combinations.
    """
    # Loosen every acceptable fail rate so the single failure below stays reliable.
    rate_overrides = {
        name + "_fail_rates": getattr(self.CONFIG, name + "_fail_rates")._replace(acceptable=0.9)
        for name in ("test", "task", "variant", "distro")
    }
    config = self.CONFIG._replace(**rate_overrides)

    initial_tags = collections.OrderedDict()
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    tests = ["jstests/core/all.js"]
    entry = self.ENTRY
    report = test_failures.Report([
        entry._replace(num_pass=1, num_fail=0),
        entry._replace(num_pass=1, num_fail=0, task="jsCore"),
        entry._replace(num_pass=1, num_fail=0, variant="linux-64-debug"),
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=1, num_fail=0, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    # No tags should have been added.
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), initial_tags)
def test_obeys_unreliable_min_runs(self):
    """
    Tests that update_tags() only considers a test unreliable if it has more than
    'unreliable_min_runs'.
    """
    # Tighten every unacceptable rate, but require far more runs than the report holds.
    rate_overrides = {
        name + "_fail_rates": getattr(self.CONFIG, name + "_fail_rates")._replace(unacceptable=0.1)
        for name in ("test", "task", "variant", "distro")
    }
    config = self.CONFIG._replace(unreliable_min_runs=100, **rate_overrides)

    initial_tags = collections.OrderedDict()
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    tests = ["jstests/core/all.js"]
    entry = self.ENTRY
    report = test_failures.Report([
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=0, num_fail=1, task="jsCore"),
        entry._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
        entry._replace(num_pass=1, num_fail=0),
        entry._replace(num_pass=0, num_fail=1, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    # Too few runs to be tagged unreliable, despite the high failure rate.
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), initial_tags)
def transition_from_reliable_to_unreliable(self, config, expected_tags):
    """
    Tests that update_tags() tags a formerly reliable combination as being unreliable.
    """
    initial_tags = collections.OrderedDict()
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    self.assertEqual(collections.OrderedDict(), self.assert_has_only_js_tests(lifecycle))

    entry = self.ENTRY
    # A mostly-failing history: failures across the task, variant, and distro
    # components with a single pass.
    report = test_failures.Report([
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=0, num_fail=1, task="jsCore"),
        entry._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
        entry._replace(num_pass=1, num_fail=0),
        entry._replace(num_pass=0, num_fail=1, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(lifecycle, config, report)
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), expected_tags)
def test_non_running_at_all_is_reliable(self):
    """
    Tests that tests that are tagged as unreliable but no longer running (either during the
    reliable or the unreliable period) have their tags removed.
    """
    config = self.CONFIG
    tests = ["jstests/core/all.js", "jstests/core/all2.js"]
    all2_tags = [
        "unreliable",
        "unreliable|jsCore_WT",
        "unreliable|jsCore_WT|linux-64",
        "unreliable|jsCore_WT|linux-64|rhel62",
    ]
    initial_tags = collections.OrderedDict([("jstests/core/all2.js", all2_tags)])
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    # all2.js did not run at all; the report only contains the base entry.
    report = test_failures.Report([self.ENTRY])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    # All tags for the non-running test have been removed.
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), collections.OrderedDict([]))
def test_group_all_by_test_task(self):
    """
    Tests that summarize_by() correctly accumulates all unique combinations of
    (test, task).
    """
    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST_TASK)

    expected = [
        self.ENTRY._replace(
            task="jsCore",
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 5),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            variant=test_failures.Wildcard("variants"),
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 17),
            num_pass=3,
            num_fail=2,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 10),
            end_date=datetime.date(2017, 6, 10),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_obeys_reliable_time_period(self):
    """
    Tests that update_tags() ignores passes from before 'reliable_time_period'.
    """
    rate_overrides = {
        name + "_fail_rates": getattr(self.CONFIG, name + "_fail_rates")._replace(acceptable=0.9)
        for name in ("test", "task", "variant", "distro")
    }
    config = self.CONFIG._replace(**rate_overrides)

    initial_tags = collections.OrderedDict()
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    tests = ["jstests/core/all.js"]
    entry = self.ENTRY
    one_day = datetime.timedelta(days=1)
    report = test_failures.Report([
        # Passes that predate the reliable time period and must be ignored.
        entry._replace(start_date=entry.start_date - one_day,
                       end_date=entry.end_date - one_day,
                       num_pass=1,
                       num_fail=0),
        entry._replace(start_date=entry.start_date - 2 * one_day,
                       end_date=entry.end_date - 2 * one_day,
                       num_pass=1,
                       num_fail=0),
        # Recent failures across every component.
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=0, num_fail=1, task="jsCore"),
        entry._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
        entry._replace(num_pass=0, num_fail=1, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    expected_tags = collections.OrderedDict([
        ("jstests/core/all.js", [
            "unreliable",
            "unreliable|jsCore_WT",
            "unreliable|jsCore_WT|linux-64",
            "unreliable|jsCore_WT|linux-64|rhel62",
        ]),
    ])
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), expected_tags)
def test_non_running_in_reliable_period_is_reliable(self):
    """
    Tests that tests that have a failure rate above the unacceptable rate during the unreliable
    period but haven't run during the reliable period are marked as reliable.
    """
    # Unreliable period is 2 days: 2017-06-03 to 2017-06-04.
    # Reliable period is 1 day: 2017-06-04.
    reliable_period_date = datetime.date(2017, 6, 4)
    rate_overrides = {
        name + "_fail_rates": getattr(self.CONFIG, name + "_fail_rates")._replace(unacceptable=0.1)
        for name in ("test", "task", "variant", "distro")
    }
    config = self.CONFIG._replace(unreliable_time_period=datetime.timedelta(days=2),
                                  **rate_overrides)

    tests = ["jstests/core/all.js"]
    initial_tags = collections.OrderedDict([
        ("jstests/core/all.js", [
            "unreliable",
            "unreliable|jsCore_WT",
            "unreliable|jsCore_WT|linux-64",
            "unreliable|jsCore_WT|linux-64|rhel62",
        ]),
    ])
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    summary_lifecycle = update_test_lifecycle.TagsConfigWithChangelog(lifecycle)
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    entry = self.ENTRY
    # The test did not run on the reliable period on linux-64.
    report = test_failures.Report([
        # Failing during the unreliable period.
        entry._replace(num_pass=0, num_fail=2),
        # Passing on a different variant during the reliable period.
        entry._replace(start_date=reliable_period_date,
                       end_date=reliable_period_date,
                       num_pass=3,
                       num_fail=0,
                       variant="linux-alt",
                       distro="debian7"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(summary_lifecycle, config, report, tests)
    # The tags for variant and distro have been removed; test and task tags remain.
    self.assertEqual(
        self.assert_has_only_js_tests(lifecycle),
        collections.OrderedDict([
            ("jstests/core/all.js", ["unreliable", "unreliable|jsCore_WT"])
        ]))
def update_tags(lifecycle_tags, config, report): """ Updates the tags in 'lifecycle_tags' based on the historical test failures mentioned in 'report' according to the model described by 'config'. """ # We initialize 'grouped_entries' to make PyLint not complain about 'grouped_entries' being used # before assignment. grouped_entries = None for (i, (components, rates)) in enumerate( ((tf.Report.TEST_TASK_VARIANT_DISTRO, config.distro_fail_rates), (tf.Report.TEST_TASK_VARIANT, config.variant_fail_rates), (tf.Report.TEST_TASK, config.task_fail_rates), (tf.Report.TEST, config.test_fail_rates))): if i > 0: report = tf.Report(grouped_entries) # We reassign the value of 'grouped_entries' to take advantage of how data that is on # (test, task, variant, distro) preserves enough information to be grouped on any subset of # those components, etc. grouped_entries = report.summarize_by(components, time_period=tf.Report.DAILY) # Filter out any test executions from prior to 'config.unreliable_time_period'. unreliable_start_date = (report.end_date - config.unreliable_time_period + datetime.timedelta(days=1)) unreliable_report = tf.Report(entry for entry in grouped_entries if entry.start_date >= unreliable_start_date) update_lifecycle(lifecycle_tags, unreliable_report.summarize_by(components), unreliable_test, True, rates.unacceptable, config.unreliable_min_runs) # Filter out any test executions from prior to 'config.reliable_time_period'. reliable_start_date = (report.end_date - config.reliable_time_period + datetime.timedelta(days=1)) reliable_report = tf.Report(entry for entry in grouped_entries if entry.start_date >= reliable_start_date) update_lifecycle(lifecycle_tags, reliable_report.summarize_by(components), reliable_test, False, rates.acceptable, config.reliable_min_runs)
def test_group_all_by_test_task_variant_distro(self):
    """
    Tests that summarize_by() correctly accumulates all unique combinations of
    (test, task, variant, distro).
    """
    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST_TASK_VARIANT_DISTRO)

    expected = [
        self.ENTRY._replace(
            task="jsCore",
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 5),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            distro="rhel55",
            start_date=datetime.date(2017, 6, 10),
            end_date=datetime.date(2017, 6, 10),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 10),
            num_pass=3,
            num_fail=0,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 17),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 10),
            end_date=datetime.date(2017, 6, 10),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_group_weekly_by_test_starting_on_date(self):
    """
    Tests that summarize_by() correctly accumulates by week when the beginning of the week is
    specified as a datetime.date() value.
    """
    date = datetime.date(2017, 6, 7)
    self.assertEqual(2, date.weekday(), "2017 Jun 07 is a Wednesday")

    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST,
                                         time_period=test_failures.Report.WEEKLY,
                                         start_day_of_week=date)

    expected = [
        self.ENTRY._replace(
            task=test_failures.Wildcard("tasks"),
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 6),
            num_pass=1,
            num_fail=1,
        ),
        self.ENTRY._replace(
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 7),
            end_date=datetime.date(2017, 6, 13),
            num_pass=2,
            num_fail=1,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 14),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 7),
            end_date=datetime.date(2017, 6, 13),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_group_daily_by_test(self):
    """
    Tests that summarize_by() correctly accumulates by day.
    """
    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST,
                                         time_period=test_failures.Report.DAILY)

    expected = [
        self.ENTRY._replace(
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 3),
            num_pass=1,
            num_fail=0,
        ),
        self.ENTRY._replace(
            task="jsCore",
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 5),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            start_date=datetime.date(2017, 6, 9),
            end_date=datetime.date(2017, 6, 9),
            num_pass=1,
            num_fail=0,
        ),
        self.ENTRY._replace(
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 10),
            end_date=datetime.date(2017, 6, 10),
            num_pass=1,
            num_fail=1,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 17),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 10),
            end_date=datetime.date(2017, 6, 10),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_group_weekly_by_test_starting_on_monday(self):
    """
    Tests that summarize_by() correctly accumulates by week when the beginning of the week is
    specified as the string "monday".
    """
    report = test_failures.Report(self.ENTRIES)
    summed_entries = report.summarize_by(test_failures.Report.TEST,
                                         time_period=test_failures.Report.WEEKLY,
                                         start_day_of_week=test_failures.Report.MONDAY)

    expected = [
        self.ENTRY._replace(
            start_date=datetime.date(2017, 6, 3),
            end_date=datetime.date(2017, 6, 4),
            num_pass=1,
            num_fail=0,
        ),
        self.ENTRY._replace(
            task=test_failures.Wildcard("tasks"),
            distro=test_failures.Wildcard("distros"),
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 11),
            num_pass=2,
            num_fail=2,
        ),
        self.ENTRY._replace(
            variant="linux-64-debug",
            start_date=datetime.date(2017, 6, 12),
            end_date=datetime.date(2017, 6, 17),
            num_pass=0,
            num_fail=1,
        ),
        self.ENTRY._replace(
            test="jstests/core/all2.js",
            start_date=datetime.date(2017, 6, 5),
            end_date=datetime.date(2017, 6, 11),
            num_pass=1,
            num_fail=0,
        ),
    ]

    self.assertEqual(len(expected), len(summed_entries))
    for idx, expected_entry in enumerate(expected):
        self.assertEqual(summed_entries[idx], expected_entry)
def test_remain_unreliable(self):
    """
    Tests that update_tags() preserves the tags for unreliable combinations.
    """
    # Tighten every unacceptable fail rate so the failures below stay unreliable.
    rate_overrides = {
        name + "_fail_rates": getattr(self.CONFIG, name + "_fail_rates")._replace(unacceptable=0.1)
        for name in ("test", "task", "variant", "distro")
    }
    config = self.CONFIG._replace(**rate_overrides)

    unreliable_tags = [
        "unreliable",
        "unreliable|jsCore_WT",
        "unreliable|jsCore_WT|linux-64",
        "unreliable|jsCore_WT|linux-64|rhel62",
    ]
    initial_tags = collections.OrderedDict([("jstests/core/all.js", unreliable_tags)])
    lifecycle = ci_tags.TagsConfig.from_dict(
        dict(selector=dict(js_test=copy.deepcopy(initial_tags))))
    self.assertEqual(initial_tags, self.assert_has_only_js_tests(lifecycle))

    entry = self.ENTRY
    report = test_failures.Report([
        entry._replace(num_pass=0, num_fail=1),
        entry._replace(num_pass=0, num_fail=1, task="jsCore"),
        entry._replace(num_pass=0, num_fail=1, variant="linux-64-debug"),
        entry._replace(num_pass=1, num_fail=0),
        entry._replace(num_pass=0, num_fail=1, distro="rhel55"),
    ])

    update_test_lifecycle.validate_config(config)
    update_test_lifecycle.update_tags(lifecycle, config, report)
    # The existing tags are all preserved.
    self.assertEqual(self.assert_has_only_js_tests(lifecycle), initial_tags)
def main():
    """
    Utility for updating a resmoke.py tag file based on computing test failure rates from the
    Evergreen API.
    """

    parser = optparse.OptionParser(description=textwrap.dedent(main.__doc__),
                                   usage="Usage: %prog [options] [test1 test2 ...]")

    # --- Data options: what historical failure data to fetch. ---
    data_options = optparse.OptionGroup(
        parser,
        title="Data options",
        description=("Options used to configure what historical test failure data to retrieve from"
                     " Evergreen."))
    parser.add_option_group(data_options)

    data_options.add_option(
        "--project", dest="project",
        metavar="<project-name>",
        default=tf.TestHistory.DEFAULT_PROJECT,
        help="The Evergreen project to analyze. Defaults to '%default'.")

    data_options.add_option(
        "--tasks", dest="tasks",
        metavar="<task1,task2,...>",
        help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in"
              " additional to having test positional arguments, then only tests that run under the"
              " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks"
              " defaults to the non-excluded list of tasks from the specified"
              " --evergreenProjectConfig file."))

    data_options.add_option(
        "--variants", dest="variants",
        metavar="<variant1,variant2,...>",
        default="",
        help="The Evergreen build variants to analyze for tagging unreliable tests.")

    data_options.add_option(
        "--distros", dest="distros",
        metavar="<distro1,distro2,...>",
        default="",
        help="The Evergreen distros to analyze for tagging unreliable tests.")

    data_options.add_option(
        "--evergreenProjectConfig", dest="evergreen_project_config",
        metavar="<project-config-file>",
        default="etc/evergreen.yml",
        help=("The Evergreen project configuration file used to get the list of tasks if --tasks is"
              " omitted. Defaults to '%default'."))

    # --- Model options: thresholds that decide reliable vs. unreliable. ---
    model_options = optparse.OptionGroup(
        parser,
        title="Model options",
        description=("Options used to configure whether (test,), (test, task),"
                     " (test, task, variant), and (test, task, variant, distro) combinations are"
                     " considered unreliable."))
    parser.add_option_group(model_options)

    model_options.add_option(
        "--reliableTestMinRuns", type="int", dest="reliable_test_min_runs",
        metavar="<reliable-min-runs>",
        default=DEFAULT_CONFIG.reliable_min_runs,
        help=("The minimum number of test executions required for a test's failure rate to"
              " determine whether the test is considered reliable. If a test has fewer than"
              " <reliable-min-runs> executions, then it cannot be considered unreliable."))

    model_options.add_option(
        "--unreliableTestMinRuns", type="int", dest="unreliable_test_min_runs",
        metavar="<unreliable-min-runs>",
        default=DEFAULT_CONFIG.unreliable_min_runs,
        help=("The minimum number of test executions required for a test's failure rate to"
              " determine whether the test is considered unreliable. If a test has fewer than"
              " <unreliable-min-runs> executions, then it cannot be considered unreliable."))

    model_options.add_option(
        "--testFailRates", type="float", nargs=2, dest="test_fail_rates",
        metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.test_fail_rates,
        help=("Controls how readily a test is considered unreliable. Each failure rate must be a"
              " number between 0 and 1 (inclusive) with"
              " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no"
              " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is"
              " considered reliable. Otherwise, if a test fails at least as much as"
              " <test-unacceptable-fail-rate> in <test-unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--taskFailRates", type="float", nargs=2, dest="task_fail_rates",
        metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.task_fail_rates,
        help=("Controls how readily a (test, task) combination is considered unreliable. Each"
              " failure rate must be a number between 0 and 1 (inclusive) with"
              " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)"
              " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time,"
              " then it is considered reliable. Otherwise, if a test fails at least as much as"
              " <task-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--variantFailRates", type="float", nargs=2, dest="variant_fail_rates",
        metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.variant_fail_rates,
        help=("Controls how readily a (test, task, variant) combination is considered unreliable."
              " Each failure rate must be a number between 0 and 1 (inclusive) with"
              " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a"
              " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>"
              " in <reliable-days> time, then it is considered reliable. Otherwise, if a test fails"
              " at least as much as <variant-unacceptable-fail-rate> in <unreliable-days> time,"
              " then it is considered unreliable. Defaults to %default."))

    model_options.add_option(
        "--distroFailRates", type="float", nargs=2, dest="distro_fail_rates",
        metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.distro_fail_rates,
        help=("Controls how readily a (test, task, variant, distro) combination is considered"
              " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with"
              " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a"
              " (test, task, variant, distro) combination fails no more than"
              " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered"
              " reliable. Otherwise, if a test fails at least as much as"
              " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--reliableDays", type="int", dest="reliable_days",
        metavar="<ndays>",
        default=DEFAULT_CONFIG.reliable_time_period.days,
        help=("The time period to analyze when determining if a test has become reliable. Defaults"
              " to %default day(s)."))

    model_options.add_option(
        "--unreliableDays", type="int", dest="unreliable_days",
        metavar="<ndays>",
        default=DEFAULT_CONFIG.unreliable_time_period.days,
        help=("The time period to analyze when determining if a test has become unreliable."
              " Defaults to %default day(s)."))

    parser.add_option(
        "--resmokeTagFile", dest="tag_file",
        metavar="<tagfile>",
        default="etc/test_lifecycle.yml",
        help=("The resmoke.py tag file to update. If --metadataRepo is specified, it"
              " is the relative path in the metadata repository, otherwise it can be"
              " an absolute path or a relative path from the current directory."
              " Defaults to '%default'."))

    parser.add_option(
        "--metadataRepo", dest="metadata_repo_url",
        metavar="<metadata-repo-url>",
        default="[email protected]:mongodb/mongo-test-metadata.git",
        help=("The repository that contains the lifecycle file. "
              "It will be cloned in the current working directory. "
              "Defaults to '%default'."))

    parser.add_option(
        "--referencesFile", dest="references_file",
        metavar="<references-file>",
        default="references.yml",
        help=("The YAML file in the metadata repository that contains the revision "
              "mappings. Defaults to '%default'."))

    parser.add_option(
        "--requestBatchSize", type="int", dest="batch_size",
        metavar="<batch-size>",
        default=100,
        help=("The maximum number of tests to query the Evergreen API for in a single"
              " request. A higher value for this option will reduce the number of"
              " roundtrips between this client and Evergreen. Defaults to %default."))

    # --- Commit options: whether/how to commit the updated tag file. ---
    commit_options = optparse.OptionGroup(
        parser,
        title="Commit options",
        description=("Options used to configure whether and how to commit the updated test"
                     " lifecycle tags."))
    parser.add_option_group(commit_options)

    commit_options.add_option(
        "--commit", action="store_true", dest="commit",
        default=False,
        help="Indicates that the updated tag file should be committed.")

    commit_options.add_option(
        "--jiraConfig", dest="jira_config",
        metavar="<jira-config>",
        default=None,
        help=("The YAML file containing the JIRA access configuration ('user', 'password',"
              "'server')."))

    commit_options.add_option(
        "--gitUserName", dest="git_user_name",
        metavar="<git-user-name>",
        default="Test Lifecycle",
        help=("The git user name that will be set before committing to the metadata repository."
              " Defaults to '%default'."))

    commit_options.add_option(
        "--gitUserEmail", dest="git_user_email",
        metavar="<git-user-email>",
        default="*****@*****.**",
        help=("The git user email address that will be set before committing to the metadata"
              " repository. Defaults to '%default'."))

    # --- Logging options. ---
    logging_options = optparse.OptionGroup(
        parser,
        title="Logging options",
        description="Options used to configure the logging output of the script.")
    parser.add_option_group(logging_options)

    logging_options.add_option(
        "--logLevel", dest="log_level",
        metavar="<log-level>",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help=("The log level. Accepted values are: DEBUG, INFO, WARNING and ERROR."
              " Defaults to '%default'."))

    logging_options.add_option(
        "--logFile", dest="log_file",
        metavar="<log-file>",
        default=None,
        help="The destination file for the logs output. Defaults to the standard output.")

    (options, tests) = parser.parse_args()

    if options.distros:
        warnings.warn(
            ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information"
             " isn't returned by the Evergreen API. This option will therefore be ignored."),
            RuntimeWarning)

    logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s",
                        level=options.log_level,
                        filename=options.log_file)

    evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config)
    use_test_tasks_membership = False

    tasks = options.tasks.split(",") if options.tasks else []
    if not tasks:
        # If no tasks are specified, then the list of tasks is all.
        tasks = evg_conf.lifecycle_task_names
        use_test_tasks_membership = True

    variants = options.variants.split(",") if options.variants else []

    distros = options.distros.split(",") if options.distros else []

    config = Config(
        test_fail_rates=Rates(*options.test_fail_rates),
        task_fail_rates=Rates(*options.task_fail_rates),
        variant_fail_rates=Rates(*options.variant_fail_rates),
        distro_fail_rates=Rates(*options.distro_fail_rates),
        reliable_min_runs=options.reliable_test_min_runs,
        reliable_time_period=datetime.timedelta(days=options.reliable_days),
        unreliable_min_runs=options.unreliable_test_min_runs,
        unreliable_time_period=datetime.timedelta(days=options.unreliable_days))
    validate_config(config)

    lifecycle_tags_file = make_lifecycle_tags_file(options, config)
    if not lifecycle_tags_file:
        sys.exit(1)

    test_tasks_membership = get_test_tasks_membership(evg_conf)
    # If no tests are specified then the list of tests is generated from the list of tasks.
    if not tests:
        tests = get_tests_from_tasks(tasks, test_tasks_membership)
        if not options.tasks:
            use_test_tasks_membership = True

    commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days))
    commit_prior = git_commit_prior(commit_first)

    # For efficiency purposes, group the tests and process in batches of batch_size.
    test_groups = create_batch_groups(create_test_groups(tests), options.batch_size)

    LOGGER.info("Updating the tags")
    for tests in test_groups:
        # Find all associated tasks for the test_group if tasks or tests were not specified.
        if use_test_tasks_membership:
            tasks_set = set()
            for test in tests:
                tasks_set = tasks_set.union(test_tasks_membership[test])
            tasks = list(tasks_set)
        if not tasks:
            LOGGER.warning("No tasks found for tests %s, skipping this group.", tests)
            continue

        test_history = tf.TestHistory(project=options.project,
                                      tests=tests,
                                      tasks=tasks,
                                      variants=variants,
                                      distros=distros)

        history_data = test_history.get_history_by_revision(start_revision=commit_prior,
                                                            end_revision=commit_last)

        report = tf.Report(history_data)
        update_tags(lifecycle_tags_file.changelog_lifecycle, config, report)

    # Remove tags that are no longer relevant
    clean_up_tags(lifecycle_tags_file.changelog_lifecycle, evg_conf)

    # We write the 'lifecycle' tag configuration to the 'options.lifecycle_file' file only if there
    # have been changes to the tags. In particular, we avoid modifying the file when only the header
    # comment for the YAML file would change.
    if lifecycle_tags_file.is_modified():
        lifecycle_tags_file.write()

        if options.commit:
            commit_ok = lifecycle_tags_file.commit()
            if not commit_ok:
                sys.exit(1)
    else:
        LOGGER.info("The tags have not been modified.")
def _make_option_parser():
    """
    Build the optparse.OptionParser used by main().

    Options come in two groups: "Data options" control what historical test
    failure data is retrieved from Evergreen, and "Model options" control the
    failure-rate thresholds deciding whether a (test, task, variant, distro)
    combination is considered unreliable. Two ungrouped options select the
    resmoke.py tag file to update and the Evergreen API request batch size.
    """
    parser = optparse.OptionParser(
        description=textwrap.dedent(main.__doc__),
        usage="Usage: %prog [options] [test1 test2 ...]")

    data_options = optparse.OptionGroup(
        parser,
        title="Data options",
        description=("Options used to configure what historical test failure data to retrieve from"
                     " Evergreen."))
    parser.add_option_group(data_options)

    data_options.add_option(
        "--project",
        dest="project",
        metavar="<project-name>",
        default=tf.TestHistory.DEFAULT_PROJECT,
        help="The Evergreen project to analyze. Defaults to '%default'.")

    data_options.add_option(
        "--tasks",
        dest="tasks",
        metavar="<task1,task2,...>",
        help=("The Evergreen tasks to analyze for tagging unreliable tests. If specified in"
              " addition to having test positional arguments, then only tests that run under the"
              " specified Evergreen tasks will be analyzed. If omitted, then the list of tasks"
              " defaults to the non-excluded list of tasks from the specified"
              " --evergreenProjectConfig file."))

    data_options.add_option(
        "--variants",
        dest="variants",
        metavar="<variant1,variant2,...>",
        default="",
        help="The Evergreen build variants to analyze for tagging unreliable tests.")

    data_options.add_option(
        "--distros",
        dest="distros",
        metavar="<distro1,distro2,...>",
        default="",
        help="The Evergreen distros to analyze for tagging unreliable tests.")

    data_options.add_option(
        "--evergreenProjectConfig",
        dest="evergreen_project_config",
        metavar="<project-config-file>",
        default="etc/evergreen.yml",
        help=("The Evergreen project configuration file used to get the list of tasks if --tasks is"
              " omitted. Defaults to '%default'."))

    model_options = optparse.OptionGroup(
        parser,
        title="Model options",
        description=("Options used to configure whether (test,), (test, task),"
                     " (test, task, variant), and (test, task, variant, distro) combinations are"
                     " considered unreliable."))
    parser.add_option_group(model_options)

    model_options.add_option(
        "--reliableTestMinRuns",
        type="int",
        dest="reliable_test_min_runs",
        metavar="<reliable-min-runs>",
        default=DEFAULT_CONFIG.reliable_min_runs,
        help=("The minimum number of test executions required for a test's failure rate to"
              " determine whether the test is considered reliable. If a test has fewer than"
              " <reliable-min-runs> executions, then it cannot be considered unreliable."))

    model_options.add_option(
        "--unreliableTestMinRuns",
        type="int",
        dest="unreliable_test_min_runs",
        metavar="<unreliable-min-runs>",
        default=DEFAULT_CONFIG.unreliable_min_runs,
        help=("The minimum number of test executions required for a test's failure rate to"
              " determine whether the test is considered unreliable. If a test has fewer than"
              " <unreliable-min-runs> executions, then it cannot be considered unreliable."))

    model_options.add_option(
        "--testFailRates",
        type="float",
        nargs=2,
        dest="test_fail_rates",
        metavar="<test-acceptable-fail-rate> <test-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.test_fail_rates,
        help=("Controls how readily a test is considered unreliable. Each failure rate must be a"
              " number between 0 and 1 (inclusive) with"
              " <test-unacceptable-fail-rate> >= <test-acceptable-fail-rate>. If a test fails no"
              " more than <test-acceptable-fail-rate> in <reliable-days> time, then it is"
              " considered reliable. Otherwise, if a test fails at least as much as"
              " <test-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--taskFailRates",
        type="float",
        nargs=2,
        dest="task_fail_rates",
        metavar="<task-acceptable-fail-rate> <task-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.task_fail_rates,
        help=("Controls how readily a (test, task) combination is considered unreliable. Each"
              " failure rate must be a number between 0 and 1 (inclusive) with"
              " <task-unacceptable-fail-rate> >= <task-acceptable-fail-rate>. If a (test, task)"
              " combination fails no more than <task-acceptable-fail-rate> in <reliable-days> time,"
              " then it is considered reliable. Otherwise, if a test fails at least as much as"
              " <task-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--variantFailRates",
        type="float",
        nargs=2,
        dest="variant_fail_rates",
        metavar="<variant-acceptable-fail-rate> <variant-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.variant_fail_rates,
        help=("Controls how readily a (test, task, variant) combination is considered unreliable."
              " Each failure rate must be a number between 0 and 1 (inclusive) with"
              " <variant-unacceptable-fail-rate> >= <variant-acceptable-fail-rate>. If a"
              " (test, task, variant) combination fails no more than <variant-acceptable-fail-rate>"
              " in <reliable-days> time, then it is considered reliable. Otherwise, if a test fails"
              " at least as much as <variant-unacceptable-fail-rate> in <unreliable-days> time,"
              " then it is considered unreliable. Defaults to %default."))

    model_options.add_option(
        "--distroFailRates",
        type="float",
        nargs=2,
        dest="distro_fail_rates",
        metavar="<distro-acceptable-fail-rate> <distro-unacceptable-fail-rate>",
        default=DEFAULT_CONFIG.distro_fail_rates,
        help=("Controls how readily a (test, task, variant, distro) combination is considered"
              " unreliable. Each failure rate must be a number between 0 and 1 (inclusive) with"
              " <distro-unacceptable-fail-rate> >= <distro-acceptable-fail-rate>. If a"
              " (test, task, variant, distro) combination fails no more than"
              " <distro-acceptable-fail-rate> in <reliable-days> time, then it is considered"
              " reliable. Otherwise, if a test fails at least as much as"
              " <distro-unacceptable-fail-rate> in <unreliable-days> time, then it is considered"
              " unreliable. Defaults to %default."))

    model_options.add_option(
        "--reliableDays",
        type="int",
        dest="reliable_days",
        metavar="<ndays>",
        default=DEFAULT_CONFIG.reliable_time_period.days,
        help=("The time period to analyze when determining if a test has become reliable. Defaults"
              " to %default day(s)."))

    model_options.add_option(
        "--unreliableDays",
        type="int",
        dest="unreliable_days",
        metavar="<ndays>",
        default=DEFAULT_CONFIG.unreliable_time_period.days,
        help=("The time period to analyze when determining if a test has become unreliable."
              " Defaults to %default day(s)."))

    parser.add_option(
        "--resmokeTagFile",
        dest="tag_file",
        metavar="<tagfile>",
        default="etc/test_lifecycle.yml",
        help="The resmoke.py tag file to update. Defaults to '%default'.")

    parser.add_option(
        "--requestBatchSize",
        type="int",
        dest="batch_size",
        metavar="<batch-size>",
        default=100,
        help=("The maximum number of tests to query the Evergreen API for in a single"
              " request. A higher value for this option will reduce the number of"
              " roundtrips between this client and Evergreen. Defaults to %default."))

    return parser


def main():
    """
    Utility for updating a resmoke.py tag file based on computing test failure rates from the
    Evergreen API.
    """

    parser = _make_option_parser()
    (options, tests) = parser.parse_args()

    if options.distros:
        # Distro data is not yet available from the Evergreen API; warn rather than fail.
        warnings.warn(
            ("Until https://jira.mongodb.org/browse/EVG-1665 is implemented, distro information"
             " isn't returned by the Evergreen API. This option will therefore be ignored."),
            RuntimeWarning)

    evg_conf = ci_evergreen.EvergreenProjectConfig(options.evergreen_project_config)
    use_test_tasks_membership = False

    tasks = options.tasks.split(",") if options.tasks else []
    if not tasks:
        # If no tasks are specified, then the list of tasks is all.
        tasks = evg_conf.lifecycle_task_names
        use_test_tasks_membership = True

    variants = options.variants.split(",") if options.variants else []
    distros = options.distros.split(",") if options.distros else []

    config = Config(
        test_fail_rates=Rates(*options.test_fail_rates),
        task_fail_rates=Rates(*options.task_fail_rates),
        variant_fail_rates=Rates(*options.variant_fail_rates),
        distro_fail_rates=Rates(*options.distro_fail_rates),
        reliable_min_runs=options.reliable_test_min_runs,
        reliable_time_period=datetime.timedelta(days=options.reliable_days),
        unreliable_min_runs=options.unreliable_test_min_runs,
        unreliable_time_period=datetime.timedelta(days=options.unreliable_days))
    validate_config(config)

    lifecycle = ci_tags.TagsConfig.from_file(options.tag_file, cmp_func=compare_tags)

    test_tasks_membership = get_test_tasks_membership(evg_conf)
    # If no tests are specified then the list of tests is generated from the list of tasks.
    if not tests:
        tests = get_tests_from_tasks(tasks, test_tasks_membership)
        if not options.tasks:
            use_test_tasks_membership = True

    # The revision range analyzed spans the unreliable time period; commit_prior anchors the
    # start revision handed to the Evergreen test history API.
    commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days))
    commit_prior = git_commit_prior(commit_first)

    # For efficiency purposes, group the tests and process in batches of batch_size.
    test_groups = create_batch_groups(create_test_groups(tests), options.batch_size)

    for tests in test_groups:
        # Find all associated tasks for the test_group if tasks or tests were not specified.
        if use_test_tasks_membership:
            tasks = list(set().union(*(test_tasks_membership[test] for test in tests)))
        if not tasks:
            # Report through the module-level LOGGER, consistent with the rest of this module.
            LOGGER.warning("No tasks found for tests %s, skipping this group.", tests)
            continue

        test_history = tf.TestHistory(
            project=options.project, tests=tests, tasks=tasks, variants=variants, distros=distros)
        history_data = test_history.get_history_by_revision(
            start_revision=commit_prior, end_revision=commit_last)

        report = tf.Report(history_data)
        update_tags(lifecycle, config, report)

    # Remove tags that are no longer relevant
    cleanup_tags(lifecycle, evg_conf)

    # We write the 'lifecycle' tag configuration to the 'options.lifecycle_file' file only if there
    # have been changes to the tags. In particular, we avoid modifying the file when only the header
    # comment for the YAML file would change.
    if lifecycle.is_modified():
        write_yaml_file(options.tag_file, lifecycle)