def start(self):
    """Start all clusters for the first time.

    On the first start, we will just kill hard any mongod processes as quickly as
    possible. (They would most likely be left running by a previous evergreen task
    and will be wiped out anyway.)

    See :method:`restart` if this is not a clean start.
    """
    self.destroy(self.sigterm_ms)
    # The downloader will download MongoDB binaries if a URL was provided in the
    # ConfigDict.
    if not self.downloader.download_and_extract():
        LOG.error("Download and extract failed.")
        return False
    LOG.info("Mongodb_setup running pre_cluster_start commands")
    run_pre_post_commands('pre_cluster_start', [self.config['mongodb_setup']], self.config,
                          EXCEPTION_BEHAVIOR.EXIT)
    status = self._start()
    # Start Atlas clusters using config given in mongodb_setup.atlas (if any).
    status = status and self.atlas.start()
    LOG.info("Mongodb_setup running post_cluster_start commands")
    # Exit also here. Among other things it causes setup failure in Evergreen.
    run_pre_post_commands('post_cluster_start', [self.config['mongodb_setup']], self.config,
                          EXCEPTION_BEHAVIOR.EXIT)
    return status
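
# --- Illustrative usage sketch (not part of the module) ---
# A minimal driver showing the boolean contract of start() above, assuming `setup` is an
# already-constructed mongodb_setup object. The function name and exit behavior are
# assumptions for illustration, not the repo's real entry point.
import sys

def example_start_cluster_or_die(setup):
    """Fail the Evergreen task early when cluster start reports failure."""
    if not setup.start():
        # start() returns False when binary download or any cluster start step fails.
        sys.exit(1)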
def test_run_pre_post(self, mock_run_host_command):
    """Test test_control.run_pre_post_commands()"""
    command_dicts = [self.config['test_control'], self.config['mongodb_setup']]
    run_pre_post_commands('post_test', command_dicts, self.config, EXCEPTION_BEHAVIOR.EXIT)
    # expected_args = ['on_workload_client', 'on_all_servers', 'on_mongod', 'on_configsvr']
    expected_args = ['on_mongod', 'on_all_hosts', 'on_all_servers', 'on_mongod', 'on_configsvr']
    observed_args = []
    for args in mock_run_host_command.call_args_list:
        observed_args.append(args[0][0])
    self.assertEqual(observed_args, expected_args)
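
# --- Illustrative sketch (not part of the test) ---
# The dispatch behavior the expected_args assertion above relies on, assuming
# run_pre_post_commands walks each ConfigDict in order and issues one run_host_command
# per target key (e.g. 'on_mongod', 'on_all_servers') found under the given command key.
# This is a simplification for readability; the real logic lives in test_control and its
# command-runner helpers. run_host_command is passed in to keep the sketch self-contained.
def sketch_run_pre_post_commands(command_key, command_dicts, config, run_host_command):
    for command_dict in command_dicts:
        for command in command_dict.get(command_key, []):
            for target, action in command.items():
                # target is the first positional arg the test inspects via args[0][0].
                run_host_command(target, action, config)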
def setup_cluster(self):
    """Runs terraform to provision the cluster."""
    # pylint: disable=too-many-statements
    # Create and copy needed security.tf and terraform.tf files into current work directory
    self.setup_security_tf()
    self.setup_terraform_tf()
    if self.reuse_cluster:
        self.setup_evg_dir()
    LOG.info('terraform: init')
    subprocess.check_call([self.terraform, 'init', '-upgrade'],
                          stdout=self.stdout,
                          stderr=self.stderr)
    tf_config = TerraformConfiguration(self.config)
    tf_config.to_json(file_name=CLUSTER_JSON)  # pylint: disable=no-member
    self.var_file = '-var-file={}'.format(CLUSTER_JSON)
    if self.existing:
        LOG.info('Reusing AWS cluster.', cluster=self.cluster)
    else:
        LOG.info('Creating AWS cluster.', cluster=self.cluster)
    LOG.info('terraform: apply')
    terraform_command = [
        self.terraform, 'apply', self.var_file, self.parallelism, '-auto-approve'
    ]
    # Disk warmup for initialsync-logkeeper takes about 4 hours. This will save
    # about $12 by delaying deployment of the two other nodes.
    if not self.existing and self.cluster == 'initialsync-logkeeper':
        terraform_command.extend(
            ['-var=mongod_ebs_instance_count=0', '-var=workload_instance_count=0'])
    try:
        subprocess.check_call(terraform_command, stdout=self.stdout, stderr=self.stderr)
        if not self.existing and self.cluster == 'initialsync-logkeeper':
            # Second apply deploys the remaining nodes once disk warmup is underway.
            subprocess.check_call(
                [self.terraform, 'apply', self.var_file, self.parallelism, '-auto-approve'],
                stdout=self.stdout,
                stderr=self.stderr)
        LOG.info('terraform: refresh')
        subprocess.check_call([self.terraform, 'refresh', self.var_file],
                              stdout=self.stdout,
                              stderr=self.stderr)
        LOG.info('terraform: plan')
        subprocess.check_call([self.terraform, 'plan', '-detailed-exitcode', self.var_file],
                              stdout=self.stdout,
                              stderr=self.stderr)
        LOG.info('terraform: output')
        terraform_output = run_and_save_output([self.terraform, 'output'])
        LOG.debug(terraform_output)
        tf_parser = TerraformOutputParser(config=self.config, terraform_output=terraform_output)
        tf_parser.write_output_files()
        if self.reuse_cluster:
            self.save_terraform_state()
        if self.existing:
            # Delays should be unset at the end of each test_control.py run, but reset
            # them here in case the previous run didn't complete.
            safe_reset_all_delays(self.config)
        # Write hostnames to /etc/hosts
        self.setup_hostnames()
        with open('infrastructure_provisioning.out.yml', 'r') as provisioning_out_yaml:
            LOG.info('Contents of infrastructure_provisioning.out.yml:')
            LOG.info(provisioning_out_yaml.read())
        LOG.info("EC2 resources provisioned/updated successfully.")
        # Run post provisioning scripts.
        run_pre_post_commands("post_provisioning", [self.config['infrastructure_provisioning']],
                              self.config, EXCEPTION_BEHAVIOR.EXIT)
    except Exception as exception:
        LOG.error("Failed to provision EC2 resources.", exc_info=True)
        if self.stderr is not None:
            self.stderr.close()
        self.print_terraform_errors()
        LOG.error("Releasing any EC2 resources that did deploy.")
        destroy_resources()
        rmtree_when_present(self.evg_data_dir)
        raise exception
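
# --- Illustrative sketch (not part of the module) ---
# A plausible implementation of the run_and_save_output helper used above, assuming it
# runs a command, captures stdout for later parsing, and raises on non-zero exit. The
# repo's real helper may also tee output to a log file; this version only captures and
# returns it.
import subprocess

def sketch_run_and_save_output(command):
    """Run `command`, returning its stdout as text; raises CalledProcessError on failure."""
    result = subprocess.run(command, check=True, capture_output=True, text=True)
    return result.stdout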
def help_trace_function(self, mock_function, mock_command_dicts, mock_extract_hosts, mock_ssh):
    """
    Test test_control.print_trace by calling run_pre_post_commands with a 'pre_task' key,
    with a forced exception.

    This is a helper function used by other tests within this class. It uses a mocked
    RemoteHost along with a mocked function within the RemoteHost that has a forced
    exception in it.

    :param MagicMock() mock_function: mocked function from mock_remote_host
    :param list(ConfigDict) mock_command_dicts: List of ConfigDict objects that have a
        'pre_task' key.
    :param MagicMock() mock_extract_hosts: Do not pass explicitly; the patch decorator
        already supplies this argument.
    :param MagicMock() mock_ssh: Do not pass explicitly; the patch decorator already
        supplies this argument.
    """
    mock_config = {
        'infrastructure_provisioning': {
            'tfvars': {
                'ssh_user': '******',
                'ssh_key_file': 'mock_ssh_key'
            }
        },
        'mongodb_setup': {
            'meta': {
                'net': {},
            },
            'authentication': {
                'enabled': True,
                'username': '******',
                'password': '******',
            },
        },
    }
    with LogCapture(level=logging.ERROR) as log_capture:
        # LogCapture captures all log output into the object log_capture. level specifies
        # which log level to detect. logging.ERROR will cause log_capture to only contain
        # logs output at the ERROR level or higher. The patch on common.host_factory.make_host
        # mocks the function and is called within run_commands:
        # (pre_task -> dispatch_commands -> run_host_command -> make_host)
        # The mock_function.side_effect causes it to raise an Exception, causing print_trace
        # to log the proper information. mock_function will be called within run_command or
        # _run_host_command_map depending on mock_command_dicts. run_pre_post_commands exits
        # with code 1 on exception when given EXCEPTION_BEHAVIOR.EXIT, so self.assertRaises()
        # catches this. The asserts check that mock_function, extract_hosts, and make_host
        # were called, and that the exit code was 1.
        return_value = RemoteHost(None, None, None)
        # disabling yapf here because pylint and yapf disagree on indentation convention
        # yapf: disable
        with patch('common.host_factory.make_host',
                   return_value=return_value) as mock_make_host:
            # yapf: enable
            mock_function.side_effect = Exception("Mock Exception")
            with self.assertRaises(SystemExit) as exception:
                run_pre_post_commands('pre_task', mock_command_dicts, mock_config,
                                      EXCEPTION_BEHAVIOR.EXIT)
            self.assertTrue(mock_function.called)
            self.assertTrue(mock_extract_hosts.called)
            self.assertTrue(mock_make_host.called)
            self.assertTrue(mock_ssh.called)
            self.assertEqual(exception.exception.code, 1)
        task = list(mock_command_dicts[0]['pre_task'][0].keys())[0]
        command = mock_command_dicts[0]['pre_task'][0][task]
        error_regex_str = "Exception originated in: .+"
        error_regex_str = error_regex_str + "\nException msg:.*?"
        error_regex_str = error_regex_str + "\nrun_pre_post_commands:\n "
        error_regex_str = error_regex_str + "in task: " + task + "\n "
        error_regex_str = error_regex_str + "in command: " + re.escape(str(command))
        error_pattern = re.compile(error_regex_str)
        # Get the actual log records held by the log_capture object.
        list_errors = list(log_capture.actual())
        self.assertRegex(list_errors[0][2], error_pattern)
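
# --- Illustrative sketch (not part of the test) ---
# The message shape the regex above is built to match, assuming print_trace logs the
# exception origin, its message, and the enclosing task/command. This formatter is a
# hypothetical reconstruction for demonstration only.
def sketch_print_trace_message(origin, msg, task, command):
    return ("Exception originated in: {origin}\n"
            "Exception msg: {msg}\n"
            "run_pre_post_commands:\n"
            " in task: {task}\n"
            " in command: {command}").format(origin=origin, msg=msg, task=task, command=command)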
def run_tests(config):
    """Main logic to run tests.

    :return: True if all tests failed or an error occurred. No more tests are run when
             an error is encountered.
    """
    config['test_control']['out'] = {'exit_codes': {}}
    test_control_config = config['test_control']
    mongodb_setup_config = config['mongodb_setup']

    setup_ssh_agent(config)
    prepare_reports_dir()
    validate_config(config)
    run_pre_post_commands('pre_task', [mongodb_setup_config, test_control_config], config,
                          EXCEPTION_BEHAVIOR.EXIT)

    if 'test_delay_seconds' in test_control_config:
        test_delay_seconds = test_control_config['test_delay_seconds']
    else:
        test_delay_seconds = 0

    num_tests_run = 0
    num_tests_failed = 0

    # cedar reporting
    report = cedar.Report(config.get('runtime'))

    # Default the status to ERROR to catch unexpected failures.
    # If a test succeeds, the status is explicitly set to SUCCESS.
    cur_test_status = TestStatus.ERROR

    try:
        if os.path.exists('perf.json'):
            os.remove('perf.json')
            LOG.warning("Found old perf.json file. Overwriting.")

        for test in test_control_config['run']:
            background_tasks = []
            LOG.info('running test %s', test)
            timer = {}
            try:
                # Only run between_tests after the first test.
                if num_tests_run > 0:
                    run_pre_post_commands('between_tests',
                                          [mongodb_setup_config, test_control_config], config,
                                          EXCEPTION_BEHAVIOR.RERAISE)
                run_pre_post_commands('pre_test',
                                      [mongodb_setup_config, test_control_config, test], config,
                                      EXCEPTION_BEHAVIOR.RERAISE, test['id'])
                background_tasks = start_background_tasks(config, test, test['id'])

                if test_delay_seconds:
                    LOG.info("Sleeping for %s seconds before test %s", test_delay_seconds,
                             test['id'])
                    time.sleep(test_delay_seconds)

                LOG.info("Starting test %s", test['id'])
                timer['start'] = time.time()
                # Run the actual test
                run_test(test, config)
            except subprocess.CalledProcessError:
                LOG.error("test %s failed.", test['id'], exc_info=1)
                cur_test_status = TestStatus.FAILED
            except:  # pylint: disable=bare-except
                LOG.error("Unexpected failure in test %s.", test['id'], exc_info=1)
                cur_test_status = TestStatus.ERROR
            else:
                cur_test_status = TestStatus.SUCCESS

            num_tests_run += 1
            timer['end'] = time.time()

            try:
                stop_background_tasks(background_tasks)
                if 'skip_validate' not in test or not test['skip_validate']:
                    run_validate(config, test['id'])
                run_pre_post_commands('post_test',
                                      [test, test_control_config, mongodb_setup_config], config,
                                      EXCEPTION_BEHAVIOR.CONTINUE, test['id'])
            except:  # pylint: disable=bare-except
                # The post test activities failing implies the test failing.
                LOG.error("Post-test activities failed after test %s.", test['id'], exc_info=1)
                # Don't "downgrade" from ERROR to FAILED.
                if cur_test_status != TestStatus.ERROR:
                    cur_test_status = TestStatus.FAILED

            safe_reset_all_delays(config)

            if cur_test_status == TestStatus.FAILED:
                num_tests_failed += 1
                LOG.warning("Unsuccessful test run for test %s. Parsing results now", test['id'])
            elif cur_test_status == TestStatus.ERROR:
                LOG.warning("Unknown error in test %s, exiting early.", test['id'])
                break
            else:
                LOG.info("Successful test run for test %s. Parsing results now", test['id'])

            _, cedar_tests = parse_test_results(test, config, timer)
            for cedar_test in cedar_tests:
                report.add_test(cedar_test)
    except Exception as e:  # pylint: disable=broad-except
        LOG.error('Unexpected exception: %s', repr(e), exc_info=1)
    finally:
        # Save exit codes for analysis.py
        config.save()
        # Cedar
        # TODO: Encapsulate Cedar into workload_output_parser
        report.write_report()
        run_pre_post_commands('post_task', [test_control_config, mongodb_setup_config], config,
                              EXCEPTION_BEHAVIOR.CONTINUE)
        # Print perf.json to screen
        print_perf_json(config['test_control']['perf_json']['path'])

    LOG.info("%s of %s tests exited with an error.", num_tests_failed, num_tests_run)
    # Return True if all tests failed or if the last test errored.
    return (num_tests_run == num_tests_failed) or (cur_test_status == TestStatus.ERROR)
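
# --- Illustrative sketch (not part of the module) ---
# A plausible shape for the two enums run_tests() depends on. The real definitions live
# elsewhere in the repo; the member values here are assumptions for readability.
from enum import Enum

class TestStatus(Enum):
    SUCCESS = 'success'
    FAILED = 'failed'
    ERROR = 'error'

class EXCEPTION_BEHAVIOR(Enum):
    EXIT = 'exit'          # log the error, then exit the process with code 1
    RERAISE = 'reraise'    # propagate the exception to the caller
    CONTINUE = 'continue'  # log the error and keep going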