예제 #1
0
    def start(self):
        """Bring up all clusters from a clean slate.

        Because this is the first start, any mongod processes still around (most
        likely left over from a previous evergreen task, and about to be wiped
        anyway) are torn down as fast as possible before anything else happens.
        See :method:`restart` if this is not a clean start.

        :return: False when downloading/extracting the binaries fails. Otherwise
                 the result of ``self._start()`` if it is falsy, else the result
                 of ``self.atlas.start()``.
        """
        self.destroy(self.sigterm_ms)

        # MongoDB binaries are only fetched when the ConfigDict supplies a URL.
        binaries_ready = self.downloader.download_and_extract()
        if not binaries_ready:
            LOG.error("Download and extract failed.")
            return False

        LOG.info("Mongodb_setup running pre_cluster_start commands")
        run_pre_post_commands(
            'pre_cluster_start', [self.config['mongodb_setup']], self.config,
            EXCEPTION_BEHAVIOR.EXIT)

        started = self._start()
        if started:
            # Atlas clusters (if any) are configured under mongodb_setup.atlas and
            # are only started once the regular clusters came up.
            started = self.atlas.start()

        LOG.info("Mongodb_setup running post_cluster_start commands")
        # EXIT is used here as well: among other things it turns a failure into a
        # setup failure in Evergreen.
        run_pre_post_commands(
            'post_cluster_start', [self.config['mongodb_setup']], self.config,
            EXCEPTION_BEHAVIOR.EXIT)

        return started
예제 #2
0
    def test_run_pre_post(self, mock_run_host_command):
        """Check which run_host_command calls run_pre_post_commands() makes for 'post_test'."""
        run_pre_post_commands(
            'post_test',
            [self.config['test_control'], self.config['mongodb_setup']],
            self.config,
            EXCEPTION_BEHAVIOR.EXIT)

        # First positional argument of every recorded call, in call order.
        observed_args = [
            recorded[0][0] for recorded in mock_run_host_command.call_args_list
        ]
        expected_args = [
            'on_mongod',
            'on_all_hosts',
            'on_all_servers',
            'on_mongod',
            'on_configsvr',
        ]
        self.assertEqual(observed_args, expected_args)
예제 #3
0
    def setup_cluster(self):
        """
        Provision (or update) the cluster by driving terraform.

        Sequence: write the security/terraform .tf files, ``terraform init``, dump
        the terraform variables to CLUSTER_JSON, then ``apply``, ``refresh``,
        ``plan`` and ``output``. The terraform output is parsed and written to
        the output files, hostnames are set up, and the ``post_provisioning``
        commands are run.

        :raises Exception: any Exception raised after the first ``apply`` is
            started is logged, the deployed resources are destroyed, the
            evergreen data dir is removed, and the same exception is re-raised.
        """
        # pylint: disable=too-many-statements

        def run_terraform(arguments):
            """Run one terraform command, sending output to this object's stdout/stderr."""
            subprocess.check_call(arguments,
                                  stdout=self.stdout,
                                  stderr=self.stderr)

        # The security.tf and terraform.tf files must exist in the current work directory.
        self.setup_security_tf()
        self.setup_terraform_tf()
        if self.reuse_cluster:
            self.setup_evg_dir()

        LOG.info('terraform: init')
        run_terraform([self.terraform, 'init', '-upgrade'])

        tf_config = TerraformConfiguration(self.config)
        tf_config.to_json(file_name=CLUSTER_JSON)  # pylint: disable=no-member
        self.var_file = '-var-file={}'.format(CLUSTER_JSON)

        if not self.existing:
            LOG.info('Creating AWS cluster.', cluster=self.cluster)
        else:
            LOG.info('Reusing AWS cluster.', cluster=self.cluster)

        LOG.info('terraform: apply')
        # Disk warmup for initialsync-logkeeper takes about 4 hours. Deploying the
        # two other nodes only afterwards saves about $12.
        staged_deploy = (not self.existing
                         and self.cluster == 'initialsync-logkeeper')
        first_apply = [
            self.terraform, 'apply', self.var_file, self.parallelism,
            '-auto-approve'
        ]
        if staged_deploy:
            first_apply += [
                '-var=mongod_ebs_instance_count=0',
                '-var=workload_instance_count=0'
            ]

        try:
            run_terraform(first_apply)
            if staged_deploy:
                # Second pass without the zeroed instance counts.
                run_terraform([
                    self.terraform, 'apply', self.var_file, self.parallelism,
                    '-auto-approve'
                ])

            LOG.info('terraform: refresh')
            run_terraform([self.terraform, 'refresh', self.var_file])

            LOG.info('terraform: plan')
            run_terraform(
                [self.terraform, 'plan', '-detailed-exitcode', self.var_file])

            LOG.info('terraform: output')
            terraform_output = run_and_save_output([self.terraform, 'output'])
            LOG.debug(terraform_output)
            output_parser = TerraformOutputParser(
                config=self.config, terraform_output=terraform_output)
            output_parser.write_output_files()

            if self.reuse_cluster:
                self.save_terraform_state()

            if self.existing:
                # Delays should be unset at the end of each test_control.py run,
                # but that run may not have completed.
                safe_reset_all_delays(self.config)

            # Write hostnames to /etc/hosts
            self.setup_hostnames()

            with open('infrastructure_provisioning.out.yml',
                      'r') as provisioning_out_yaml:
                LOG.info('Contents of infrastructure_provisioning.out.yml:')
                LOG.info(provisioning_out_yaml.read())
            LOG.info("EC2 resources provisioned/updated successfully.")

            # Run post provisioning scripts.
            run_pre_post_commands(
                "post_provisioning",
                [self.config['infrastructure_provisioning']], self.config,
                EXCEPTION_BEHAVIOR.EXIT)

        except Exception as exception:
            LOG.error("Failed to provision EC2 resources.", exc_info=True)
            # stderr is closed before the terraform errors are printed.
            if self.stderr is not None:
                self.stderr.close()
            self.print_terraform_errors()
            LOG.error("Releasing any EC2 resources that did deploy.")
            destroy_resources()
            rmtree_when_present(self.evg_data_dir)
            raise exception
예제 #4
0
 def help_trace_function(self, mock_function, mock_command_dicts,
                         mock_extract_hosts, mock_ssh):
     """
     Test test_control.print_trace by calling run_pre_post_commands with a 'pre_task' key, with a
     forced exception. This is a helper function used by other tests within this class. It uses
     a mocked RemoteHost along with a mocked function within the RemoteHost that has a forced
     exception in it.

     After the run, the first captured ERROR log record is matched against a regex built from
     the first task name and command found in mock_command_dicts[0]['pre_task'][0]. Both the
     task name and the command are regex-escaped so that they are matched literally.

     :param MagicMock() mock_function: mocked function from mock_remote_host
     :param list(ConfigDict) mock_command_dicts: List of ConfigDict objects that have a
     'pre_task' key.
     :param MagicMock() mock_extract_hosts: DO NOT INPUT IN FUNCTION, patch decorator already
     inputs this argument into the function
     :param MagicMock() mock_ssh: DO NOT INPUT IN FUNCTION, patch decorator already inputs this
     argument into the function
     """
     mock_config = {
         'infrastructure_provisioning': {
             'tfvars': {
                 'ssh_user': '******',
                 'ssh_key_file': 'mock_ssh_key'
             }
         },
         'mongodb_setup': {
             'meta': {
                 'net': {},
             },
             'authentication': {
                 'enabled': True,
                 'username': '******',
                 'password': '******',
             },
         },
     }
     with LogCapture(level=logging.ERROR) as log_capture:
         # LogCapture captures all log output into the object log_capture. level specifies which
         # log level to detect. logging.ERROR will cause log_capture to only contain logs
         # outputted with the ERROR level or higher. The patch on common.host_factory.make_host
         # mocks the function and is called within run_commands:
         # (pre_task -> dispatch_commands -> run_host_command -> make_host)
         # The mock_function.side_effect causes it to raise an Exception causing print_trace
         # to log the proper information. mock_function will be called within run_command or
         # _run_host_command_map depending on mock_command_dicts. run_pre_post_commands exits
         # with code 1 on exception when given EXCEPTION_BEHAVIOR.EXIT, so self.assertRaises()
         # catches this. The asserts check if the mock_function, extract_hosts, and make_host
         # were called along with asserting the error code was 1.
         return_value = RemoteHost(None, None, None)
         # disabling yapf here because pylint and yapf disagree on indentation convention
         # yapf: disable
         with patch(
                 'common.host_factory.make_host', return_value=return_value) as mock_make_host:
             # yapf: enable
             mock_function.side_effect = Exception("Mock Exception")
             with self.assertRaises(SystemExit) as exception:
                 run_pre_post_commands('pre_task', mock_command_dicts,
                                       mock_config, EXCEPTION_BEHAVIOR.EXIT)
             self.assertTrue(mock_function.called)
             self.assertTrue(mock_extract_hosts.called)
             self.assertTrue(mock_make_host.called)
             self.assertTrue(mock_ssh.called)
             self.assertEqual(exception.exception.code, 1)

     # The expected trace names the first task of the first 'pre_task' entry and its command.
     first_entry = mock_command_dicts[0]['pre_task'][0]
     task = list(first_entry.keys())[0]
     command = first_entry[task]

     # Task and command are data, not regex syntax: escape both so that characters such as
     # '.', '(' or '$' in either of them cannot change what the pattern matches.
     error_pattern = re.compile("".join([
         "Exception originated in: .+",
         "\nException msg:.*?",
         "\nrun_pre_post_commands:\n    ",
         "in task: ", re.escape(task), "\n        ",
         "in command: ", re.escape(str(command)),
     ]))

     # Get the actual records held by the log_capture object; index 2 of a record is its
     # message string.
     list_errors = list(log_capture.actual())
     self.assertRegex(list_errors[0][2], error_pattern)
예제 #5
0
def run_tests(config):
    """Main logic to run tests

    Runs every entry of ``config['test_control']['run']`` in order. For each test the
    'between_tests' (from the second test on) and 'pre_test' commands are run, background
    tasks are started, the test itself is run, and afterwards the background tasks are
    stopped, validation is run (unless the test sets a truthy 'skip_validate') and the
    'post_test' commands are run. Results of each test are parsed and added to a cedar
    report. Regardless of how the loop ends, the config is saved, the cedar report is
    written, the 'post_task' commands are run and perf.json is printed.

    Side effect: ``config['test_control']['out']`` is reset to ``{'exit_codes': {}}``.

    :param config: The configuration. It is indexed by 'test_control', 'mongodb_setup' and
                   'runtime', and must provide ``get()`` and ``save()``.
    :return: True if all tests failed or an error occurred.
             No more tests are run when an error is encountered.
    """
    config['test_control']['out'] = {'exit_codes': {}}
    test_control_config = config['test_control']
    mongodb_setup_config = config['mongodb_setup']

    setup_ssh_agent(config)
    prepare_reports_dir()

    validate_config(config)
    # A failure in the pre_task commands is handled with EXCEPTION_BEHAVIOR.EXIT.
    run_pre_post_commands('pre_task',
                          [mongodb_setup_config, test_control_config], config,
                          EXCEPTION_BEHAVIOR.EXIT)

    # Optional pause between starting the background tasks and starting each test.
    if 'test_delay_seconds' in test_control_config:
        test_delay_seconds = test_control_config['test_delay_seconds']
    else:
        test_delay_seconds = 0

    num_tests_run = 0
    num_tests_failed = 0

    # cedar reporting
    report = cedar.Report(config.get('runtime'))

    # Default the status to ERROR to catch unexpected failures.
    # If a tests succeeds, the status is explicitly set to SUCCESS.
    cur_test_status = TestStatus.ERROR

    try:
        if os.path.exists('perf.json'):
            os.remove('perf.json')
            LOG.warning("Found old perf.json file. Overwriting.")

        for test in test_control_config['run']:
            background_tasks = []
            LOG.info('running test %s', test)
            # Holds the 'start' and 'end' wall-clock times handed to parse_test_results.
            timer = {}
            try:
                # Only run between_tests after the first test.
                if num_tests_run > 0:
                    run_pre_post_commands(
                        'between_tests',
                        [mongodb_setup_config, test_control_config], config,
                        EXCEPTION_BEHAVIOR.RERAISE)
                run_pre_post_commands(
                    'pre_test',
                    [mongodb_setup_config, test_control_config, test], config,
                    EXCEPTION_BEHAVIOR.RERAISE, test['id'])
                background_tasks = start_background_tasks(
                    config, test, test['id'])

                if test_delay_seconds:
                    LOG.info("Sleeping for %s seconds before test %s",
                             test_delay_seconds, test['id'])
                    time.sleep(test_delay_seconds)

                LOG.info("Starting test %s", test['id'])
                # NOTE(review): 'start' is only recorded when everything above succeeded. If
                # an earlier step raises CalledProcessError, the test is marked FAILED and
                # timer reaches parse_test_results without a 'start' key - confirm that
                # parse_test_results tolerates this.
                timer['start'] = time.time()
                # Run the actual test
                run_test(test, config)
            except subprocess.CalledProcessError:
                # A failing subprocess counts as a test failure; later tests still run.
                LOG.error("test %s failed.", test['id'], exc_info=1)
                cur_test_status = TestStatus.FAILED
            except:  # pylint: disable=bare-except
                # Anything else is an ERROR, which stops the loop further down.
                LOG.error("Unexpected failure in test %s.",
                          test['id'],
                          exc_info=1)
                cur_test_status = TestStatus.ERROR
            else:
                cur_test_status = TestStatus.SUCCESS

            # The test counts as run no matter how it ended.
            num_tests_run += 1
            timer['end'] = time.time()

            try:
                stop_background_tasks(background_tasks)
                if 'skip_validate' not in test or not test['skip_validate']:
                    run_validate(config, test['id'])
                run_pre_post_commands(
                    'post_test',
                    [test, test_control_config, mongodb_setup_config], config,
                    EXCEPTION_BEHAVIOR.CONTINUE, test['id'])
            except:  # pylint: disable=bare-except
                # The post test activities failing implies the test failing.
                LOG.error("Post-test activities failed after test %s.",
                          test['id'],
                          exc_info=1)

                # Don't "downgrade" from ERROR to FAILED.
                if cur_test_status != TestStatus.ERROR:
                    cur_test_status = TestStatus.FAILED

            safe_reset_all_delays(config)

            if cur_test_status == TestStatus.FAILED:
                num_tests_failed += 1
                LOG.warning(
                    "Unsuccessful test run for test %s. Parsing results now",
                    test['id'])
            elif cur_test_status == TestStatus.ERROR:
                # An ERROR ends the loop: the results of this test are not parsed and no
                # further tests are run.
                LOG.warning("Unknown error in test %s, exiting early.",
                            test['id'])
                break
            else:
                LOG.info(
                    "Successful test run for test %s. Parsing results now",
                    test['id'])

            # Only the cedar tests from the parsed results are used here.
            _, cedar_tests = parse_test_results(test, config, timer)
            for cedar_test in cedar_tests:
                report.add_test(cedar_test)
    except Exception as e:  # pylint: disable=broad-except
        # The exception is logged and swallowed; the function still returns normally below.
        # NOTE(review): "Unexcepted" in the message looks like a typo for "Unexpected". It is
        # left untouched because it is runtime output.
        LOG.error('Unexcepted exception: %s', repr(e), exc_info=1)
    finally:
        # Save exit codes for analysis.py
        config.save()
        # Cedar
        # TODO: Encapsulate Cedar into workload_output_parser
        report.write_report()
        run_pre_post_commands('post_task',
                              [test_control_config, mongodb_setup_config],
                              config, EXCEPTION_BEHAVIOR.CONTINUE)
        # Print perf.json to screen
        print_perf_json(config['test_control']['perf_json']['path'])

    LOG.info("%s of %s tests exited with an error.", num_tests_failed,
             num_tests_run)

    # Return True if all tests failed or if the last test errored.
    # Note that this is also True when no test was run at all (0 == 0).
    return (num_tests_run == num_tests_failed) or (cur_test_status
                                                   == TestStatus.ERROR)