Example #1
def wait_for_dcos_oss(
    cluster: Cluster,
    request: SubRequest,
    log_dir: Path,
) -> None:
    """
    Helper for ``wait_for_dcos_oss`` that automatically dumps the journal of
    every cluster node if a ``DCOSTimeoutError`` is hit.
    """
    try:
        cluster.wait_for_dcos_oss()
    except DCOSTimeoutError:
        # Dumping the logs on timeout only works if DC/OS has already started
        # the systemd units that the logs are retrieved from.
        # This does not currently pose a problem, since the
        # ``wait_for_dcos_oss`` timeout is set to one hour. We expect the
        # systemd units to have started by then.
        dump_cluster_journals(
            cluster=cluster,
            target_dir=log_dir / artifact_dir_format(request.node.name),
        )
        raise
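``dump_cluster_journals`` and ``artifact_dir_format`` are helpers from the surrounding test suite and are not shown in this listing. A minimal sketch of ``artifact_dir_format``, assuming it only needs to turn a pytest node name into a filesystem-safe directory name:

def artifact_dir_format(test_name: str) -> str:
    # Parametrized test names such as 'test_foo[a/b]' contain characters
    # that are awkward in directory names; strip or replace them.
    # This is a sketch, not the real helper.
    return test_name.replace('[', '-').replace(']', '').replace('/', '-')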
Example #2
    def test_no_live_logging(
        self,
        caplog: LogCaptureFixture,
        cluster_backend: ClusterBackend,
        oss_installer: Path,
    ) -> None:
        """
        By default, subprocess output is not logged during DC/OS installation.
        """
        with pytest.raises(CalledProcessError):
            # It is not possible to install DC/OS with two master nodes.
            with Cluster(
                    masters=2,
                    cluster_backend=cluster_backend,
            ) as cluster:
                cluster.install_dcos_from_path(
                    dcos_installer=oss_installer,
                    dcos_config=cluster.base_config,
                    ip_detect_path=cluster_backend.ip_detect_path,
                )

        assert not self._two_masters_error_logged(log_records=caplog.records)
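The ``_two_masters_error_logged`` helper is not part of this listing. A plausible sketch, assuming ``logging`` and ``typing.List`` are imported and that the installer logs a message containing 'Must be 1, 3, 5, 7, or 9':

    def _two_masters_error_logged(
        self,
        log_records: List[logging.LogRecord],
    ) -> bool:
        # Return whether any captured log record mentions the installer's
        # complaint about an unsupported number of master nodes.
        # The exact message text is an assumption in this sketch.
        message = 'Must be 1, 3, 5, 7, or 9'
        return any(message in str(record.msg) for record in log_records)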
Example #3
    def test_install_cluster_from_url(
        self,
        cluster_backend: ClusterBackend,
        oss_installer_url: str,
        tmpdir: local,
    ) -> None:
        """
        Install a DC/OS cluster with a custom ``ip-detect`` script.
        """
        with Cluster(
            cluster_backend=cluster_backend,
            masters=1,
            agents=0,
            public_agents=0,
        ) as cluster:

            (master, ) = cluster.masters
            ip_detect_file = tmpdir.join('ip-detect')
            ip_detect_contents = dedent(
                """\
                #!/bin/bash
                echo {ip_address}
                """,
            ).format(ip_address=master.private_ip_address)
            ip_detect_file.write(ip_detect_contents)

            cluster.install_dcos_from_url(
                dcos_installer=oss_installer_url,
                dcos_config=cluster.base_config,
                ip_detect_path=cluster_backend.ip_detect_path,
                files_to_copy_to_genconf_dir=[
                    (Path(str(ip_detect_file)), Path('/genconf/ip-detect')),
                ],
            )
            cluster.wait_for_dcos_oss()
            cat_result = master.run(
                args=['cat', '/opt/mesosphere/bin/detect_ip'],
            )
            assert cat_result.stdout.decode() == ip_detect_contents
Example #4
    def test_install_dcos_from_node(
        self,
        oss_installer_url: str,
    ) -> None:
        """
        It is possible to install DC/OS on an AWS cluster node by node.
        """
        cluster_backend = AWS()
        with Cluster(
            cluster_backend=cluster_backend,
            agents=0,
            public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            master.install_dcos_from_url(
                dcos_installer=oss_installer_url,
                dcos_config=cluster.base_config,
                role=Role.MASTER,
                output=Output.LOG_AND_CAPTURE,
                ip_detect_path=cluster_backend.ip_detect_path,
            )
            cluster.wait_for_dcos_oss()
Example #5
    def test_install_dcos_from_node(
        self,
        oss_artifact_url: str,
    ) -> None:
        """
        It is possible to install DC/OS on an AWS cluster node by node.
        """
        cluster_backend = AWS()
        with Cluster(
            cluster_backend=cluster_backend,
            agents=0,
            public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            master.install_dcos_from_url(
                build_artifact=oss_artifact_url,
                dcos_config=cluster.base_config,
                role=Role.MASTER,
                log_output_live=True,
                ip_detect_path=cluster_backend.ip_detect_path,
            )
            cluster.wait_for_dcos_oss()
Example #6
    def test_run_pytest(
        self,
        cluster_backend: ClusterBackend,
        enterprise_artifact: Path,
        license_key_contents: str,
    ) -> None:
        """
        Integration tests can be run with `pytest`.
        Errors are raised from `pytest`.
        """
        superuser_username = str(uuid.uuid4())
        superuser_password = str(uuid.uuid4())
        config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
            'fault_domain_enabled': False,
            'license_key_contents': license_key_contents,
        }

        with Cluster(cluster_backend=cluster_backend) as cluster:
            cluster.install_dcos_from_path(
                build_artifact=enterprise_artifact,
                extra_config=config,
                log_output_live=True,
            )
            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
            )
            # No error is raised with a successful command.
            cluster.run_integration_tests(
                pytest_command=['pytest', '-vvv', '-s', '-x', 'test_tls.py'],
                env={
                    'DCOS_LOGIN_UNAME': superuser_username,
                    'DCOS_LOGIN_PW': superuser_password,
                },
                log_output_live=True,
            )
Example #7
    def test_enterprise(
        self,
        cluster_backend: ClusterBackend,
        enterprise_1_9_installer: Path,
    ) -> None:
        """
        A DC/OS Enterprise 1.9 cluster can be started.
        """
        superuser_username = str(uuid.uuid4())
        superuser_password = str(uuid.uuid4())
        config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
        }

        with Cluster(cluster_backend=cluster_backend) as cluster:
            cluster.install_dcos_from_path(
                dcos_installer=enterprise_1_9_installer,
                dcos_config={
                    **cluster.base_config,
                    **config,
                },
                output=Output.LOG_AND_CAPTURE,
                ip_detect_path=cluster_backend.ip_detect_path,
            )
            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
            )
            for node in {
                    *cluster.masters,
                    *cluster.agents,
                    *cluster.public_agents,
            }:
                build = node.dcos_build_info()
                assert build.version.startswith('1.9')
                assert build.commit
                assert build.variant == DCOSVariant.ENTERPRISE
Example #8
    def test_install_dcos_with_custom_ip_detect(
        self,
        oss_installer_url: str,
        tmp_path: Path,
    ) -> None:
        """
        It is possible to install DC/OS on an AWS cluster with a custom IP
        detect script.
        """
        cluster_backend = AWS()
        with Cluster(
                cluster_backend=cluster_backend,
                agents=0,
                public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            ip_detect_file = tmp_path / 'ip-detect'
            ip_detect_contents = dedent(
                """\
                #!/bin/bash
                echo {ip_address}
                """, ).format(ip_address=master.private_ip_address)
            ip_detect_file.write_text(ip_detect_contents)

            cluster.install_dcos_from_url(
                dcos_installer=oss_installer_url,
                dcos_config=cluster.base_config,
                output=Output.LOG_AND_CAPTURE,
                ip_detect_path=ip_detect_file,
            )
            cluster.wait_for_dcos_oss()
            cat_result = master.run(
                args=['cat', '/opt/mesosphere/bin/detect_ip'], )
            node_script_contents = cat_result.stdout.decode()
            assert node_script_contents == ip_detect_contents
            backend_script_path = cluster_backend.ip_detect_path
            backend_script_contents = backend_script_path.read_text()
            assert node_script_contents != backend_script_contents
Example #9
    def test_install_dcos_from_path(self) -> None:
        """
        The AWS backend requires a build artifact URL in order to launch a
        DC/OS cluster.
        """
        with Cluster(
                cluster_backend=AWS(),
                masters=1,
                agents=0,
                public_agents=0,
        ) as cluster:
            with pytest.raises(NotImplementedError) as excinfo:
                cluster.install_dcos_from_path(
                    build_artifact=Path('/foo'),
                    dcos_config=cluster.base_config,
                )

        expected_error = (
            'The AWS backend does not support the installation of build '
            'artifacts passed via path. This is because a more efficient '
            'installation method exists in ``install_dcos_from_url``.')

        assert str(excinfo.value) == expected_error
Example #10
    def cluster(
        self,
        oss_artifact: Path,
        cluster_backend: ClusterBackend,
    ) -> Iterator[Cluster]:
        """
        Return a `Cluster` with DC/OS installed and running.

        This is class scoped as we do not intend to modify the cluster in ways
        that make tests interfere with one another.
        """
        with Cluster(cluster_backend=cluster_backend) as dcos_cluster:
            dcos_cluster.install_dcos_from_path(
                dcos_config=dcos_cluster.base_config,
                build_artifact=oss_artifact,
                log_output_live=True,
            )
            # We exercise the "http_checks=False" code here but we do not test
            # its functionality. It is a temporary measure while we wait for
            # more thorough dcos-checks.
            dcos_cluster.wait_for_dcos_oss(http_checks=False)
            dcos_cluster.wait_for_dcos_oss()
            yield dcos_cluster
Example #11
    def test_live_logging(
        self,
        caplog: LogCaptureFixture,
        cluster_backend: ClusterBackend,
        oss_artifact: Path,
    ) -> None:
        """
        If `log_output_live` is given as `True`, the installation output is
        logged live.
        """
        with pytest.raises(CalledProcessError):
            # It is not possible to install DC/OS with two master nodes.
            with Cluster(
                    masters=2,
                    cluster_backend=cluster_backend,
            ) as cluster:
                cluster.install_dcos_from_path(
                    build_artifact=oss_artifact,
                    dcos_config=cluster.base_config,
                    log_output_live=True,
                )

        assert self._two_masters_error_logged(log_records=caplog.records)
Example #12
    def test_host_driver_not_supported(self) -> None:
        """
        If the host's storage driver is not supported, `aufs` is used.
        """
        client = docker.from_env(version='auto')
        info = {**client.info(), **{'Driver': 'not_supported'}}

        with Mocker(real_http=True) as mock:
            mock.get(url=self._docker_info_endpoint, json=info)
            backend = Docker()

        assert backend.docker_storage_driver == DockerStorageDriver.AUFS

        with Cluster(
                cluster_backend=backend,
                masters=1,
                agents=0,
                public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            node_driver = self._get_storage_driver(node=master)

        assert node_driver == DockerStorageDriver.AUFS
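``_get_storage_driver`` is not shown in this listing. A sketch of one way it could work; both the inspection command and the name-to-enum mapping are assumptions:

    def _get_storage_driver(self, node: Node) -> DockerStorageDriver:
        # Ask the Docker daemon running inside the node which storage
        # driver it uses, then map the reported name onto the enum.
        result = node.run(args=['docker', 'info', '--format', '{{.Driver}}'])
        driver_name = result.stdout.decode().strip()
        return {
            'aufs': DockerStorageDriver.AUFS,
            'overlay': DockerStorageDriver.OVERLAY,
            'overlay2': DockerStorageDriver.OVERLAY_2,
        }[driver_name]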
Example #13
    def test_install_dcos_from_url(
        self,
        oss_installer_url: str,
        cluster_backend: ClusterBackend,
    ) -> None:
        """
        It is possible to install DC/OS on a node from a URL.
        """
        with Cluster(cluster_backend=cluster_backend) as cluster:
            for nodes, role in (
                (cluster.masters, Role.MASTER),
                (cluster.agents, Role.AGENT),
                (cluster.public_agents, Role.PUBLIC_AGENT),
            ):
                for node in nodes:
                    node.install_dcos_from_url(
                        dcos_installer=oss_installer_url,
                        dcos_config=cluster.base_config,
                        ip_detect_path=cluster_backend.ip_detect_path,
                        role=role,
                        output=Output.LOG_AND_CAPTURE,
                    )
            cluster.wait_for_dcos_oss()
Example #14
    def test_set_false_exception_raised(
        self,
        cluster_backend: ClusterBackend,
        oss_artifact: Path,
    ) -> None:
        """
        If `destroy_on_error` is set to `False` and an exception is raised,
        the cluster is not destroyed.
        """
        with pytest.raises(Exception):
            with Cluster(
                    generate_config_path=oss_artifact,
                    agents=0,
                    public_agents=0,
                    destroy_on_error=False,
                    cluster_backend=cluster_backend,
            ) as cluster:
                (master, ) = cluster.masters
                cluster.wait_for_dcos()
                raise Exception()
        # No exception is raised. The node still exists.
        master.run_as_root(args=['echo', 'hello'], log_output_live=True)
        cluster.destroy()
Example #15
    def test_extend_config(
        self,
        path: str,
        cluster_backend: ClusterBackend,
        oss_artifact: Path,
    ) -> None:
        """
        This example demonstrates that it is possible to create a cluster
        with an extended configuration file.

        See ``test_default`` for evidence that the custom configuration is
        used.
        """
        config = {
            'cluster_docker_credentials': {
                'auths': {
                    'https://index.docker.io/v1/': {
                        'auth': 'redacted'
                    },
                },
            },
            'cluster_docker_credentials_enabled': True,
        }

        with Cluster(
                agents=0,
                public_agents=0,
                cluster_backend=cluster_backend,
        ) as cluster:
            cluster.install_dcos_from_path(
                oss_artifact,
                extra_config=config,
            )
            cluster.wait_for_dcos_oss()
            (master, ) = cluster.masters
            master.run(
                args=['test', '-f', path],
                user=cluster.default_ssh_user,
            )
Example #16
    def test_wait_for_dcos_ee(
        self,
        cluster_backend: ClusterBackend,
        enterprise_artifact: Path,
        license_key_contents: str,
    ) -> None:
        """
        A cluster can start up in security disabled mode.
        """
        superuser_username = str(uuid.uuid4())
        superuser_password = str(uuid.uuid4())
        config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
            'fault_domain_enabled': False,
            'license_key_contents': license_key_contents,
            'security': 'disabled',
        }

        with Cluster(
                cluster_backend=cluster_backend,
                agents=0,
                public_agents=0,
        ) as cluster:
            cluster.install_dcos_from_path(
                build_artifact=enterprise_artifact,
                dcos_config={
                    **cluster.base_config,
                    **config,
                },
                log_output_live=True,
            )
            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
            )
Example #17
    def test_custom_key_pair(self, tmp_path: Path) -> None:
        """
        It is possible to pass a custom key pair to the AWS backend.
        """
        key_name = 'e2e-test-{random}'.format(random=uuid.uuid4().hex)
        private_key_path = tmp_path / 'private_key'
        public_key_path = tmp_path / 'public_key'
        _write_key_pair(
            public_key_path=public_key_path,
            private_key_path=private_key_path,
        )
        backend = AWS(aws_key_pair=(key_name, private_key_path))
        region_name = backend.aws_region
        ec2 = boto3.client('ec2', region_name=region_name)
        ec2.import_key_pair(
            KeyName=key_name,
            PublicKeyMaterial=public_key_path.read_bytes(),
        )

        try:
            with Cluster(
                    cluster_backend=backend,
                    agents=0,
                    public_agents=0,
            ) as cluster:
                (master, ) = cluster.masters
                node = Node(
                    public_ip_address=master.public_ip_address,
                    private_ip_address=master.private_ip_address,
                    default_user=master.default_user,
                    ssh_key_path=private_key_path,
                )

                node.run(args=['echo', '1'])
        finally:
            ec2.delete_key_pair(KeyName=key_name)
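The ``_write_key_pair`` helper is not shown in this listing. A minimal sketch, assuming the ``cryptography`` package, that writes an OpenSSH public key and an unencrypted PEM private key:

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa


def _write_key_pair(public_key_path: Path, private_key_path: Path) -> None:
    # Generate an RSA key pair and write it in the formats the test needs:
    # an OpenSSH public key to import into EC2, and a PEM private key for
    # SSH access. This is a sketch, not the real helper.
    key = rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend(),
    )
    public_key_path.write_bytes(
        key.public_key().public_bytes(
            encoding=serialization.Encoding.OpenSSH,
            format=serialization.PublicFormat.OpenSSH,
        ),
    )
    private_key_path.write_bytes(
        key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.TraditionalOpenSSL,
            encryption_algorithm=serialization.NoEncryption(),
        ),
    )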
Example #18
def static_three_master_cluster(
    artifact_path: Path,
    docker_backend: Docker,
    request: SubRequest,
    log_dir: Path,
) -> Generator[Cluster, None, None]:
    """Spin up a highly-available DC/OS cluster with three master nodes."""
    with Cluster(
            cluster_backend=docker_backend,
            masters=3,
            agents=0,
            public_agents=0,
    ) as cluster:
        cluster.install_dcos_from_path(
            dcos_installer=artifact_path,
            dcos_config=cluster.base_config,
            ip_detect_path=docker_backend.ip_detect_path,
        )
        wait_for_dcos_oss(
            cluster=cluster,
            request=request,
            log_dir=log_dir,
        )
        yield cluster
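A hypothetical consumer of this fixture (not part of the listing) would take it as a test argument:

def test_three_masters_reachable(static_three_master_cluster: Cluster) -> None:
    # Hypothetical test: every master in the fixture's cluster responds
    # to a trivial command over SSH.
    for master in static_three_master_cluster.masters:
        result = master.run(args=['echo', 'ready'])
        assert result.stdout.decode().strip() == 'ready'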
Example #19
    def test_mismatched_agents(
        self,
        dcos_cluster: Cluster,
        existing_cluster_backend: ClusterBackend,
    ) -> None:
        """
        If `agents` differs from the number of agent nodes in the existing
        cluster, an error is raised.
        """
        with pytest.raises(ValueError) as excinfo:
            with Cluster(
                    cluster_backend=existing_cluster_backend,
                    generate_config_path=None,
                    masters=len(dcos_cluster.masters),
                    agents=len(dcos_cluster.agents) + 1,
                    public_agents=len(dcos_cluster.public_agents),
                    destroy_on_error=False,
                    destroy_on_success=False,
            ):
                pass  # pragma: no cover

        expected_error = ('The number of agent nodes is `1`. '
                          'Therefore, `agents` must be set to `1`.')

        assert str(excinfo.value) == expected_error
Example #20
    def test_destroy_on_success(
        self,
        dcos_cluster: Cluster,
        existing_cluster_backend: ClusterBackend,
    ) -> None:
        """
        If `destroy_on_success` is set to `True`, an error is raised.
        """
        with pytest.raises(ValueError) as excinfo:
            with Cluster(
                    cluster_backend=existing_cluster_backend,
                    masters=len(dcos_cluster.masters),
                    agents=len(dcos_cluster.agents),
                    public_agents=len(dcos_cluster.public_agents),
                    destroy_on_error=False,
                    destroy_on_success=True,
            ):
                pass  # pragma: no cover

        expected_error = (
            'The given cluster backend does not support being destroyed.'
            ' Therefore, `destroy_on_success` must be set to `False`.')

        assert str(excinfo.value) == expected_error
Example #21
    def test_install_dcos_from_url(self, oss_artifact_url: str) -> None:
        """
        The Docker backend requires a build artifact in order
        to launch a DC/OS cluster.
        """
        with Cluster(
                cluster_backend=Docker(),
                masters=1,
                agents=0,
                public_agents=0,
        ) as cluster:
            with pytest.raises(NotImplementedError) as excinfo:
                cluster.install_dcos_from_url(
                    build_artifact=oss_artifact_url,
                    dcos_config=cluster.base_config,
                )

        expected_error = (
            'The Docker backend does not support the installation of DC/OS '
            'by build artifacts passed via URL string. This is because a more '
            'efficient installation method exists in `install_dcos_from_path`.'
        )

        assert str(excinfo.value) == expected_error
Example #22
    def test_extra_config(
        self,
        dcos_cluster: Cluster,
        existing_cluster_backend: ClusterBackend,
    ) -> None:
        """
        If `extra_config` is not empty, an error is raised.
        """
        with pytest.raises(ValueError) as excinfo:
            with Cluster(
                    cluster_backend=existing_cluster_backend,
                    masters=len(dcos_cluster.masters),
                    agents=len(dcos_cluster.agents),
                    public_agents=len(dcos_cluster.public_agents),
                    destroy_on_error=False,
                    destroy_on_success=False,
                    extra_config={'foo': 'bar'},
            ):
                pass  # pragma: no cover

        expected_error = ('Nodes are already configured. '
                          'Therefore, `extra_config` must be empty.')

        assert str(excinfo.value) == expected_error
Example #23
    def test_live_logging(
        self,
        caplog: LogCaptureFixture,
        cluster_backend: ClusterBackend,
        oss_installer: Path,
    ) -> None:
        """
        If ``output`` is given as ``Output.LOG_AND_CAPTURE``, the installation
        output is logged live.
        """
        with pytest.raises(CalledProcessError):
            # It is not possible to install DC/OS with two master nodes.
            with Cluster(
                    masters=2,
                    cluster_backend=cluster_backend,
            ) as cluster:
                cluster.install_dcos_from_path(
                    dcos_installer=oss_installer,
                    ip_detect_path=cluster_backend.ip_detect_path,
                    dcos_config=cluster.base_config,
                    output=Output.LOG_AND_CAPTURE,
                )

        assert self._two_masters_error_logged(log_records=caplog.records)
Example #24
def test_replace_all_static(
    artifact_path: Path,
    docker_network_three_available_addresses: Network,
    tmp_path: Path,
    request: SubRequest,
    log_dir: Path,
) -> None:
    """
    In a cluster with an Exhibitor backend consisting of a static ZooKeeper
    ensemble, after removing one master, and then adding another master with
    the same IP address, the cluster will get to a healthy state. This is
    repeated until all masters in the original cluster have been replaced.
    The purpose of this test is to assert that the ``node-poststart``
    procedure correctly prevents a master node replacement from being performed
    too quickly. A new master node should only become part of the cluster if
    there are no more underreplicated ranges reported by CockroachDB.

    Permanent CockroachDB data loss and a potential breakage of DC/OS occur
    when a second master node is taken down for replacement while CockroachDB
    is recovering and there are still underreplicated ranges due to a recent
    replacement of another master node.
    """
    docker_backend = Docker(network=docker_network_three_available_addresses)

    with Cluster(
        cluster_backend=docker_backend,
        # Allocate all 3 available IP addresses in the subnet.
        masters=3,
        agents=0,
        public_agents=0,
    ) as original_cluster:
        master = next(iter(original_cluster.masters))
        result = master.run(
            args=[
                'ifconfig',
                '|', 'grep', '-B1', str(master.public_ip_address),
                '|', 'grep', '-o', r'"^\w*"',
            ],
            output=Output.LOG_AND_CAPTURE,
            shell=True,
        )
        interface = result.stdout.strip().decode()
        ip_detect_contents = textwrap.dedent(
            """\
            #!/bin/bash -e
            if [ -f /sbin/ip ]; then
               IP_CMD=/sbin/ip
            else
               IP_CMD=/bin/ip
            fi

            $IP_CMD -4 -o addr show dev {interface} | awk '{{split($4,a,"/");print a[1]}}'
            """.format(interface=interface),
        )
        ip_detect_path = tmp_path / 'ip-detect'
        ip_detect_path.write_text(data=ip_detect_contents)
        static_config = {
            'master_discovery': 'static',
            'master_list': [str(master.private_ip_address)
                            for master in original_cluster.masters],
        }
        dcos_config = {
            **original_cluster.base_config,
            **static_config,
        }
        original_cluster.install_dcos_from_path(
            dcos_installer=artifact_path,
            dcos_config=dcos_config,
            ip_detect_path=ip_detect_path,
        )
        wait_for_dcos_oss(
            cluster=original_cluster,
            request=request,
            log_dir=log_dir,
        )
        current_cluster = original_cluster
        tmp_clusters = set()

        original_masters = original_cluster.masters

        try:
            for master_to_be_replaced in original_masters:
                # Destroy a master and free one IP address.
                original_cluster.destroy_node(node=master_to_be_replaced)

                temporary_cluster = Cluster(
                    cluster_backend=docker_backend,
                    # Allocate one container with the now free IP address.
                    masters=1,
                    agents=0,
                    public_agents=0,
                )
                tmp_clusters.add(temporary_cluster)

                # Install a new master on a new container with the same IP address.
                (new_master, ) = temporary_cluster.masters
                new_master.install_dcos_from_path(
                    dcos_installer=artifact_path,
                    dcos_config=dcos_config,
                    role=Role.MASTER,
                    ip_detect_path=ip_detect_path,
                )
                # Form a new cluster with the newly created master node.
                new_cluster = Cluster.from_nodes(
                    masters=current_cluster.masters.union({new_master}),
                    agents=current_cluster.agents,
                    public_agents=current_cluster.public_agents,
                )
                # The `wait_for_dcos_oss` function waits until the new master has
                # joined the cluster and all masters are healthy. Without the
                # cockroachdb check, this succeeds before all cockroachdb ranges
                # have finished replicating to the new master. That meant that the
                # next master would be replaced too quickly, while it had data that
                # was not present elsewhere in the cluster. This led to
                # irrecoverable data loss. This function waits until the
                # master node is "healthy". This is a requirement for replacing the
                # next master node.
                #
                # We don't call the cockroachdb ranges check directly as the
                # purpose of this test is to ensure that when an operator follows
                # our documented procedure for replacing a master node multiple
                # times in a row (e.g. during a cluster upgrade) then the cluster
                # remains healthy throughout and afterwards.
                #
                # If we called the check directly here, we would be
                # sure the check is being called, but we would not be sure that
                # "wait_for_dcos_oss", i.e., the standard procedure for determining
                # whether a node is healthy, is sufficient to prevent the cluster
                # from breaking.
                #
                # We perform this check after every master is replaced, as that is
                # what we tell operators to do: "After installing the new master
                # node, wait until it becomes healthy before proceeding to the
                # next."
                #
                # The procedure for replacing multiple masters is documented here:
                # https://docs.mesosphere.com/1.12/installing/production/upgrading/#dcos-masters
                wait_for_dcos_oss(
                    cluster=new_cluster,
                    request=request,
                    log_dir=log_dir,
                )
                # Use the new cluster object in the next replacement iteration.
                current_cluster = new_cluster

        finally:
            for cluster in tmp_clusters:
                cluster.destroy()
Example #25
    def test_custom_mounts(self, tmpdir: local) -> None:
        """
        It is possible to mount local files to master nodes.
        """
        local_all_file = tmpdir.join('all_file.txt')
        local_all_file.write('')
        local_master_file = tmpdir.join('master_file.txt')
        local_master_file.write('')
        local_agent_file = tmpdir.join('agent_file.txt')
        local_agent_file.write('')
        local_public_agent_file = tmpdir.join('public_agent_file.txt')
        local_public_agent_file.write('')

        master_path = Path('/etc/on_master_nodes.txt')
        agent_path = Path('/etc/on_agent_nodes.txt')
        public_agent_path = Path('/etc/on_public_agent_nodes.txt')
        all_path = Path('/etc/on_all_nodes.txt')

        custom_container_mount = Mount(
            source=str(local_all_file),
            target=str(all_path),
            type='bind',
        )

        custom_master_mount = Mount(
            source=str(local_master_file),
            target=str(master_path),
            type='bind',
        )

        custom_agent_mount = Mount(
            source=str(local_agent_file),
            target=str(agent_path),
            type='bind',
        )

        custom_public_agent_mount = Mount(
            source=str(local_public_agent_file),
            target=str(public_agent_path),
            type='bind',
        )

        backend = Docker(
            custom_container_mounts=[custom_container_mount],
            custom_master_mounts=[custom_master_mount],
            custom_agent_mounts=[custom_agent_mount],
            custom_public_agent_mounts=[custom_public_agent_mount],
        )

        with Cluster(
                cluster_backend=backend,
                masters=1,
                agents=1,
                public_agents=1,
        ) as cluster:
            for nodes, path, local_file in [
                (cluster.masters, master_path, local_master_file),
                (cluster.masters, all_path, local_all_file),
                (cluster.agents, agent_path, local_agent_file),
                (cluster.agents, all_path, local_all_file),
                (
                    cluster.public_agents,
                    public_agent_path,
                    local_public_agent_file,
                ),
                (cluster.public_agents, all_path, local_all_file),
            ]:
                for node in nodes:
                    content = str(uuid.uuid4())
                    local_file.write(content)
                    args = ['cat', str(path)]
                    result = node.run(args=args)
                    assert result.stdout.decode() == content
Example #26
def run_command(
    args: List[str],
    cluster: Cluster,
    host: Node,
    transport: Transport,
    use_test_env: bool,
    dcos_login_uname: str,
    dcos_login_pw: str,
    env: Dict[str, str],
) -> None:
    """
    Run a command on a given cluster / host.

    Args:
        args: The arguments to run on a node.
        cluster: The cluster to run a command on.
        host: The node to run a command on.
        transport: The transport to use to communicate with the cluster.
        use_test_env: Whether to use the DC/OS integration test environment to
            run the command in.
        dcos_login_uname: The DC/OS login username. This is only used if using
            the test environment and DC/OS Enterprise.
        dcos_login_pw: The DC/OS login password. This is only used if using
            the test environment and DC/OS Enterprise.
        env: Environment variables to set before running the command.
    """
    columns, rows = click.get_terminal_size()

    env = {
        # LINES and COLUMNS are needed if using the ``DOCKER_EXEC`` transport.
        # See https://github.com/moby/moby/issues/35407.
        'COLUMNS': str(columns),
        'LINES': str(rows),
        'DCOS_LOGIN_UNAME': dcos_login_uname,
        'DCOS_LOGIN_PW': dcos_login_pw,
        **env,
    }

    if not use_test_env:
        try:
            host.run(
                args=args,
                log_output_live=False,
                tty=True,
                shell=True,
                env=env,
                transport=transport,
            )
        except subprocess.CalledProcessError as exc:
            sys.exit(exc.returncode)

        return

    try:
        cluster.run_integration_tests(
            pytest_command=args,
            tty=True,
            env=env,
            test_host=host,
            transport=transport,
        )
    except subprocess.CalledProcessError as exc:
        sys.exit(exc.returncode)
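A hypothetical invocation of ``run_command``, assuming a one-master ``Cluster`` named ``cluster`` and the ``Transport`` enum from ``dcos_e2e.node``:

(master, ) = cluster.masters
run_command(
    args=['pytest', '-x', 'test_tls.py'],
    cluster=cluster,
    host=master,
    transport=Transport.SSH,
    use_test_env=True,
    dcos_login_uname='admin',
    dcos_login_pw='admin',
    env={},
)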
Example #27
def test_replace_all_static(
    artifact_path: Path,
    docker_network_three_available_addresses: Network,
    tmp_path: Path,
    request: SubRequest,
    log_dir: Path,
) -> None:
    """
    In a cluster with an Exhibitor backend consisting of a static ZooKeeper
    ensemble, after removing one master, and then adding another master with
    the same IP address, the cluster will get to a healthy state. This is
    repeated until all masters in the original cluster have been replaced.
    The purpose of this test is to assert that the ``node-poststart``
    procedure correctly prevents a master node replacement from being performed
    too quickly. A new master node should only become part of the cluster if
    there are no more underreplicated ranges reported by CockroachDB.

    Permanent CockroachDB data loss and a potential breakage of DC/OS occur
    when a second master node is taken down for replacement while CockroachDB
    is recovering and there are still underreplicated ranges due to a recent
    replacement of another master node.
    """
    docker_backend = Docker(network=docker_network_three_available_addresses)

    with Cluster(
            cluster_backend=docker_backend,
            # Allocate all 3 available IP addresses in the subnet.
            masters=3,
            agents=0,
            public_agents=0,
    ) as original_cluster:
        master = next(iter(original_cluster.masters))
        result = master.run(
            args=[
                'ifconfig',
                '|',
                'grep',
                '-B1',
                str(master.public_ip_address),
                '|',
                'grep',
                '-o',
                '"^\w*"',
            ],
            shell=True,
        )
        interface = result.stdout.strip().decode()
        ip_detect_contents = textwrap.dedent(
            """\
            #!/bin/bash -e
            if [ -f /sbin/ip ]; then
               IP_CMD=/sbin/ip
            else
               IP_CMD=/bin/ip
            fi

            $IP_CMD -4 -o addr show dev {interface} | awk '{{split($4,a,"/");print a[1]}}'
            """.format(interface=interface), )
        ip_detect_path = tmp_path / 'ip-detect'
        ip_detect_path.write_text(data=ip_detect_contents)
        static_config = {
            'master_discovery': 'static',
            'master_list': [
                str(master.private_ip_address)
                for master in original_cluster.masters
            ],
        }
        dcos_config = {
            **original_cluster.base_config,
            **static_config,
        }
        original_cluster.install_dcos_from_path(
            dcos_installer=artifact_path,
            dcos_config=dcos_config,
            ip_detect_path=ip_detect_path,
        )
        wait_for_dcos_oss(
            cluster=original_cluster,
            request=request,
            log_dir=log_dir,
        )
        current_cluster = original_cluster
        tmp_clusters = set()

        original_masters = original_cluster.masters

        try:
            for master_to_be_replaced in original_masters:
                # Destroy a master and free one IP address.
                current_cluster.destroy_node(node=master_to_be_replaced)

                temporary_cluster = Cluster(
                    cluster_backend=docker_backend,
                    # Allocate one container with the now free IP address.
                    masters=1,
                    agents=0,
                    public_agents=0,
                )
                tmp_clusters.add(temporary_cluster)

                # Install a new master on a new container with the same IP address.
                (new_master, ) = temporary_cluster.masters
                new_master.install_dcos_from_path(
                    dcos_installer=artifact_path,
                    dcos_config=dcos_config,
                    role=Role.MASTER,
                    ip_detect_path=ip_detect_path,
                )
                # Form a new cluster with the newly created master node.
                new_cluster = Cluster.from_nodes(
                    masters=current_cluster.masters.union({new_master}),
                    agents=current_cluster.agents,
                    public_agents=current_cluster.public_agents,
                )
                # The `wait_for_dcos_oss` function waits until the new master has
                # joined the cluster and all masters are healthy. Without the
                # cockroachdb check, this succeeds before all cockroachdb ranges
                # have finished replicating to the new master. That meant that the
                # next master would be replaced too quickly, while it had data that
                # was not present elsewhere in the cluster. This led to
                # irrecoverable data loss. This function waits until the
                # master node is "healthy". This is a requirement for replacing the
                # next master node.
                #
                # We don't call the cockroachdb ranges check directly as the
                # purpose of this test is to ensure that when an operator follows
                # our documented procedure for replacing a master node multiple
                # times in a row (e.g. during a cluster upgrade) then the cluster
                # remains healthy throughout and afterwards.
                #
                # If we called the check directly here, we would be
                # sure the check is being called, but we would not be sure that
                # "wait_for_dcos_oss", i.e., the standard procedure for determining
                # whether a node is healthy, is sufficient to prevent the cluster
                # from breaking.
                #
                # We perform this check after every master is replaced, as that is
                # what we tell operators to do: "After installing the new master
                # node, wait until it becomes healthy before proceeding to the
                # next."
                #
                # The procedure for replacing multiple masters is documented here:
                # https://docs.mesosphere.com/1.12/installing/production/upgrading/#dcos-masters
                wait_for_dcos_oss(
                    cluster=new_cluster,
                    request=request,
                    log_dir=log_dir,
                )
                # Use the new cluster object in the next replacement iteration.
                current_cluster = new_cluster

        finally:
            for cluster in tmp_clusters:
                cluster.destroy()
Example #28
import os
import random
import string
import sys

from dcos_e2e.backends import AWS
from dcos_e2e.cluster import Cluster
from passlib.hash import sha512_crypt

if len(sys.argv) != 2:
    print("Please specify the installer URL as argument.", file=sys.stderr)
    sys.exit(1)

test_license = os.environ.get('DCOS_TEST_LICENSE')
if not test_license:
    print("Please specify a license in $DCOS_TEST_LICENSE.", file=sys.stderr)
    sys.exit(1)

private_key_path = os.environ.get('DCOS_TEST_SSH_KEY_PATH')
aws_key_pair = ('default', private_key_path) if private_key_path else None

cluster_backend = AWS(aws_key_pair=aws_key_pair)
cluster = Cluster(cluster_backend=cluster_backend, agents=0, public_agents=0)

username = '******'
password = ''.join(
    random.choice(string.ascii_letters + string.digits) for i in range(12))

extra_config = {
    'superuser_username': username,
    'superuser_password_hash': sha512_crypt.hash(password),
    'fault_domain_enabled': False,
    'license_key_contents': test_license,
}

dcos_config = {**cluster.base_config, **extra_config}

cluster.install_dcos_from_url(
    # The listing is truncated at this call; the arguments below are an
    # assumption that follows the pattern of the other examples.
    dcos_installer=sys.argv[1],
    dcos_config=dcos_config,
    ip_detect_path=cluster_backend.ip_detect_path,
)
Example #29
import os
import random
import string
import sys

from dcos_e2e.backends import AWS
from dcos_e2e.cluster import Cluster
from passlib.hash import sha512_crypt

if len(sys.argv) != 2:
    print("Please specify the installer URL as argument.", file=sys.stderr)
    sys.exit(1)

dcos_variant = os.environ.get('DCOS_TEST_VARIANT')
if not dcos_variant:
    print("Please set DCOS_TEST_VARIANT to 'open' or 'enterprise'.", file=sys.stderr)
    sys.exit(1)

private_key_path = os.environ.get('DCOS_TEST_SSH_KEY_PATH')
aws_key_pair = ('default', private_key_path) if private_key_path else None

cluster_backend = AWS(aws_region='us-east-1', aws_key_pair=aws_key_pair)
cluster = Cluster(cluster_backend=cluster_backend, agents=0, public_agents=0)

username = '******'
password = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(12))

extra_config = {
    'superuser_username': username,
    'superuser_password_hash': sha512_crypt.hash(password),
#    'fault_domain_enabled': False,
}

test_license = os.environ.get('DCOS_TEST_LICENSE')
if test_license:
    extra_config['license_key_contents'] = test_license

dcos_config = {**cluster.base_config, **extra_config}
Example #30
def run_tests(e2e_backend, installer_url, dcos_license, dcos_url,
              admin_username, admin_password, ssh_user, ssh_key_path):

    os.environ["CLI_TEST_SSH_USER"] = ssh_user
    os.environ["CLI_TEST_MASTER_PROXY"] = "1"
    os.environ["CLI_TEST_SSH_KEY_PATH"] = ssh_key_path

    # extra dcos_config (for dcos_launch and dcos_docker backends)
    extra_config = {
        'superuser_username': admin_username,
        'superuser_password_hash': sha512_crypt.hash(admin_password),
        'fault_domain_enabled': False,
        'license_key_contents': dcos_license,
    }

    if e2e_backend == 'dcos_launch':
        cluster_backend = AWS()

        with Cluster(cluster_backend=cluster_backend, agents=1) as cluster:
            dcos_config = {**cluster.base_config, **extra_config}

            cluster.install_dcos_from_url(
                build_artifact=installer_url,
                dcos_config=dcos_config,
                log_output_live=True,
            )

            os.environ["CLI_TEST_SSH_KEY_PATH"] = str(
                cluster._cluster._ssh_key_path)

            _run_tests(cluster, admin_username, admin_password)
    elif e2e_backend == 'dcos_docker':
        dcos_ee_installer_filename = 'dcos_generate_config.ee.sh'
        dcos_ee_installer_path = Path.cwd() / Path(dcos_ee_installer_filename)

        if not dcos_ee_installer_path.exists():
            urllib.request.urlretrieve(installer_url,
                                       dcos_ee_installer_filename)

        with Cluster(cluster_backend=Docker(), agents=1) as cluster:
            dcos_config = {**cluster.base_config, **extra_config}

            cluster.install_dcos_from_path(
                build_artifact=dcos_ee_installer_path,
                dcos_config=dcos_config,
                log_output_live=True,
            )

            _run_tests(cluster, admin_username, admin_password)
    elif e2e_backend == 'existing':
        try:
            dcos_ip = IPv4Address(dcos_url)
        except ValueError:
            parsed_dcos_url = urlparse(dcos_url)
            dcos_hostname = parsed_dcos_url.hostname
            dcos_ip = IPv4Address(socket.gethostbyname(dcos_hostname))

        masters = set([
            Node(
                public_ip_address=dcos_ip,
                private_ip_address=dcos_ip,
                ssh_key_path=Path(ssh_key_path),
                default_ssh_user=ssh_user,
            )
        ])

        cluster = Cluster.from_nodes(
            masters=masters,
            agents=set(),
            public_agents=set(),
        )

        _run_tests(cluster, admin_username, admin_password)
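``_run_tests`` is not shown in this listing. A plausible sketch, reusing the ``run_integration_tests`` pattern from Example #6:

def _run_tests(
    cluster: Cluster,
    admin_username: str,
    admin_password: str,
) -> None:
    # Hypothetical helper: run the end-to-end tests inside the cluster's
    # DC/OS integration test environment.
    cluster.run_integration_tests(
        pytest_command=['pytest', '-vv', 'tests'],
        env={
            'DCOS_LOGIN_UNAME': admin_username,
            'DCOS_LOGIN_PW': admin_password,
        },
        log_output_live=True,
    )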
Example #31
    def test_copy_files_to_installer(
        self,
        cluster_backend: ClusterBackend,
        enterprise_artifact: Path,
        license_key_contents: str,
    ) -> None:
        """
        Files can be copied from the host to the installer node at creation
        time.

        The installer container is removed shortly after creation by DC/OS
        Docker. Therefore, we inspect the symptoms: we can use custom CA
        certificates.

        See CA certificate tests in Enterprise DC/OS for more details.
        """
        cert_filename = 'dcos-ca-certificate.crt'
        key_filename = 'dcos-ca-certificate-key.key'

        genconf = Path('/genconf')
        installer_cert_path = genconf / cert_filename
        installer_key_path = genconf / key_filename

        cert_dir_on_host = Path('tests/test_dcos_e2e/certificates').resolve()
        cert_path = cert_dir_on_host / cert_filename
        ca_key_path = cert_dir_on_host / key_filename

        master_key_path = Path(
            '/var/lib/dcos/pki/tls/CA/private/custom_ca.key', )

        superuser_username = str(uuid.uuid4())
        superuser_password = str(uuid.uuid4())

        config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
            'security': 'strict',
            'ca_certificate_path': str(installer_cert_path),
            'ca_certificate_key_path': str(installer_key_path),
            'fault_domain_enabled': False,
            'license_key_contents': license_key_contents,
        }

        files_to_copy_to_genconf_dir = [
            (cert_path, installer_cert_path),
            (ca_key_path, installer_key_path),
        ]

        with Cluster(
                cluster_backend=cluster_backend,
                masters=1,
                agents=0,
                public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            master.send_file(
                local_path=ca_key_path,
                remote_path=master_key_path,
            )

            cluster.install_dcos_from_path(
                build_artifact=enterprise_artifact,
                dcos_config={
                    **cluster.base_config,
                    **config,
                },
                log_output_live=True,
                ip_detect_path=cluster_backend.ip_detect_path,
                files_to_copy_to_genconf_dir=files_to_copy_to_genconf_dir,
            )

            # We exercise the "http_checks=False" code here but we do not test
            # its functionality. It is a temporary measure while we wait for
            # more thorough dcos-checks.
            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
                http_checks=False,
            )
            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
            )
            master_url = 'https://' + str(master.public_ip_address)
            response = requests.get(master_url, verify=str(cert_path))
            response.raise_for_status()
Example #32
    def test_copy_directory_to_node_installer_genconf_dir(
        self,
        cluster_backend: ClusterBackend,
        enterprise_artifact: Path,
        license_key_contents: str,
    ) -> None:
        """
        Directories can be copied to the ``genconf`` directory from the host
        to the installing node when installing DC/OS.

        Supplying a custom CA certificate directory is a good example for this
        capability. See CA certificate tests in Enterprise DC/OS for more
        details.
        """
        cert_filename = 'dcos-ca-certificate.crt'
        key_filename = 'dcos-ca-certificate-key.key'

        genconf = Path('/genconf')
        installer_cert_path = genconf / 'certificates' / cert_filename
        installer_key_path = genconf / 'certificates' / key_filename

        cert_dir_on_host = Path('tests/test_dcos_e2e/certificates').resolve()
        cert_path = cert_dir_on_host / cert_filename
        ca_key_path = cert_dir_on_host / key_filename

        master_key_path = Path(
            '/var/lib/dcos/pki/tls/CA/private/custom_ca.key', )

        superuser_username = str(uuid.uuid4())
        superuser_password = str(uuid.uuid4())

        config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
            'security': 'strict',
            'ca_certificate_path': str(installer_cert_path),
            'ca_certificate_key_path': str(installer_key_path),
            'fault_domain_enabled': False,
            'license_key_contents': license_key_contents,
        }

        with Cluster(
                cluster_backend=cluster_backend,
                masters=1,
                agents=0,
                public_agents=0,
        ) as cluster:
            (master, ) = cluster.masters
            master.send_file(
                local_path=ca_key_path,
                remote_path=master_key_path,
            )
            master.install_dcos_from_path(
                build_artifact=enterprise_artifact,
                dcos_config={
                    **cluster.base_config,
                    **config,
                },
                ip_detect_path=cluster_backend.ip_detect_path,
                role=Role.MASTER,
                files_to_copy_to_genconf_dir=[(cert_dir_on_host, genconf)],
                log_output_live=True,
            )

            cluster.wait_for_dcos_ee(
                superuser_username=superuser_username,
                superuser_password=superuser_password,
            )
            master_url = 'https://' + str(master.public_ip_address)
            response = requests.get(master_url, verify=str(cert_path))
            response.raise_for_status()
Example #33
def create(
    agents: int,
    artifact: str,
    extra_config: Dict[str, Any],
    masters: int,
    public_agents: int,
    variant: str,
    workspace_dir: Optional[Path],
    license_key: Optional[str],
    security_mode: Optional[str],
    copy_to_master: List[Tuple[Path, Path]],
    cluster_id: str,
) -> None:
    """
    Create a DC/OS cluster.

        DC/OS Enterprise

            \b
            DC/OS Enterprise clusters require different configuration variables to DC/OS OSS.
            For example, enterprise clusters require the following configuration parameters:

            ``superuser_username``, ``superuser_password_hash``, ``fault_domain_enabled``, ``license_key_contents``

            \b
            These can all be set in ``--extra-config``.
            However, some defaults are provided for all but the license key.

            \b
            The default superuser username is ``admin``.
            The default superuser password is ``admin``.
            The default ``fault_domain_enabled`` is ``false``.

            \b
            ``license_key_contents`` must be set for DC/OS Enterprise 1.11 and above.
            This is set to one of the following, in order:

            \b
            * The ``license_key_contents`` set in ``--extra-config``.
            * The contents of the path given with ``--license-key``.
            * The contents of the path set in the ``DCOS_LICENSE_KEY_PATH`` environment variable.

            \b
            If none of these are set, ``license_key_contents`` is not given.
    """  # noqa: E501
    base_workspace_dir = workspace_dir or Path(tempfile.gettempdir())
    workspace_dir = base_workspace_dir / uuid.uuid4().hex
    workspace_dir.mkdir(parents=True)

    doctor_message = 'Try `dcos-vagrant doctor` for troubleshooting help.'

    artifact_path = Path(artifact).resolve()

    if variant == 'auto':
        variant = get_variant(
            artifact_path=artifact_path,
            workspace_dir=workspace_dir,
            doctor_message=doctor_message,
        )

    enterprise = bool(variant == 'enterprise')
    description = {
        CLUSTER_ID_DESCRIPTION_KEY: cluster_id,
        WORKSPACE_DIR_DESCRIPTION_KEY: str(workspace_dir),
        VARIANT_DESCRIPTION_KEY: 'ee' if enterprise else '',
    }
    cluster_backend = Vagrant(
        workspace_dir=workspace_dir,
        virtualbox_description=json.dumps(obj=description),
    )
    if enterprise:
        superuser_username = '******'
        superuser_password = '******'

        enterprise_extra_config = {
            'superuser_username': superuser_username,
            'superuser_password_hash': sha512_crypt.hash(superuser_password),
            'fault_domain_enabled': False,
        }
        if license_key is not None:
            key_contents = Path(license_key).read_text()
            enterprise_extra_config['license_key_contents'] = key_contents

        extra_config = {**enterprise_extra_config, **extra_config}
        if security_mode is not None:
            extra_config['security'] = security_mode

    try:
        cluster = Cluster(
            cluster_backend=cluster_backend,
            masters=masters,
            agents=agents,
            public_agents=public_agents,
            files_to_copy_to_installer=[],
        )
    except CalledProcessError as exc:
        click.echo('Error creating cluster.', err=True)
        click.echo(doctor_message)
        sys.exit(exc.returncode)

    for node in cluster.masters:
        for path_pair in copy_to_master:
            local_path, remote_path = path_pair
            node.send_file(
                local_path=local_path,
                remote_path=remote_path,
            )

    try:
        with click_spinner.spinner():
            cluster.install_dcos_from_path(
                build_artifact=artifact_path,
                dcos_config={
                    **cluster.base_config,
                    **extra_config,
                },
            )
    except CalledProcessError as exc:
        click.echo('Error installing DC/OS.', err=True)
        click.echo(doctor_message)
        cluster.destroy()
        sys.exit(exc.returncode)