Example #1
    def _get_branch_dict(self, cluster: str, instance: str, config: utils.InstanceConfig) -> utils.BranchDictV2:
        if self._deployments_json is None:
            self._deployments_json = load_v2_deployments_json(self._service, soa_dir=self._soa_dir)

        branch = config.get_branch()
        deploy_group = config.get_deploy_group()
        return self._deployments_json.get_branch_dict(self._service, branch, deploy_group)
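
A minimal standalone sketch of the lazy-load-and-cache pattern used above (class and names invented for illustration): the deployments JSON is read only on the first lookup and reused for later calls.

class DeploymentsCache:
    """Illustrative cache: load an expensive resource once, then serve it from memory."""

    def __init__(self, loader):
        self._loader = loader
        self._deployments_json = None

    def get(self):
        # Load lazily on first access, exactly like _get_branch_dict above.
        if self._deployments_json is None:
            self._deployments_json = self._loader()
        return self._deployments_json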
Example #2
    def _get_allowed_locations_and_hosts(
        self, instance_config: InstanceConfig
    ) -> Dict[str, Sequence[SmartstackHost]]:

        monitoring_blacklist = instance_config.get_monitoring_blacklist(
            system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
        )
        filtered_nodes = kubernetes_tools.filter_nodes_by_blacklist(
            nodes=self.nodes,
            blacklist=monitoring_blacklist,
            whitelist=None,
        )
        discover_location_type = kubernetes_tools.load_service_namespace_config(
            service=instance_config.service,
            namespace=instance_config.instance,
            soa_dir=instance_config.soa_dir,
        ).get_discover()
        attribute_to_nodes = kubernetes_tools.get_nodes_grouped_by_attribute(
            nodes=filtered_nodes,
            attribute=discover_location_type,
        )
        ret: Dict[str, Sequence[SmartstackHost]] = {}
        for attr, nodes in attribute_to_nodes.items():
            ret[attr] = [
                SmartstackHost(
                    hostname=node.metadata.labels['yelp.com/hostname'],
                    pool=node.metadata.labels['yelp.com/pool'],
                ) for node in nodes
            ]
        return ret
Example #3
    def _get_allowed_locations_and_hosts(
        self, instance_config: InstanceConfig
    ) -> Dict[str, Sequence[SmartstackHost]]:
        """Returns a dict of locations and lists of corresponding mesos slaves
        where deployment of the instance is allowed.

        :param instance_config: An instance of MarathonServiceConfig
        :returns: A dict {"uswest1-prod": [SmartstackHost(), SmartstackHost(), ...]}
        """
        monitoring_blacklist = instance_config.get_monitoring_blacklist(
            system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
        )
        filtered_slaves = mesos_tools.filter_mesos_slaves_by_blacklist(
            slaves=self._mesos_slaves,
            blacklist=monitoring_blacklist,
            whitelist=None,
        )
        discover_location_type = marathon_tools.load_service_namespace_config(
            service=instance_config.service,
            namespace=instance_config.instance,
            soa_dir=instance_config.soa_dir,
        ).get_discover()
        attribute_to_slaves = mesos_tools.get_mesos_slaves_grouped_by_attribute(
            slaves=filtered_slaves,
            attribute=discover_location_type,
        )
        ret: Dict[str, Sequence[SmartstackHost]] = {}
        for attr, slaves in attribute_to_slaves.items():
            ret[attr] = [
                SmartstackHost(hostname=slave['hostname'],
                               pool=slave['attributes']['pool'])
                for slave in slaves
            ]
        return ret
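
The grouping above is delegated to mesos_tools.get_mesos_slaves_grouped_by_attribute; a rough standalone sketch of that kind of grouping (fabricated data, not the real mesos_tools implementation) looks like this:

from collections import defaultdict
from typing import Dict, List, Sequence

def group_slaves_by_attribute(slaves: Sequence[dict], attribute: str) -> Dict[str, List[dict]]:
    # Bucket each slave under the value of the requested attribute,
    # e.g. attribute="region" -> {"uswest1-prod": [slave, ...], ...}
    grouped: Dict[str, List[dict]] = defaultdict(list)
    for slave in slaves:
        value = slave.get("attributes", {}).get(attribute)
        if value is not None:
            grouped[value].append(slave)
    return dict(grouped)

# Example with made-up data:
# group_slaves_by_attribute(
#     [{"hostname": "h1", "attributes": {"region": "uswest1-prod", "pool": "default"}}],
#     "region",
# )
# -> {"uswest1-prod": [{"hostname": "h1", "attributes": {...}}]}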
Example #4
    def _get_allowed_locations_and_slaves(
            self, instance_config: InstanceConfig) -> Dict[str, List[dict]]:
        """Returns a dict of locations and lists of corresponding mesos slaves
        where deployment of the instance is allowed.

        :param instance_config: An instance of MarathonServiceConfig
        :returns: A dict {"uswest1-prod": ['hostname1', 'hostname2'], ...}.
        """
        monitoring_blacklist = instance_config.get_monitoring_blacklist(
            system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
        )
        filtered_slaves = mesos_tools.filter_mesos_slaves_by_blacklist(
            slaves=self._mesos_slaves,
            blacklist=monitoring_blacklist,
            whitelist=None,
        )
        discover_location_type = marathon_tools.load_service_namespace_config(
            service=instance_config.service,
            namespace=instance_config.instance,
            soa_dir=instance_config.soa_dir,
        ).get_discover()
        return mesos_tools.get_mesos_slaves_grouped_by_attribute(
            slaves=filtered_slaves,
            attribute=discover_location_type,
        )

    def get_replication_for_instance(
            self,
            instance_config: InstanceConfig) -> Dict[str, Dict[str, int]]:
        """Returns the number of registered instances in each discoverable location.

        :param instance_config: An instance of MarathonServiceConfig.
        :returns: a dict {'location_type': {'service.instance': int}}
        """
        replication_info = {}
        attribute_host_dict = self._get_allowed_locations_and_hosts(
            instance_config)
        instance_pool = instance_config.get_pool()
        for location, hosts in attribute_host_dict.items():
            hostname = self._get_first_host_in_pool(hosts, instance_pool)
            replication_info[location] = self._get_replication_info(
                location, hostname, instance_config)
        return replication_info
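
A short sketch (helper name and threshold invented for illustration) of how the replication info returned above might be consumed to flag under-replicated locations:

from typing import Dict, List

def find_under_replicated(replication_info: Dict[str, Dict[str, int]], expected: int) -> List[str]:
    # replication_info has the shape {"location": {"service.instance": count}}.
    problems = []
    for location, counts in replication_info.items():
        for name, count in counts.items():
            if count < expected:
                problems.append(f"{name} in {location}: {count}/{expected} registered")
    return problems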
Example #6
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{
                "hostPath": "/h1",
                "containerPath": "/c1",
                "mode": "RO"
            }]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {
            "volumes": [{
                "hostPath": "/h2",
                "containerPath": "/c2",
                "mode": "RO"
            }]
        },
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.create_spark_config_str",
                autospec=True) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = "fake_user"
        mock_get_spark_config.return_value = {"spark.app.name": "fake_app"}
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ("id", "secret")

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False

        with mock.patch(
                "paasta_tools.utils.get_service_docker_registry",
                autospec=True,
                return_value="fake-registry",
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="paasta_spark_run_fake_user_123",
            volumes=[
                "/h1:/c1:ro",
                "/h2:/c2:ro",
                "/fake_dir:/spark_driver:rw",
                "/etc/passwd:/etc/passwd:ro",
                "/etc/group:/etc/group:ro",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "PAASTA_SERVICE": "fake_service",
                "PAASTA_INSTANCE": "fake_instance",
                "PAASTA_CLUSTER": "fake_cluster",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_DEPLOY_GROUP": "fake_cluster.fake_instance",
                "PAASTA_DOCKER_IMAGE": "fake_service:fake_sha",
                "PAASTA_LAUNCHED_BY": mock.ANY,
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_USER": "******",
                "SPARK_OPTS": "--conf spark.app.name=fake_app",
                "PAASTA_RESOURCE_CPUS": "0.25",
                "PAASTA_RESOURCE_DISK": "1024",
                "PAASTA_RESOURCE_MEM": "1024",
                "PAASTA_GIT_SHA": "fake_ser",
            },
            docker_img="fake-registry/fake-service",
            docker_cmd="pyspark --conf spark.app.name=fake_app",
            dry_run=True,
            nvidia=False,
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
                autospec=True) as mock_emit_resource_requirements, mock.patch(
                    "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                    autospec=True):
            mock_get_spark_config.return_value = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_emit_resource_requirements.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
                autospec=True) as mock_emit_resource_requirements, mock.patch(
                    "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                    autospec=True):
            mock_get_spark_config.return_value = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == (
                "python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5"
            )

            assert mock_emit_resource_requirements.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
        mock_create_spark_config_str,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")

        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
                autospec=True) as mock_emit_resource_requirements, mock.patch(
                    "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                    autospec=True):
            mock_emit_resource_requirements.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"

            args = mock.MagicMock(suppress_clusterman_metrics_errors=False,
                                  cmd="pyspark")
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
        mock_create_spark_config_str,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
                autospec=True) as mock_emit_resource_requirements, mock.patch(
                    "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                    autospec=True):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )
            assert not mock_emit_resource_requirements.called
Example #7
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {"volumes": [{"hostPath": "/h2", "containerPath": "/c2", "mode": "RO"}]},
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.create_spark_config_str", autospec=True
        ) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_container(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.cluster_manager = cluster_manager
        args.disable_aws_credential_env_variables = False
        args.docker_cpu_limit = False
        args.docker_memory_limit = False
        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="2g",
            docker_cpu_limit="1",
        )

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_driver_resource_limits_config(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            "spark.driver.memory": "1g",
            "spark.driver.cores": "2",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.disable_aws_credential_env_variables = False
        args.cluster_manager = cluster_manager
        args.docker_cpu_limit = 3
        args.docker_memory_limit = "4g"
        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="4g",
            docker_cpu_limit=3,
        )

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_driver_resource_limits(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            "spark.driver.memory": "1g",
            "spark.driver.cores": "2",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.cluster_manager = cluster_manager
        args.disable_aws_credential_env_variables = False
        args.docker_cpu_limit = False
        args.docker_memory_limit = False
        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="2g",
            docker_cpu_limit="2",
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": "5",
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_send_and_calculate_resources_cost.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake_app",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value

            assert mock_send_and_calculate_resources_cost.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_send_and_calculate_resources_cost.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            spark_conf = {
                "spark.cores.max": 5,
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False, cmd="pyspark"
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                    aws_creds=("id", "secret", "token"),
                    spark_conf=spark_conf,
                    cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                    pod_template_path="unique-run",
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"},
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )
            assert not mock_send_and_calculate_resources_cost.called
Example #8
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
    spark_conf: Mapping[str, str],
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
    cluster_manager: str,
    pod_template_path: str,
) -> int:

    # driver specific volumes
    volumes: List[str] = []

    docker_memory_limit = _calculate_docker_memory_limit(
        spark_conf, args.docker_memory_limit
    )
    docker_cpu_limit = _calculate_docker_cpu_limit(
        spark_conf,
        args.docker_cpu_limit,
    )

    if cluster_manager == CLUSTER_MANAGER_MESOS:
        volumes = (
            spark_conf.get("spark.mesos.executor.docker.volumes", "").split(",")
            if spark_conf.get("spark.mesos.executor.docker.volumes", "") != ""
            else []
        )
    elif cluster_manager == CLUSTER_MANAGER_K8S:
        volume_names = [
            re.match(
                r"spark.kubernetes.executor.volumes.hostPath.(\d+).mount.path", key
            ).group(1)
            for key in spark_conf.keys()
            if "spark.kubernetes.executor.volumes.hostPath." in key
            and ".mount.path" in key
        ]
        for volume_name in volume_names:
            read_only = (
                "ro"
                if spark_conf.get(
                    f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.mount.readOnly"
                )
                == "true"
                else "rw"
            )
            container_path = spark_conf.get(
                f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.mount.path"
            )
            host_path = spark_conf.get(
                f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.options.path"
            )
            volumes.append(f"{host_path}:{container_path}:{read_only}")

    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/nail/home:/nail/home:rw")

    if args.enable_compact_bin_packing:
        volumes.append(f"{pod_template_path}:{pod_template_path}:rw")

    environment = instance_config.get_env_dictionary()  # type: ignore
    spark_conf_str = create_spark_config_str(spark_conf, is_mrjob=args.mrjob)
    environment.update(
        get_spark_env(args, spark_conf_str, aws_creds, spark_conf["spark.ui.port"])
    )  # type:ignore

    webui_url = get_webui_url(spark_conf["spark.ui.port"])
    webui_url_msg = f"\nSpark monitoring URL {webui_url}\n"

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        signalfx_url = get_signalfx_url(spark_conf)
        signalfx_url_msg = f"\nSignalfx dashboard: {signalfx_url}\n"
        print(webui_url_msg)
        print(signalfx_url_msg)
        log.info(webui_url_msg)
        log.info(signalfx_url_msg)
        history_server_url = get_history_url(spark_conf)
        if history_server_url:
            history_server_url_msg = (
                f"\nAfter the job is finished, you can find the spark UI from {history_server_url}\n"
                "Check y/spark-recent-history for faster access to prod logs\n"
            )
            print(history_server_url_msg)
            log.info(history_server_url_msg)
    print(f"Selected cluster manager: {cluster_manager}\n")

    if clusterman_metrics and _should_get_resource_requirements(docker_cmd, args.mrjob):
        try:
            if cluster_manager == CLUSTER_MANAGER_MESOS:
                print("Sending resource request metrics to Clusterman")
                hourly_cost, resources = send_and_calculate_resources_cost(
                    clusterman_metrics, spark_conf, webui_url, args.pool
                )
            else:
                resources = get_resources_requested(spark_conf)
                hourly_cost = get_spark_hourly_cost(
                    clusterman_metrics,
                    resources,
                    spark_conf["spark.executorEnv.PAASTA_CLUSTER"],
                    args.pool,
                )
            message = (
                f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)"
                f" is estimated to cost ${hourly_cost} per hour"
            )
            if clusterman_metrics.util.costs.should_warn(hourly_cost):
                print(PaastaColors.red(f"WARNING: {message}"))
            else:
                print(message)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    final_spark_submit_cmd_msg = f"Final command: {docker_cmd}"
    print(PaastaColors.grey(final_spark_submit_cmd_msg))
    log.info(final_spark_submit_cmd_msg)
    return run_docker_container(
        container_name=spark_conf["spark.app.name"],
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
        docker_memory_limit=docker_memory_limit,
        docker_cpu_limit=docker_cpu_limit,
    )
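
A self-contained sketch (the helper name is hypothetical, not part of spark_run) of how the Kubernetes branch above turns the flattened spark.kubernetes.executor.volumes.hostPath.<i>.* keys into docker "host:container:mode" volume bindings:

import re
from typing import Dict, List

def k8s_hostpath_conf_to_docker_volumes(spark_conf: Dict[str, str]) -> List[str]:
    # Collect the numeric indices of the ...hostPath.<i>.mount.path keys.
    indices = []
    for key in spark_conf:
        match = re.match(
            r"spark\.kubernetes\.executor\.volumes\.hostPath\.(\d+)\.mount\.path", key
        )
        if match:
            indices.append(match.group(1))
    volumes = []
    for i in sorted(indices):
        prefix = f"spark.kubernetes.executor.volumes.hostPath.{i}"
        mode = "ro" if spark_conf.get(f"{prefix}.mount.readOnly") == "true" else "rw"
        host_path = spark_conf[f"{prefix}.options.path"]
        container_path = spark_conf[f"{prefix}.mount.path"]
        volumes.append(f"{host_path}:{container_path}:{mode}")
    return volumes

# With the parametrized K8s conf used in the tests below, this yields
# ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"].
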
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{
                "hostPath": "/h1",
                "containerPath": "/c1",
                "mode": "RO"
            }]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {
            "volumes": [{
                "hostPath": "/h2",
                "containerPath": "/c2",
                "mode": "RO"
            }]
        },
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
                "paasta_tools.cli.cmds.spark_run.create_spark_config_str",
                autospec=True) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    def test_configure_and_run_docker_container(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {"spark.app.name": "fake_app", "spark.ui.port": "1234"}
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        with mock.patch.object(self.instance_config,
                               "get_env_dictionary",
                               return_value={"env1": "val1"}):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=[
                "/fake_dir:/spark_driver:rw",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch("paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                        autospec=True):
            spark_conf = {
                "spark.cores.max": "5",
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_send_and_calculate_resources_cost.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch("paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                        autospec=True):
            spark_conf = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake_app",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value

            assert mock_send_and_calculate_resources_cost.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch("paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                        autospec=True):
            mock_send_and_calculate_resources_cost.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            spark_conf = {
                "spark.cores.max": 5,
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(suppress_clusterman_metrics_errors=False,
                                  cmd="pyspark")
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                    aws_creds=("id", "secret", "token"),
                    spark_conf=spark_conf,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch("paasta_tools.cli.cmds.spark_run.clusterman_metrics",
                        autospec=True):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf={
                    "spark.ui.port": "1234",
                    "spark.app.name": "fake_app"
                },
            )
            assert not mock_send_and_calculate_resources_cost.called


class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster='fake_cluster',
        instance='fake_instance',
        service='fake_service',
        config_dict={
            'extra_volumes': [{
                "hostPath": "/h1",
                "containerPath": "/c1",
                "mode": "RO",
            }],
        },
        branch_dict={'docker_image': 'fake_service:fake_sha'},
    )

    system_paasta_config = SystemPaastaConfig(
        {
            'volumes': [{
                "hostPath": "/h2",
                "containerPath": "/c2",
                "mode": "RO",
            }],
        },
        'fake_dir',
    )

    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = 'fake_user'
        mock_create_spark_config_str.return_value = '--conf spark.app.name=fake_app'
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ('id', 'secret')
        mock_time.return_value = 1138

        args = mock.MagicMock()
        args.cluster = 'fake_cluster'
        args.cmd = 'pyspark'
        args.work_dir = '/fake_dir:/spark_driver'
        args.dry_run = True

        retcode = configure_and_run_docker_container(
            args=args,
            docker_img='fake-registry/fake-service',
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name='paasta_spark_run_fake_user_123_1138',
            volumes=[
                '/h1:/c1:ro',
                '/h2:/c2:ro',
                '/fake_dir:/spark_driver:rw',
                '/etc/passwd:/etc/passwd:ro',
                '/etc/group:/etc/group:ro',
            ],
            environment={
                'PAASTA_SERVICE': 'fake_service',
                'PAASTA_INSTANCE': 'fake_instance',
                'PAASTA_CLUSTER': 'fake_cluster',
                'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
                'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
                'AWS_ACCESS_KEY_ID': 'id',
                'AWS_SECRET_ACCESS_KEY': 'secret',
                'SPARK_USER': '******',
                'SPARK_OPTS': '--conf spark.app.name=fake_app',
            },
            docker_img='fake-registry/fake-service',
            docker_cmd='pyspark --conf spark.app.name=fake_app',
            dry_run=True,
        )

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')

        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
        ):
            mock_emit_resource_requirements.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'

            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False,
                cmd='pyspark',
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img='fake-registry/fake-service',
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')
        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
        ):
            mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'
            args = mock.MagicMock(cmd='bash')

            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )
            assert not mock_emit_resource_requirements.called
Example #11
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_configuration,
    mock_get_username,
    mock_pick_random_port,
    mock_getcwd,
    mock_os_path_exists,
):
    mock_pick_random_port.return_value = 123
    mock_getcwd.return_value = 'fake_cwd'
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_configuration.return_value = {'APP_NAME': 'fake_app'}
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            'fake_cwd:/spark_client:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'APP_NAME': 'fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark',
        dry_run=True,
    )
Example #12
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
) -> int:
    volumes = list()
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append(
                "{}:{}:{}".format(
                    volume["hostPath"], volume["containerPath"], volume["mode"].lower()
                )
            )
        else:
            print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]
                ),
                file=sys.stderr,
            )

    original_docker_cmd = args.cmd or instance_config.get_cmd()
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = get_spark_app_name(original_docker_cmd, spark_ui_port)

    access_key, secret_key = get_aws_credentials(
        service=args.service,
        no_aws_credentials=args.no_aws_credentials,
        aws_credentials_yaml=args.aws_credentials_yaml,
        profile_name=args.aws_profile,
    )
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict, is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key, secret_key)
    )

    webui_url = get_webui_url(spark_ui_port)

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        print(f"\nSpark monitoring URL {webui_url}\n")

    if clusterman_metrics and _should_emit_resource_requirements(
        docker_cmd, args.mrjob
    ):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster, webui_url)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_app_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
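The volume handling at the top of the function above can be exercised on its own. Below is a standalone sketch of that loop (helper name hypothetical): PaaSTA-style volume dicts are translated into host:container:mode binding strings, and host paths that do not exist are skipped.

import os
from typing import Dict, List


def format_volume_bindings(volumes: List[Dict[str, str]]) -> List[str]:
    # Mirror the loop above: keep only bindings whose hostPath exists,
    # and lower-case the mode ("RO" -> "ro") for the docker -v syntax.
    bindings = []
    for volume in volumes:
        if os.path.exists(volume["hostPath"]):
            bindings.append(
                "{}:{}:{}".format(
                    volume["hostPath"], volume["containerPath"], volume["mode"].lower()
                )
            )
    return bindings


# e.g. [{"hostPath": "/etc", "containerPath": "/c1", "mode": "RO"}] -> ["/etc:/c1:ro"]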
Example #13
0
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
    spark_conf: Mapping[str, str],
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
) -> int:

    # driver specific volumes
    volumes = (
        spark_conf.get("spark.mesos.executor.docker.volumes", "").split(",")
        if spark_conf.get("spark.mesos.executor.docker.volumes", "") != ""
        else []
    )
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()  # type: ignore
    spark_conf_str = create_spark_config_str(spark_conf, is_mrjob=args.mrjob)
    environment.update(
        get_spark_env(args, spark_conf_str, aws_creds,
                      spark_conf["spark.ui.port"]))  # type:ignore

    webui_url = get_webui_url(spark_conf["spark.ui.port"])

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd
             for c in ["pyspark", "spark-shell", "spark-submit"]):
        signalfx_url = get_signalfx_url(spark_conf)
        print(f"\nSpark monitoring URL {webui_url}\n")
        print(f"\nSignalfx dashboard: {signalfx_url}\n")
        history_server_url = get_history_url(spark_conf)
        if history_server_url:
            print(
                f"\nAfter the job is finished, you can find the spark UI from {history_server_url}\n"
            )

    if clusterman_metrics and _should_emit_resource_requirements(
            docker_cmd, args.mrjob):
        try:
            print("Sending resource request metrics to Clusterman")
            hourly_cost, resources = send_and_calculate_resources_cost(
                clusterman_metrics, spark_conf, webui_url, args.pool)
            message = (
                f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)"
                f" is estimated to cost ${hourly_cost} per hour")
            if clusterman_metrics.util.costs.should_warn(hourly_cost):
                print(PaastaColors.red(f"WARNING: {message}"))
            else:
                print(message)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                ))
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_conf["spark.app.name"],
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
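The expression at the top of the function above parses the comma-separated spark.mesos.executor.docker.volumes setting into the driver's volume list. A standalone sketch of that parsing (helper name hypothetical):

from typing import List, Mapping


def executor_docker_volumes(spark_conf: Mapping[str, str]) -> List[str]:
    # Split the comma-separated volume list, returning [] when the key
    # is unset or empty, as the driver-volume expression above does.
    raw = spark_conf.get("spark.mesos.executor.docker.volumes", "")
    return raw.split(",") if raw else []


assert executor_docker_volumes({}) == []
assert executor_docker_volumes(
    {"spark.mesos.executor.docker.volumes": "/a:/a:ro,/b:/b:rw"}
) == ["/a:/a:ro", "/b:/b:rw"]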
Example #14
0
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_conf_str,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_conf_str.return_value = '--conf spark.app.name=fake_app'
    mock_run_docker_container.return_value = 0
    mock_get_credentials.return_value = mock.MagicMock(access_key='id', secret_key='secret')

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.work_dir = '/fake_dir:/spark_driver'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            '/fake_dir:/spark_driver:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'AWS_ACCESS_KEY_ID': 'id',
            'AWS_SECRET_ACCESS_KEY': 'secret',
            'SPARK_USER': '******',
            'SPARK_OPTS': '--conf spark.app.name=fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark --conf spark.app.name=fake_app',
        dry_run=True,
    )
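The decorator stack that supplies the mock_* parameters above is not part of the snippet. A plausible sketch follows, assuming the patched names live in paasta_tools.cli.cmds.spark_run; mock.patch decorators are applied bottom-up, so the lowest decorator maps to the first parameter (mock_run_docker_container).

import mock  # the standalone mock package; unittest.mock works the same way


# Sketch only: the patch targets below are assumptions, not confirmed paths.
@mock.patch('paasta_tools.cli.cmds.spark_run.get_aws_credentials', autospec=True)
@mock.patch('os.path.exists', autospec=True)
@mock.patch('paasta_tools.cli.cmds.spark_run.pick_random_port', autospec=True)
@mock.patch('paasta_tools.cli.cmds.spark_run.get_username', autospec=True)
@mock.patch('paasta_tools.cli.cmds.spark_run.get_spark_conf_str', autospec=True)
@mock.patch('paasta_tools.cli.cmds.spark_run.run_docker_container', autospec=True)
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_conf_str,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_credentials,
):
    ...  # body as in the example above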