def _get_branch_dict(
    self, cluster: str, instance: str, config: utils.InstanceConfig
) -> utils.BranchDictV2:
    if self._deployments_json is None:
        self._deployments_json = load_v2_deployments_json(
            self._service, soa_dir=self._soa_dir
        )

    branch = config.get_branch()
    deploy_group = config.get_deploy_group()
    return self._deployments_json.get_branch_dict(self._service, branch, deploy_group)
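# The lazy load above means deployments.json is read from disk at most once per
# checker object, no matter how many instances _get_branch_dict is called for.
# A minimal standalone sketch of the same memoization pattern; the class name
# and the import path are assumptions for illustration, not part of the code above:
from paasta_tools.utils import load_v2_deployments_json  # assumed import path


class _DeploymentsCache:
    def __init__(self, service: str, soa_dir: str) -> None:
        self._service = service
        self._soa_dir = soa_dir
        self._deployments_json = None

    def get(self):
        if self._deployments_json is None:  # only the first call pays the disk read
            self._deployments_json = load_v2_deployments_json(
                self._service, soa_dir=self._soa_dir
            )
        return self._deployments_json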
def _get_allowed_locations_and_hosts(
    self, instance_config: InstanceConfig
) -> Dict[str, Sequence[SmartstackHost]]:
    """Returns a dict of locations and lists of corresponding Kubernetes nodes
    where deployment of the instance is allowed.

    :param instance_config: An instance of InstanceConfig
    :returns: A dict {"uswest1-prod": [SmartstackHost(), SmartstackHost(), ...]}
    """
    monitoring_blacklist = instance_config.get_monitoring_blacklist(
        system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
    )
    filtered_nodes = kubernetes_tools.filter_nodes_by_blacklist(
        nodes=self.nodes,
        blacklist=monitoring_blacklist,
        whitelist=None,
    )
    discover_location_type = kubernetes_tools.load_service_namespace_config(
        service=instance_config.service,
        namespace=instance_config.instance,
        soa_dir=instance_config.soa_dir,
    ).get_discover()
    attribute_to_nodes = kubernetes_tools.get_nodes_grouped_by_attribute(
        nodes=filtered_nodes,
        attribute=discover_location_type,
    )
    ret: Dict[str, Sequence[SmartstackHost]] = {}
    for attr, nodes in attribute_to_nodes.items():
        ret[attr] = [
            SmartstackHost(
                hostname=node.metadata.labels['yelp.com/hostname'],
                pool=node.metadata.labels['yelp.com/pool'],
            )
            for node in nodes
        ]
    return ret
def _get_allowed_locations_and_hosts(
    self, instance_config: InstanceConfig
) -> Dict[str, Sequence[SmartstackHost]]:
    """Returns a dict of locations and lists of corresponding mesos slaves
    where deployment of the instance is allowed.

    :param instance_config: An instance of MarathonServiceConfig
    :returns: A dict {"uswest1-prod": [SmartstackHost(), SmartstackHost(), ...]}
    """
    monitoring_blacklist = instance_config.get_monitoring_blacklist(
        system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
    )
    filtered_slaves = mesos_tools.filter_mesos_slaves_by_blacklist(
        slaves=self._mesos_slaves,
        blacklist=monitoring_blacklist,
        whitelist=None,
    )
    discover_location_type = marathon_tools.load_service_namespace_config(
        service=instance_config.service,
        namespace=instance_config.instance,
        soa_dir=instance_config.soa_dir,
    ).get_discover()
    attribute_to_slaves = mesos_tools.get_mesos_slaves_grouped_by_attribute(
        slaves=filtered_slaves,
        attribute=discover_location_type,
    )
    ret: Dict[str, Sequence[SmartstackHost]] = {}
    for attr, slaves in attribute_to_slaves.items():
        ret[attr] = [
            SmartstackHost(hostname=slave['hostname'], pool=slave['attributes']['pool'])
            for slave in slaves
        ]
    return ret
def _get_allowed_locations_and_slaves(
    self, instance_config: InstanceConfig
) -> Dict[str, List[dict]]:
    """Returns a dict of locations and lists of corresponding mesos slaves
    where deployment of the instance is allowed.

    :param instance_config: An instance of MarathonServiceConfig
    :returns: A dict {"uswest1-prod": ['hostname1', 'hostname2'], ...}.
    """
    monitoring_blacklist = instance_config.get_monitoring_blacklist(
        system_deploy_blacklist=self._system_paasta_config.get_deploy_blacklist(),
    )
    filtered_slaves = mesos_tools.filter_mesos_slaves_by_blacklist(
        slaves=self._mesos_slaves,
        blacklist=monitoring_blacklist,
        whitelist=None,
    )
    discover_location_type = marathon_tools.load_service_namespace_config(
        service=instance_config.service,
        namespace=instance_config.instance,
        soa_dir=instance_config.soa_dir,
    ).get_discover()
    return mesos_tools.get_mesos_slaves_grouped_by_attribute(
        slaves=filtered_slaves,
        attribute=discover_location_type,
    )
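# All three variants above rely on the same attribute-based grouping; they only
# differ in what they return per location (Kubernetes nodes, SmartstackHost
# wrappers, or raw Mesos slave dicts). A minimal standalone sketch of that
# grouping, assuming each slave dict carries an 'attributes' mapping; the helper
# name is hypothetical and is not the real mesos_tools implementation:
from collections import defaultdict
from typing import Dict, List


def group_slaves_by_attribute(
    slaves: List[dict], attribute: str
) -> Dict[str, List[dict]]:
    grouped: Dict[str, List[dict]] = defaultdict(list)
    for slave in slaves:
        # Slaves missing the attribute are simply not discoverable at this level.
        value = slave.get("attributes", {}).get(attribute)
        if value is not None:
            grouped[value].append(slave)
    return dict(grouped)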
def get_replication_for_instance(
    self, instance_config: InstanceConfig
) -> Dict[str, Dict[str, int]]:
    """Returns the number of registered instances in each discoverable location.

    :param instance_config: An instance of MarathonServiceConfig.
    :returns: a dict {'location_type': {'service.instance': int}}
    """
    replication_info = {}
    attribute_host_dict = self._get_allowed_locations_and_hosts(instance_config)
    instance_pool = instance_config.get_pool()
    for location, hosts in attribute_host_dict.items():
        hostname = self._get_first_host_in_pool(hosts, instance_pool)
        replication_info[location] = self._get_replication_info(
            location, hostname, instance_config
        )
    return replication_info
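# Illustrative only: per the loop above, the returned dict is keyed first by
# the discovered location value and then by "service.instance" (the count here
# is made up):
#
#     {"uswest1-prod": {"fake_service.fake_instance": 3}}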
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {"volumes": [{"hostPath": "/h2", "containerPath": "/c2", "mode": "RO"}]},
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.create_spark_config_str", autospec=True
        ) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = "fake_user"
        mock_get_spark_config.return_value = {"spark.app.name": "fake_app"}
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ("id", "secret")

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False

        with mock.patch(
            "paasta_tools.utils.get_service_docker_registry",
            autospec=True,
            return_value="fake-registry",
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="paasta_spark_run_fake_user_123",
            volumes=[
                "/h1:/c1:ro",
                "/h2:/c2:ro",
                "/fake_dir:/spark_driver:rw",
                "/etc/passwd:/etc/passwd:ro",
                "/etc/group:/etc/group:ro",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "PAASTA_SERVICE": "fake_service",
                "PAASTA_INSTANCE": "fake_instance",
                "PAASTA_CLUSTER": "fake_cluster",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_DEPLOY_GROUP": "fake_cluster.fake_instance",
                "PAASTA_DOCKER_IMAGE": "fake_service:fake_sha",
                "PAASTA_LAUNCHED_BY": mock.ANY,
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_USER": "******",
                "SPARK_OPTS": "--conf spark.app.name=fake_app",
                "PAASTA_RESOURCE_CPUS": "0.25",
                "PAASTA_RESOURCE_DISK": "1024",
                "PAASTA_RESOURCE_MEM": "1024",
                "PAASTA_GIT_SHA": "fake_ser",
            },
            docker_img="fake-registry/fake-service",
            docker_cmd="pyspark --conf spark.app.name=fake_app",
            dry_run=True,
            nvidia=False,
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_get_spark_config.return_value = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_emit_resource_requirements.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_get_spark_config.return_value = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == (
                "python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5"
            )
            assert mock_emit_resource_requirements.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
        mock_create_spark_config_str,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_emit_resource_requirements.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"

            args = mock.MagicMock(suppress_clusterman_metrics_errors=False, cmd="pyspark")
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
        mock_create_spark_config_str,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            assert not mock_emit_resource_requirements.called
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {"volumes": [{"hostPath": "/h2", "containerPath": "/c2", "mode": "RO"}]},
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.create_spark_config_str", autospec=True
        ) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_container(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.cluster_manager = cluster_manager
        args.disable_aws_credential_env_variables = False
        args.docker_cpu_limit = False
        args.docker_memory_limit = False

        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="2g",
            docker_cpu_limit="1",
        )

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_driver_resource_limits_config(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            "spark.driver.memory": "1g",
            "spark.driver.cores": "2",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.disable_aws_credential_env_variables = False
        args.cluster_manager = cluster_manager
        args.docker_cpu_limit = 3
        args.docker_memory_limit = "4g"

        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="4g",
            docker_cpu_limit=3,
        )

    @pytest.mark.parametrize(
        ["cluster_manager", "spark_args_volumes", "expected_volumes"],
        [
            (
                spark_run.CLUSTER_MANAGER_MESOS,
                {
                    "spark.mesos.executor.docker.volumes": "/mesos/volume:/mesos/volume:rw"
                },
                ["/mesos/volume:/mesos/volume:rw"],
            ),
            (
                spark_run.CLUSTER_MANAGER_K8S,
                {
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
                    "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.readOnly": "false",
                    "spark.kubernetes.executor.volumes.hostPath.1.mount.path": "/k8s/volume1",
                    "spark.kubernetes.executor.volumes.hostPath.1.options.path": "/k8s/volume1",
                },
                ["/k8s/volume0:/k8s/volume0:ro", "/k8s/volume1:/k8s/volume1:rw"],
            ),
        ],
    )
    def test_configure_and_run_docker_driver_resource_limits(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            "spark.driver.memory": "1g",
            "spark.driver.cores": "2",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.cluster_manager = cluster_manager
        args.disable_aws_credential_env_variables = False
        args.docker_cpu_limit = False
        args.docker_memory_limit = False

        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="2g",
            docker_cpu_limit="2",
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": "5",
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_send_and_calculate_resources_cost.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake_app",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value
            assert mock_send_and_calculate_resources_cost.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_send_and_calculate_resources_cost.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            spark_conf = {
                "spark.cores.max": 5,
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False, cmd="pyspark"
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                    aws_creds=("id", "secret", "token"),
                    spark_conf=spark_conf,
                    cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                    pod_template_path="unique-run",
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"},
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            assert not mock_send_and_calculate_resources_cost.called
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
    spark_conf: Mapping[str, str],
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
    cluster_manager: str,
    pod_template_path: str,
) -> int:
    # driver specific volumes
    volumes: List[str] = []

    docker_memory_limit = _calculate_docker_memory_limit(
        spark_conf, args.docker_memory_limit
    )
    docker_cpu_limit = _calculate_docker_cpu_limit(
        spark_conf,
        args.docker_cpu_limit,
    )

    if cluster_manager == CLUSTER_MANAGER_MESOS:
        volumes = (
            spark_conf.get("spark.mesos.executor.docker.volumes", "").split(",")
            if spark_conf.get("spark.mesos.executor.docker.volumes", "") != ""
            else []
        )
    elif cluster_manager == CLUSTER_MANAGER_K8S:
        volume_names = [
            re.match(
                r"spark.kubernetes.executor.volumes.hostPath.(\d+).mount.path", key
            ).group(1)
            for key in spark_conf.keys()
            if "spark.kubernetes.executor.volumes.hostPath." in key
            and ".mount.path" in key
        ]
        for volume_name in volume_names:
            read_only = (
                "ro"
                if spark_conf.get(
                    f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.mount.readOnly"
                )
                == "true"
                else "rw"
            )
            container_path = spark_conf.get(
                f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.mount.path"
            )
            host_path = spark_conf.get(
                f"spark.kubernetes.executor.volumes.hostPath.{volume_name}.options.path"
            )
            volumes.append(f"{host_path}:{container_path}:{read_only}")

    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/nail/home:/nail/home:rw")
    if args.enable_compact_bin_packing:
        volumes.append(f"{pod_template_path}:{pod_template_path}:rw")

    environment = instance_config.get_env_dictionary()  # type: ignore
    spark_conf_str = create_spark_config_str(spark_conf, is_mrjob=args.mrjob)
    environment.update(
        get_spark_env(args, spark_conf_str, aws_creds, spark_conf["spark.ui.port"])
    )  # type:ignore

    webui_url = get_webui_url(spark_conf["spark.ui.port"])
    webui_url_msg = f"\nSpark monitoring URL {webui_url}\n"

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        signalfx_url = get_signalfx_url(spark_conf)
        signalfx_url_msg = f"\nSignalfx dashboard: {signalfx_url}\n"
        print(webui_url_msg)
        print(signalfx_url_msg)
        log.info(webui_url_msg)
        log.info(signalfx_url_msg)

        history_server_url = get_history_url(spark_conf)
        if history_server_url:
            history_server_url_msg = (
                f"\nAfter the job is finished, you can find the spark UI from {history_server_url}\n"
                "Check y/spark-recent-history for faster access to prod logs\n"
            )
            print(history_server_url_msg)
            log.info(history_server_url_msg)
    print(f"Selected cluster manager: {cluster_manager}\n")

    if clusterman_metrics and _should_get_resource_requirements(docker_cmd, args.mrjob):
        try:
            if cluster_manager == CLUSTER_MANAGER_MESOS:
                print("Sending resource request metrics to Clusterman")
                hourly_cost, resources = send_and_calculate_resources_cost(
                    clusterman_metrics, spark_conf, webui_url, args.pool
                )
            else:
                resources = get_resources_requested(spark_conf)
                hourly_cost = get_spark_hourly_cost(
                    clusterman_metrics,
                    resources,
                    spark_conf["spark.executorEnv.PAASTA_CLUSTER"],
                    args.pool,
                )
            message = (
                f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)"
                f" is estimated to cost ${hourly_cost} per hour"
            )
            if clusterman_metrics.util.costs.should_warn(hourly_cost):
                print(PaastaColors.red(f"WARNING: {message}"))
            else:
                print(message)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    final_spark_submit_cmd_msg = f"Final command: {docker_cmd}"
    print(PaastaColors.grey(final_spark_submit_cmd_msg))
    log.info(final_spark_submit_cmd_msg)
    return run_docker_container(
        container_name=spark_conf["spark.app.name"],
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
        docker_memory_limit=docker_memory_limit,
        docker_cpu_limit=docker_cpu_limit,
    )
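# A minimal standalone sketch of the Kubernetes hostPath parsing above; the
# helper name is hypothetical, not part of paasta_tools. It turns
# spark.kubernetes.executor.volumes.hostPath.<N>.* entries into
# "host:container:mode" Docker volume strings:
import re
from typing import List, Mapping


def k8s_hostpath_volumes(spark_conf: Mapping[str, str]) -> List[str]:
    volumes: List[str] = []
    pattern = re.compile(
        r"spark\.kubernetes\.executor\.volumes\.hostPath\.(\d+)\.mount\.path"
    )
    for key in spark_conf:
        match = pattern.fullmatch(key)
        if not match:
            continue
        prefix = f"spark.kubernetes.executor.volumes.hostPath.{match.group(1)}"
        mode = "ro" if spark_conf.get(f"{prefix}.mount.readOnly") == "true" else "rw"
        host_path = spark_conf.get(f"{prefix}.options.path")
        container_path = spark_conf.get(f"{prefix}.mount.path")
        volumes.append(f"{host_path}:{container_path}:{mode}")
    return volumes


# Example mirroring the k8s case in the parametrized tests above:
assert k8s_hostpath_volumes(
    {
        "spark.kubernetes.executor.volumes.hostPath.0.mount.readOnly": "true",
        "spark.kubernetes.executor.volumes.hostPath.0.mount.path": "/k8s/volume0",
        "spark.kubernetes.executor.volumes.hostPath.0.options.path": "/k8s/volume0",
    }
) == ["/k8s/volume0:/k8s/volume0:ro"]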
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [{"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}]
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )

    system_paasta_config = SystemPaastaConfig(
        {"volumes": [{"hostPath": "/h2", "containerPath": "/c2", "mode": "RO"}]},
        "fake_dir",
    )

    @pytest.fixture
    def mock_create_spark_config_str(self):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.create_spark_config_str", autospec=True
        ) as _mock_create_spark_config_str:
            yield _mock_create_spark_config_str

    def test_configure_and_run_docker_container(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {"spark.app.name": "fake_app", "spark.ui.port": "1234"}
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False

        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=[
                "/fake_dir:/spark_driver:rw",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
        )

    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": "5",
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_send_and_calculate_resources_cost.called

    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake_app",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value
            assert mock_send_and_calculate_resources_cost.called

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_send_and_calculate_resources_cost.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            spark_conf = {
                "spark.cores.max": 5,
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(suppress_clusterman_metrics_errors=False, cmd="pyspark")
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                    aws_creds=("id", "secret", "token"),
                    spark_conf=spark_conf,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"},
            )

            assert not mock_send_and_calculate_resources_cost.called
class TestConfigureAndRunDockerContainer:

    instance_config = InstanceConfig(
        cluster='fake_cluster',
        instance='fake_instance',
        service='fake_service',
        config_dict={
            'extra_volumes': [{
                "hostPath": "/h1",
                "containerPath": "/c1",
                "mode": "RO",
            }],
        },
        branch_dict={'docker_image': 'fake_service:fake_sha'},
    )

    system_paasta_config = SystemPaastaConfig(
        {
            'volumes': [{
                "hostPath": "/h2",
                "containerPath": "/c2",
                "mode": "RO",
            }],
        },
        'fake_dir',
    )

    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = 'fake_user'
        mock_create_spark_config_str.return_value = '--conf spark.app.name=fake_app'
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ('id', 'secret')
        mock_time.return_value = 1138

        args = mock.MagicMock()
        args.cluster = 'fake_cluster'
        args.cmd = 'pyspark'
        args.work_dir = '/fake_dir:/spark_driver'
        args.dry_run = True

        retcode = configure_and_run_docker_container(
            args=args,
            docker_img='fake-registry/fake-service',
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name='paasta_spark_run_fake_user_123_1138',
            volumes=[
                '/h1:/c1:ro',
                '/h2:/c2:ro',
                '/fake_dir:/spark_driver:rw',
                '/etc/passwd:/etc/passwd:ro',
                '/etc/group:/etc/group:ro',
            ],
            environment={
                'PAASTA_SERVICE': 'fake_service',
                'PAASTA_INSTANCE': 'fake_instance',
                'PAASTA_CLUSTER': 'fake_cluster',
                'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
                'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
                'AWS_ACCESS_KEY_ID': 'id',
                'AWS_SECRET_ACCESS_KEY': 'secret',
                'SPARK_USER': '******',
                'SPARK_OPTS': '--conf spark.app.name=fake_app',
            },
            docker_img='fake-registry/fake-service',
            docker_cmd='pyspark --conf spark.app.name=fake_app',
            dry_run=True,
        )

    def test_suppress_clusterman_metrics_errors(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')
        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
        ):
            mock_emit_resource_requirements.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'

            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False,
                cmd='pyspark',
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img='fake-registry/fake-service',
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')
        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
        ):
            mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'
            args = mock.MagicMock(cmd='bash')

            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            assert not mock_emit_resource_requirements.called
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_configuration,
    mock_get_username,
    mock_pick_random_port,
    mock_getcwd,
    mock_os_path_exists,
):
    mock_pick_random_port.return_value = 123
    mock_getcwd.return_value = 'fake_cwd'
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_configuration.return_value = {'APP_NAME': 'fake_app'}
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            'fake_cwd:/spark_client:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'APP_NAME': 'fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark',
        dry_run=True,
    )
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
) -> int:
    volumes = list()
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append(
                "{}:{}:{}".format(
                    volume["hostPath"], volume["containerPath"], volume["mode"].lower()
                )
            )
        else:
            print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]
                ),
                file=sys.stderr,
            )

    original_docker_cmd = args.cmd or instance_config.get_cmd()
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = get_spark_app_name(original_docker_cmd, spark_ui_port)

    access_key, secret_key = get_aws_credentials(
        service=args.service,
        no_aws_credentials=args.no_aws_credentials,
        aws_credentials_yaml=args.aws_credentials_yaml,
        profile_name=args.aws_profile,
    )
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict, is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key, secret_key)
    )

    webui_url = get_webui_url(spark_ui_port)

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        print(f"\nSpark monitoring URL {webui_url}\n")

    if clusterman_metrics and _should_emit_resource_requirements(
        docker_cmd, args.mrjob
    ):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster, webui_url)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_app_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
    spark_conf: Mapping[str, str],
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
) -> int:
    # driver specific volumes
    volumes = (
        spark_conf.get("spark.mesos.executor.docker.volumes", "").split(",")
        if spark_conf.get("spark.mesos.executor.docker.volumes", "") != ""
        else []
    )
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()  # type: ignore
    spark_conf_str = create_spark_config_str(spark_conf, is_mrjob=args.mrjob)
    environment.update(
        get_spark_env(args, spark_conf_str, aws_creds, spark_conf["spark.ui.port"])
    )  # type:ignore

    webui_url = get_webui_url(spark_conf["spark.ui.port"])

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        signalfx_url = get_signalfx_url(spark_conf)
        print(f"\nSpark monitoring URL {webui_url}\n")
        print(f"\nSignalfx dashboard: {signalfx_url}\n")
        history_server_url = get_history_url(spark_conf)
        if history_server_url:
            print(
                f"\nAfter the job is finished, you can find the spark UI from {history_server_url}\n"
            )

    if clusterman_metrics and _should_emit_resource_requirements(
        docker_cmd, args.mrjob
    ):
        try:
            print("Sending resource request metrics to Clusterman")
            hourly_cost, resources = send_and_calculate_resources_cost(
                clusterman_metrics, spark_conf, webui_url, args.pool
            )
            message = (
                f"Resource request ({resources['cpus']} cpus and {resources['mem']} MB memory total)"
                f" is estimated to cost ${hourly_cost} per hour"
            )
            if clusterman_metrics.util.costs.should_warn(hourly_cost):
                print(PaastaColors.red(f"WARNING: {message}"))
            else:
                print(message)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_conf["spark.app.name"],
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
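# Side note on the comma-split above: "".split(",") returns [""], not [], which
# is why the code guards against the empty string before splitting. A hedged
# sketch of the same logic as a helper (the helper name is hypothetical):
from typing import List, Mapping


def mesos_docker_volumes(spark_conf: Mapping[str, str]) -> List[str]:
    raw = spark_conf.get("spark.mesos.executor.docker.volumes", "")
    return raw.split(",") if raw else []


assert mesos_docker_volumes({}) == []
assert mesos_docker_volumes(
    {"spark.mesos.executor.docker.volumes": "/a:/a:rw,/b:/b:ro"}
) == ["/a:/a:rw", "/b:/b:ro"]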
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_conf_str,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_conf_str.return_value = '--conf spark.app.name=fake_app'
    mock_run_docker_container.return_value = 0
    mock_get_credentials.return_value = mock.MagicMock(
        access_key='id', secret_key='secret'
    )

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.work_dir = '/fake_dir:/spark_driver'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            '/fake_dir:/spark_driver:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'AWS_ACCESS_KEY_ID': 'id',
            'AWS_SECRET_ACCESS_KEY': 'secret',
            'SPARK_USER': '******',
            'SPARK_OPTS': '--conf spark.app.name=fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark --conf spark.app.name=fake_app',
        dry_run=True,
    )