def test_dont_emit_metrics_for_inappropriate_commands(
    self,
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
    mock_create_spark_config_str,
):
    mock_get_aws_credentials.return_value = ("id", "secret")
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
    ) as mock_emit_resource_requirements, mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
        args = mock.MagicMock(cmd="bash", mrjob=False)

        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )
        assert not mock_emit_resource_requirements.called

def test_configure_and_run_docker_container_mrjob(
    self,
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    mock_get_aws_credentials.return_value = ('id', 'secret')
    with mock.patch(
        'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
    ) as mock_emit_resource_requirements, mock.patch(
        'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
    ):
        mock_get_spark_config.return_value = {
            'spark.cores.max': 5,
            'spark.master': 'mesos://spark.master',
        }
        args = mock.MagicMock(cmd='python mrjob_wrapper.py', mrjob=True)

        configure_and_run_docker_container(
            args=args,
            docker_img='fake-registry/fake-service',
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

        args, kwargs = mock_run_docker_container.call_args
        assert kwargs['docker_cmd'] == (
            'python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5'
        )

        assert mock_emit_resource_requirements.called

def test_configure_and_run_docker_container_mrjob(
    self,
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    mock_get_aws_credentials.return_value = ("id", "secret")
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.emit_resource_requirements", autospec=True
    ) as mock_emit_resource_requirements, mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        mock_get_spark_config.return_value = {
            "spark.cores.max": 5,
            "spark.master": "mesos://spark.master",
        }
        args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

        args, kwargs = mock_run_docker_container.call_args
        assert kwargs["docker_cmd"] == (
            "python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5"
        )

        assert mock_emit_resource_requirements.called

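# Illustrative sketch (not part of the original module): the positional mock_*
# parameters of the class-based tests above are injected by stacked @mock.patch
# decorators, where the decorator closest to the function supplies the first
# mock argument. The exact patch targets below are assumptions inferred from
# the parameter names of test_configure_and_run_docker_container_mrjob; they
# document only the expected ordering.
@mock.patch("paasta_tools.cli.cmds.spark_run.get_aws_credentials", autospec=True)
@mock.patch("os.path.exists", autospec=True)
@mock.patch("paasta_tools.cli.cmds.spark_run.pick_random_port", autospec=True)
@mock.patch("paasta_tools.cli.cmds.spark_run.get_username", autospec=True)
@mock.patch("paasta_tools.cli.cmds.spark_run.get_spark_config", autospec=True)
@mock.patch("paasta_tools.cli.cmds.spark_run.run_docker_container", autospec=True)
@mock.patch("paasta_tools.cli.cmds.spark_run.time", autospec=True)
def _patch_stack_sketch(
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    # Placeholder body; the real tests above receive the mocks in this order.
    pass
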
def test_dont_emit_metrics_for_inappropriate_commands(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
):
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
        args = mock.MagicMock(cmd="bash", mrjob=False)

        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"},
            cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
            pod_template_path="unique-run",
        )
        assert not mock_send_and_calculate_resources_cost.called

def test_configure_and_run_docker_container_mrjob(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
):
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        spark_conf = {
            "spark.cores.max": 5,
            "spark.master": "mesos://spark.master",
            "spark.ui.port": "1234",
            "spark.app.name": "fake_app",
        }
        args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf=spark_conf,
        )

        args, kwargs = mock_run_docker_container.call_args
        assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value

        assert mock_send_and_calculate_resources_cost.called

def test_suppress_clusterman_metrics_errors(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
):
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        mock_send_and_calculate_resources_cost.side_effect = Boto3Error
        mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
        spark_conf = {
            "spark.cores.max": 5,
            "spark.ui.port": "1234",
            "spark.app.name": "fake app",
        }
        args = mock.MagicMock(
            suppress_clusterman_metrics_errors=False, cmd="pyspark"
        )
        with pytest.raises(Boto3Error):
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

        # make sure we don't blow up when this setting is True
        args.suppress_clusterman_metrics_errors = True
        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf=spark_conf,
            cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
            pod_template_path="unique-run",
        )

def test_configure_and_run_docker_container(
    self,
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_config.return_value = {'spark.app.name': 'fake_app'}
    mock_run_docker_container.return_value = 0
    mock_get_aws_credentials.return_value = ('id', 'secret')

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.work_dir = '/fake_dir:/spark_driver'
    args.dry_run = True
    args.mrjob = False
    args.nvidia = False

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=self.instance_config,
        system_paasta_config=self.system_paasta_config,
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            '/fake_dir:/spark_driver:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'PAASTA_LAUNCHED_BY': mock.ANY,
            'AWS_ACCESS_KEY_ID': 'id',
            'AWS_SECRET_ACCESS_KEY': 'secret',
            'SPARK_USER': '******',
            'SPARK_OPTS': '--conf spark.app.name=fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark --conf spark.app.name=fake_app',
        dry_run=True,
        nvidia=False,
    )

def test_configure_and_run_docker_container(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
):
    mock_get_username.return_value = "fake_user"
    spark_conf = {"spark.app.name": "fake_app", "spark.ui.port": "1234"}
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.aws_region = "fake_region"
    args.cluster = "fake_cluster"
    args.cmd = "pyspark"
    args.work_dir = "/fake_dir:/spark_driver"
    args.dry_run = True
    args.mrjob = False
    args.nvidia = False

    with mock.patch.object(
        self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
    ):
        retcode = configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf=spark_conf,
        )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name="fake_app",
        volumes=[
            "/fake_dir:/spark_driver:rw",
            "/nail/home:/nail/home:rw",
        ],
        environment={
            "env1": "val1",
            "AWS_ACCESS_KEY_ID": "id",
            "AWS_SECRET_ACCESS_KEY": "secret",
            "AWS_SESSION_TOKEN": "token",
            "AWS_DEFAULT_REGION": "fake_region",
            "SPARK_OPTS": mock_create_spark_config_str.return_value,
            "SPARK_USER": "******",
            "PAASTA_INSTANCE_TYPE": "spark",
            "PAASTA_LAUNCHED_BY": mock.ANY,
        },
        docker_img="fake-registry/fake-service",
        docker_cmd=mock_get_docker_cmd.return_value,
        dry_run=True,
        nvidia=False,
    )

def test_suppress_clusterman_metrics_errors(
    self,
    mock_time,
    mock_run_docker_container,
    mock_create_spark_config_str,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    mock_get_aws_credentials.return_value = ('id', 'secret')
    with mock.patch(
        'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
    ) as mock_emit_resource_requirements, mock.patch(
        'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
    ):
        mock_emit_resource_requirements.side_effect = Boto3Error
        mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'

        args = mock.MagicMock(
            suppress_clusterman_metrics_errors=False,
            cmd='pyspark',
        )
        with pytest.raises(Boto3Error):
            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

        # make sure we don't blow up when this setting is True
        args.suppress_clusterman_metrics_errors = True
        configure_and_run_docker_container(
            args=args,
            docker_img='fake-registry/fake-service',
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

def test_configure_and_run_docker_container_nvidia(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
):
    with mock.patch(
        "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
    ):
        spark_conf = {
            "spark.cores.max": "5",
            "spark.master": "mesos://spark.master",
            "spark.ui.port": "1234",
            "spark.app.name": "fake app",
        }
        args = mock.MagicMock(cmd="pyspark", nvidia=True)

        configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf=spark_conf,
            cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
            pod_template_path="unique-run",
        )

        args, kwargs = mock_run_docker_container.call_args
        assert kwargs["nvidia"]

        assert mock_send_and_calculate_resources_cost.called

def test_configure_and_run_docker_container(
    self,
    mock_time,
    mock_run_docker_container,
    mock_get_spark_config,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_aws_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = "fake_user"
    mock_get_spark_config.return_value = {"spark.app.name": "fake_app"}
    mock_run_docker_container.return_value = 0
    mock_get_aws_credentials.return_value = ("id", "secret")

    args = mock.MagicMock()
    args.aws_region = "fake_region"
    args.cluster = "fake_cluster"
    args.cmd = "pyspark"
    args.work_dir = "/fake_dir:/spark_driver"
    args.dry_run = True
    args.mrjob = False
    args.nvidia = False

    with mock.patch(
        "paasta_tools.utils.get_service_docker_registry",
        autospec=True,
        return_value="fake-registry",
    ):
        retcode = configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name="paasta_spark_run_fake_user_123",
        volumes=[
            "/h1:/c1:ro",
            "/h2:/c2:ro",
            "/fake_dir:/spark_driver:rw",
            "/etc/passwd:/etc/passwd:ro",
            "/etc/group:/etc/group:ro",
            "/nail/home:/nail/home:rw",
        ],
        environment={
            "PAASTA_SERVICE": "fake_service",
            "PAASTA_INSTANCE": "fake_instance",
            "PAASTA_CLUSTER": "fake_cluster",
            "PAASTA_INSTANCE_TYPE": "spark",
            "PAASTA_DEPLOY_GROUP": "fake_cluster.fake_instance",
            "PAASTA_DOCKER_IMAGE": "fake_service:fake_sha",
            "PAASTA_LAUNCHED_BY": mock.ANY,
            "AWS_ACCESS_KEY_ID": "id",
            "AWS_SECRET_ACCESS_KEY": "secret",
            "AWS_DEFAULT_REGION": "fake_region",
            "SPARK_USER": "******",
            "SPARK_OPTS": "--conf spark.app.name=fake_app",
            "PAASTA_RESOURCE_CPUS": "0.25",
            "PAASTA_RESOURCE_DISK": "1024",
            "PAASTA_RESOURCE_MEM": "1024",
            "PAASTA_GIT_SHA": "fake_ser",
        },
        docker_img="fake-registry/fake-service",
        docker_cmd="pyspark --conf spark.app.name=fake_app",
        dry_run=True,
        nvidia=False,
    )

def test_configure_and_run_docker_driver_resource_limits_config(
    self,
    mock_get_history_url,
    mock_get_signalfx_url,
    mock_get_docker_cmd,
    mock_create_spark_config_str,
    mock_get_webui_url,
    mock_send_and_calculate_resources_cost,
    mock_run_docker_container,
    mock_get_username,
    cluster_manager,
    spark_args_volumes,
    expected_volumes,
):
    mock_get_username.return_value = "fake_user"
    spark_conf = {
        "spark.app.name": "fake_app",
        "spark.ui.port": "1234",
        "spark.driver.memory": "1g",
        "spark.driver.cores": "2",
        **spark_args_volumes,
    }
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.aws_region = "fake_region"
    args.cluster = "fake_cluster"
    args.cmd = "pyspark"
    args.work_dir = "/fake_dir:/spark_driver"
    args.dry_run = True
    args.mrjob = False
    args.nvidia = False
    args.enable_compact_bin_packing = False
    args.disable_aws_credential_env_variables = False
    args.cluster_manager = cluster_manager
    args.docker_cpu_limit = 3
    args.docker_memory_limit = "4g"

    with mock.patch.object(
        self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
    ):
        retcode = configure_and_run_docker_container(
            args=args,
            docker_img="fake-registry/fake-service",
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
            aws_creds=("id", "secret", "token"),
            spark_conf=spark_conf,
            cluster_manager=cluster_manager,
            pod_template_path="unique-run",
        )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name="fake_app",
        volumes=(
            expected_volumes
            + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
        ),
        environment={
            "env1": "val1",
            "AWS_ACCESS_KEY_ID": "id",
            "AWS_SECRET_ACCESS_KEY": "secret",
            "AWS_SESSION_TOKEN": "token",
            "AWS_DEFAULT_REGION": "fake_region",
            "SPARK_OPTS": mock_create_spark_config_str.return_value,
            "SPARK_USER": "******",
            "PAASTA_INSTANCE_TYPE": "spark",
            "PAASTA_LAUNCHED_BY": mock.ANY,
        },
        docker_img="fake-registry/fake-service",
        docker_cmd=mock_get_docker_cmd.return_value,
        dry_run=True,
        nvidia=False,
        docker_memory_limit="4g",
        docker_cpu_limit=3,
    )

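# Illustrative sketch (not part of the original module): the trailing
# cluster_manager, spark_args_volumes and expected_volumes parameters of
# test_configure_and_run_docker_driver_resource_limits_config above are
# normally supplied by a @pytest.mark.parametrize decorator shaped roughly
# like this. The Mesos volume key and the host/container paths below are
# assumptions chosen only to show the expected structure.
@pytest.mark.parametrize(
    "cluster_manager,spark_args_volumes,expected_volumes",
    [
        (
            spark_run.CLUSTER_MANAGER_MESOS,
            {"spark.mesos.executor.docker.volumes": "/host:/container:ro"},
            ["/host:/container:ro"],
        ),
    ],
)
def _parametrize_shape_sketch(cluster_manager, spark_args_volumes, expected_volumes):
    # Placeholder only; the real test above consumes these parameters together
    # with the mock_* arguments injected by its patch decorators.
    pass
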
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_configuration,
    mock_get_username,
    mock_pick_random_port,
    mock_getcwd,
    mock_os_path_exists,
):
    mock_pick_random_port.return_value = 123
    mock_getcwd.return_value = 'fake_cwd'
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_configuration.return_value = {'APP_NAME': 'fake_app'}
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            'fake_cwd:/spark_client:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'APP_NAME': 'fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark',
        dry_run=True,
    )

def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_conf_str,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_conf_str.return_value = '--conf spark.app.name=fake_app'
    mock_run_docker_container.return_value = 0
    mock_get_credentials.return_value = mock.MagicMock(
        access_key='id', secret_key='secret',
    )

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.work_dir = '/fake_dir:/spark_driver'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            '/fake_dir:/spark_driver:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'AWS_ACCESS_KEY_ID': 'id',
            'AWS_SECRET_ACCESS_KEY': 'secret',
            'SPARK_USER': '******',
            'SPARK_OPTS': '--conf spark.app.name=fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark --conf spark.app.name=fake_app',
        dry_run=True,
    )

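# Illustrative sketch (not taken from the original module): the class-based
# tests above reference self.instance_config and self.system_paasta_config.
# A minimal setup matching the expected '/h1:/c1:ro' and '/h2:/c2:ro' volumes
# and the 'fake_service:fake_sha' image, assembled from the inline configs
# used by the standalone tests, would look roughly like this; any attribute
# beyond those exercised in the assertions is an assumption.
class _TestClassFixturesSketch:
    instance_config = InstanceConfig(
        cluster="fake_cluster",
        instance="fake_instance",
        service="fake_service",
        config_dict={
            "extra_volumes": [
                {"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}
            ],
        },
        branch_dict={"docker_image": "fake_service:fake_sha"},
    )
    system_paasta_config = SystemPaastaConfig(
        {"volumes": [{"hostPath": "/h2", "containerPath": "/c2", "mode": "RO"}]},
        "fake_dir",
    )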