def main():
    """Bootstrap a local PaaSTA config directory and exec the dev API server.

    Copies /etc/paasta into ./etc_paasta_for_development, overrides the tron
    and api_endpoints configs to point at local ports, exports
    PAASTA_SYSTEM_CONFIG_DIR, and finally replaces this process with the
    paasta_tools API server (os.execl does not return on success).
    """
    print('-------------------------------------------------------')
    # Fixed instruction text: the old "Please run echo $$PAASTA_SYSTEM_CONFIG_DIR"
    # was a garbled template artifact; this matches the corrected wording used
    # elsewhere in the codebase.
    print(
        'Please run export PAASTA_SYSTEM_CONFIG_DIR=etc_paasta_for_development to continue'
    )
    print(
        "Please set environment variable PAASTA_TEST_CLUSTER to the cluster you want to use."
    )
    print("This is necessary for tron jobs")
    print('-------------------------------------------------------')
    cluster = os.environ.get('PAASTA_TEST_CLUSTER', 'norcal-devc')
    config_path = 'etc_paasta_for_development'

    copy_tree('/etc/paasta', os.path.join(os.getcwd(), config_path))
    # Generate tron.json pointing at the chosen cluster's tron master.
    tron_config = {'tron': {'url': f'http://tron-{cluster}:8089'}}
    with open(config_path + '/tron.json', 'w') as f:
        json.dump(tron_config, f)
    # find unused port
    port = pick_random_port('paasta-dev-api')
    # Generate api endpoints
    api_endpoints = {'api_endpoints': {cluster: f'http://localhost:{port}'}}
    api_endpoints_path = os.path.join(os.getcwd(), config_path,
                                      'api_endpoints.json')
    with open(api_endpoints_path, 'w') as f:
        json.dump(api_endpoints, f)
    # chmod AFTER creating the file: the previous chmod-before-write raised
    # FileNotFoundError when the copied /etc/paasta had no api_endpoints.json.
    os.chmod(api_endpoints_path, 0o777)

    # export config path
    os.environ['PAASTA_SYSTEM_CONFIG_DIR'] = config_path
    os.execl(
        '.tox/py36-linux/bin/python',
        '.tox/py36-linux/bin/python',
        '-m',
        'paasta_tools.api.api',
        *['-D', '-c', cluster, str(port)],
    )
# Example #2
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    """Assemble volumes, command, and environment for a Spark driver
    container and hand off to run_docker_container.

    Returns 1 when no command can be determined, otherwise the exit
    status of run_docker_container.
    """
    mounts = []
    for vol in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        host_path = vol['hostPath']
        if not os.path.exists(host_path):
            # A missing host path skips the binding instead of failing.
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % host_path, ), )
            continue
        mounts.append('%s:%s:%s' %
                      (host_path, vol['containerPath'],
                       vol['mode'].lower()))
    # Always mount the current directory as the Spark work dir.
    mounts.append('%s:%s:rw' % (os.getcwd(), DEFAULT_SPARK_WORK_DIR))

    docker_cmd = instance_config.get_cmd() if args.cmd is None else args.cmd

    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1
    # Changes at docker ENTRYPOINT or CMD does not work.
    if docker_cmd == 'jupyter':
        docker_cmd = 'jupyter notebook -y --ip=%s --notebook-dir=%s --allow-root' % (
            socket.getfqdn(),
            DEFAULT_SPARK_WORK_DIR,
        )

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_%s_%s' % (get_username(), spark_ui_port)

    # Do not put memory and CPU limits on Spark driver for now.
    # Toree won't work with the default memory-swap setting.
    environment = instance_config.get_env_dictionary()
    spark_env = get_spark_configuration(
        args,
        container_name,
        spark_ui_port,
        docker_img,
        system_paasta_config,
    )
    environment.update(spark_env)

    return run_docker_container(
        container_name=container_name,
        volumes=mounts,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
# Example #3
def test_pick_random_port():
    """pick_random_port should be deterministic per service name and pick
    ports in the expected [33000, 58000) range."""
    def passthrough_reserve(ip, port):
        # Hand back the requested port so the choice is fully deterministic.
        return port

    with mock.patch.object(
        ephemeral_port_reserve, 'reserve', side_effect=passthrough_reserve,
    ):
        with mock.patch.object(
            getpass, 'getuser', return_value='nobody', autospec=True,
        ):
            # Same service name -> the same port is tried both times.
            first = utils.pick_random_port('fake_service')
            second = utils.pick_random_port('fake_service')
            assert first == second
            assert 33000 <= first < 58000

            # A different service name -> a different port, same range.
            other = utils.pick_random_port('different_fake_service')
            assert other != first
            assert 33000 <= other < 58000
def main():
    """Bootstrap a local PaaSTA config directory and exec the dev API server.

    Copies /etc/paasta into ./etc_paasta_for_development, overrides the tron
    and api_endpoints configs to point at local ports, exports
    PAASTA_SYSTEM_CONFIG_DIR, and finally replaces this process with the
    paasta_tools API server (os.execl does not return on success).
    """
    print("-------------------------------------------------------")
    print(
        "Please run export PAASTA_SYSTEM_CONFIG_DIR=etc_paasta_for_development to continue"
    )
    print(
        "Please set environment variable PAASTA_TEST_CLUSTER to the cluster you want to use."
    )
    print("This is necessary for tron jobs")
    print("-------------------------------------------------------")
    cluster = os.environ.get("PAASTA_TEST_CLUSTER", "norcal-devc")
    config_path = "etc_paasta_for_development"

    copy_tree("/etc/paasta", os.path.join(os.getcwd(), config_path))
    # Generate tron.json pointing at the chosen cluster's tron master.
    tron_config = {"tron": {"url": f"http://tron-{cluster}:8089"}}
    with open(config_path + "/tron.json", "w") as f:
        json.dump(tron_config, f)
    # find unused port
    port = pick_random_port("paasta-dev-api")
    # Generate api endpoints
    api_endpoints = {"api_endpoints": {cluster: f"http://localhost:{port}"}}
    api_endpoints_path = os.path.join(os.getcwd(), config_path,
                                      "api_endpoints.json")
    with open(api_endpoints_path, "w") as f:
        json.dump(api_endpoints, f)
    # chmod AFTER creating the file: the previous chmod-before-write raised
    # FileNotFoundError when the copied /etc/paasta had no api_endpoints.json.
    os.chmod(api_endpoints_path, 0o777)

    # export config path
    os.environ["PAASTA_SYSTEM_CONFIG_DIR"] = config_path
    os.execl(
        ".tox/py36-linux/bin/python",
        ".tox/py36-linux/bin/python",
        "-m",
        "paasta_tools.api.api",
        *["-D", "-c", cluster, str(port)],
    )
# Example #5
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_url,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    user_port,
    instance_config,
    secret_provider_name,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
    secret_provider_kwargs=None,
    skip_secrets=False,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    Returns 0 for dry runs (and when the exec path is patched out in tests),
    3 on KeyboardInterrupt or a non-running container, otherwise the
    container's exit code.
    """
    # Fix for the previous mutable default argument ({}): normalize here so
    # callers passing nothing still get an empty kwargs dict per call.
    if secret_provider_kwargs is None:
        secret_provider_kwargs = {}
    if user_port:
        if check_if_port_free(user_port):
            chosen_port = user_port
        else:
            paasta_print(
                PaastaColors.red(
                    "The chosen port is already in use!\n"
                    "Try specifying another one, or omit (--port|-o) and paasta will find a free one for you"
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        chosen_port = pick_random_port(service)
    environment = instance_config.get_env_dictionary()
    if not skip_secrets:
        # Decrypt secret env vars and overlay them on the plain environment.
        secret_environment = decrypt_secret_environment_variables(
            secret_provider_name=secret_provider_name,
            environment=environment,
            soa_dir=soa_dir,
            service_name=service,
            cluster_name=instance_config.cluster,
            secret_provider_kwargs=secret_provider_kwargs,
        )
        environment.update(secret_environment)
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config, port0=chosen_port, framework=framework
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()

    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service, instance, instance_config, chosen_port, soa_dir=soa_dir
    )
    # No healthcheck defined: fall back to an interactive run with no
    # published container port.
    if healthcheck_mode is None:
        container_port = None
        interactive = True
    elif not user_port and not healthcheck and not healthcheck_only:
        container_port = None
    else:
        try:
            container_port = instance_config.get_container_port()
        except AttributeError:
            container_port = None

    simulate_healthcheck = (
        healthcheck_only or healthcheck
    ) and healthcheck_mode is not None

    docker_run_args = dict(
        memory=memory,
        chosen_port=chosen_port,
        container_port=container_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        detach=simulate_healthcheck,
        docker_hash=docker_url,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = " ".join(docker_run_cmd)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print(
            "Running docker command:\n%s" % PaastaColors.grey(joined_docker_run_cmd)
        )

    merged_env = {**os.environ, **environment}

    if interactive or not simulate_healthcheck:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        wrapper_path = shutil.which("paasta_docker_wrapper")
        # To properly simulate mesos, we pop the PATH, which is not available to
        # The executor
        merged_env.pop("PATH")
        execlpe(wrapper_path, *docker_run_cmd, merged_env)
        # For testing, when execlpe is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd, env=merged_env)
        if returncode != 0:
            paasta_print(
                "Failure trying to start your container!"
                "Returncode: %d"
                "Output:"
                "%s"
                ""
                "Fix that problem and try again."
                "http://y/paasta-troubleshooting" % (returncode, output),
                sep="\n",
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print("Found our container running with CID %s" % container_id)

        if simulate_healthcheck:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_exit_code():
            # Report the container's recorded exit status.
            returncode = docker_client.inspect_container(container_id)["State"][
                "ExitCode"
            ]
            # Fixed message: dropped the stray trailing ')' from the f-string.
            paasta_print(f"Container exited: {returncode}")

        if healthcheck_only:
            if container_started:
                _output_exit_code()
                _cleanup_container(docker_client, container_id)
            if healthcheck_mode is None:
                paasta_print(
                    "--healthcheck-only, but no healthcheck is defined for this instance!"
                )
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(container_id)["State"]["Running"]
        if running:
            paasta_print("Your service is now running! Tailing stdout and stderr:")
            for line in docker_client.attach(
                container_id, stderr=True, stream=True, logs=True
            ):
                paasta_print(line)
        else:
            _output_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(container_id)["State"]["ExitCode"]
        _cleanup_container(docker_client, container_id)
    return returncode
# Example #6
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    """Assemble volumes, Spark configuration, environment and the docker
    command for a Spark client container, then launch it.

    Also prints the Spark UI / history-server URL and, when clusterman
    metrics are available, reports the job's resource requirements.

    Returns the exit status of run_docker_container.
    """
    volumes = list()
    # Only bind host paths that exist; missing paths are skipped with a
    # warning rather than failing the whole run.
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('{}:{}:{}'.format(volume['hostPath'],
                                             volume['containerPath'],
                                             volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume['hostPath'], ), )

    # The pid is folded into the port seed so concurrent runs by the same
    # user pick different UI ports.
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = 'paasta_spark_run_{}'.format(get_username())
    container_name = spark_app_name + "_" + str(spark_ui_port)
    original_docker_cmd = args.cmd or instance_config.get_cmd()
    # Non-jupyter runs use the port-qualified container name as the app name.
    if 'jupyter' not in original_docker_cmd:
        spark_app_name = container_name

    # NOTE: the Spark config must be built BEFORE the client-only volumes
    # below are appended.
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict)

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    environment = instance_config.get_env_dictionary()
    environment.update(get_spark_env(
        args,
        spark_conf_str,
        spark_ui_port,
    ), )

    webui_url = f'http://{socket.getfqdn()}:{spark_ui_port}'

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if 'history-server' in docker_cmd:
        paasta_print(f'\nSpark history server URL {webui_url}\n')
    elif any(c in docker_cmd
             for c in ['pyspark', 'spark-shell', 'spark-submit']):
        paasta_print(f'\nSpark monitoring URL {webui_url}\n')

    # Resource-requirement reporting is best-effort: a Boto3Error aborts the
    # run unless --suppress-clusterman-metrics-errors was passed.
    if clusterman_metrics and _should_emit_resource_requirements(docker_cmd):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster,
                                       webui_url)
        except Boto3Error as e:
            paasta_print(
                PaastaColors.red(
                    f'Encountered {e} while attempting to send resource requirements to Clusterman.'
                ), )
            if args.suppress_clusterman_metrics_errors:
                paasta_print(
                    'Continuing anyway since --suppress-clusterman-metrics-errors was passed'
                )
            else:
                raise

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
# Example #7
def configure_and_run_docker_container(args, docker_img, instance_config,
                                       system_paasta_config):
    """Assemble volumes, Spark configuration (including AWS credentials),
    environment and the docker command for a Spark client container, then
    launch it.

    Also prints the Spark UI / history-server URL and, when clusterman
    metrics are available, reports the job's resource requirements.

    Returns the exit status of run_docker_container.
    """
    volumes = list()
    # Only bind host paths that exist; missing paths are skipped with a
    # warning on stderr rather than failing the whole run.
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append("{}:{}:{}".format(volume["hostPath"],
                                             volume["containerPath"],
                                             volume["mode"].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]),
                file=sys.stderr,
            )

    # The pid is folded into the port seed so concurrent runs by the same
    # user pick different UI ports.
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = "paasta_spark_run_{}".format(get_username())
    container_name = spark_app_name + "_" + str(spark_ui_port)
    original_docker_cmd = args.cmd or instance_config.get_cmd()
    # Non-jupyter runs use the port-qualified container name as the app name.
    if "jupyter" not in original_docker_cmd:
        spark_app_name = container_name

    access_key, secret_key = get_aws_credentials(args)
    # NOTE: the Spark config must be built BEFORE the client-only volumes
    # below are appended.
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict,
                                             is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key,
                      secret_key))

    webui_url = f"http://{socket.getfqdn()}:{spark_ui_port}"

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        paasta_print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd
             for c in ["pyspark", "spark-shell", "spark-submit"]):
        paasta_print(f"\nSpark monitoring URL {webui_url}\n")

    # Resource-requirement reporting is best-effort: a Boto3Error aborts the
    # run unless --suppress-clusterman-metrics-errors was passed.
    if clusterman_metrics and _should_emit_resource_requirements(
            docker_cmd, args.mrjob):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster,
                                       webui_url)
        except Boto3Error as e:
            paasta_print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                ))
            if args.suppress_clusterman_metrics_errors:
                paasta_print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
# Example #8
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    """Build the mounts, Spark configuration, command and environment for a
    Spark client container and hand off to run_docker_container.

    Returns 1 when no docker command can be derived, otherwise the exit
    status of run_docker_container.
    """
    volumes = []
    for vol in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        host_path = vol['hostPath']
        if not os.path.exists(host_path):
            # A missing host path skips the binding instead of failing.
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % host_path, ), )
            continue
        volumes.append('{}:{}:{}'.format(host_path,
                                         vol['containerPath'],
                                         vol['mode'].lower()))

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_{}_{}'.format(get_username(),
                                                     spark_ui_port)

    # Spark config is computed before the client-only volumes are added.
    spark_conf_str = get_spark_conf_str(
        args=args,
        container_name=container_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1

    environment = instance_config.get_env_dictionary()
    spark_env = get_spark_env(
        args,
        spark_conf_str,
    )
    environment.update(spark_env)

    paasta_print('\nSpark Monitoring URL http://%s:%d\n' %
                 (socket.getfqdn(), spark_ui_port))

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
# Example #9
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_hash,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    user_port,
    instance_config,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    Returns 0 for dry runs (and when the exec path is patched out in tests),
    3 on KeyboardInterrupt or a non-running container, otherwise the
    container's exit code.
    """
    if user_port:
        if check_if_port_free(user_port):
            chosen_port = user_port
        else:
            paasta_print(
                PaastaColors.red(
                    "The chosen port is already in use!\n"
                    "Try specifying another one, or omit (--port|-o) and paasta will find a free one for you",
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        chosen_port = pick_random_port(service)
    environment = instance_config.get_env_dictionary()
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config,
        port0=chosen_port,
        framework=framework,
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()

    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service,
        instance,
        instance_config,
        chosen_port,
        soa_dir=soa_dir,
    )
    # No healthcheck defined: fall back to an interactive run with no
    # published container port.
    if healthcheck_mode is None:
        container_port = None
        interactive = True
    elif not user_port and not healthcheck and not healthcheck_only:
        container_port = None
    else:
        try:
            container_port = instance_config.get_container_port()
        except AttributeError:
            container_port = None

    simulate_healthcheck = (healthcheck_only
                            or healthcheck) and healthcheck_mode is not None

    docker_run_args = dict(
        memory=memory,
        chosen_port=chosen_port,
        container_port=container_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        detach=simulate_healthcheck,
        docker_hash=docker_hash,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = ' '.join(docker_run_cmd)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print('Running docker command:\n%s' %
                     PaastaColors.grey(joined_docker_run_cmd))

    if interactive or not simulate_healthcheck:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        execlp('paasta_docker_wrapper', *docker_run_cmd)
        # For testing, when execlp is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd)
        if returncode != 0:
            paasta_print(
                'Failure trying to start your container!'
                'Returncode: %d'
                'Output:'
                '%s'
                ''
                'Fix that problem and try again.'
                'http://y/paasta-troubleshooting' % (returncode, output),
                sep='\n',
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print('Found our container running with CID %s' % container_id)

        if simulate_healthcheck:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_stdout_and_exit_code():
            # Report the recorded exit status, then dump the container logs.
            returncode = docker_client.inspect_container(
                container_id)['State']['ExitCode']
            # Fixed message: dropped the stray trailing ')' from the format.
            paasta_print('Container exited: %d' % returncode)
            paasta_print('Here is the stdout and stderr:\n\n')
            paasta_print(
                docker_client.attach(container_id,
                                     stderr=True,
                                     stream=False,
                                     logs=True), )

        if healthcheck_only:
            if container_started:
                _output_stdout_and_exit_code()
                _cleanup_container(docker_client, container_id)
            if healthcheck_mode is None:
                paasta_print(
                    '--healthcheck-only, but no healthcheck is defined for this instance!'
                )
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(
            container_id)['State']['Running']
        if running:
            paasta_print(
                'Your service is now running! Tailing stdout and stderr:')
            for line in docker_client.attach(container_id,
                                             stderr=True,
                                             stream=True,
                                             logs=True):
                paasta_print(line)
        else:
            _output_stdout_and_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(
            container_id)['State']['ExitCode']
        _cleanup_container(docker_client, container_id)
    return returncode
# Example #10
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    """Prepare volumes, Spark configuration and the final docker command,
    then run the Spark client container.

    Returns 1 when no command is available, otherwise the exit status of
    run_docker_container.
    """
    volumes = []
    for vol in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        host_path = vol['hostPath']
        if not os.path.exists(host_path):
            # A missing host path only skips this binding; it is not fatal.
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % host_path, ), )
            continue
        volumes.append('%s:%s:%s' %
                       (host_path, vol['containerPath'],
                        vol['mode'].lower()))

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_%s_%s' % (get_username(), spark_ui_port)

    # Spark options are computed before appending the client-only mounts.
    spark_conf_str = get_spark_conf_str(
        args=args,
        container_name=container_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    docker_cmd = args.cmd if args.cmd is not None else instance_config.get_cmd()

    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1

    # Default cli options to start the jupyter notebook server.
    if docker_cmd == 'jupyter':
        docker_cmd = 'jupyter notebook -y --ip=%s --notebook-dir=%s' % (
            socket.getfqdn(),
            args.work_dir.split(':')[1],
        )
    # Spark options are passed as options to pyspark and spark-shell.
    # For jupyter, environment variable SPARK_OPTS is set instead.
    elif docker_cmd in ('pyspark', 'spark-shell'):
        docker_cmd = docker_cmd + ' ' + spark_conf_str
    elif docker_cmd.startswith('spark-submit'):
        remainder = docker_cmd[len('spark-submit'):]
        docker_cmd = 'spark-submit ' + spark_conf_str + remainder

    environment = instance_config.get_env_dictionary()
    spark_env = get_spark_env(
        args,
        spark_conf_str,
    )
    environment.update(spark_env)

    paasta_print('\nSpark Monitoring URL http://%s:%d\n' %
                 (socket.getfqdn(), spark_ui_port))

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )