Example 1
def paasta_local_run(args):
    if args.pull:
        build = False
    elif args.build:
        build = True
    else:
        build = local_makefile_present()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    base_docker_url = get_docker_host()
    docker_client = Client(base_url=base_docker_url)

    if build:
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        paasta_cook_image(None, service=service, soa_dir=args.yelpsoa_config_root)
    else:
        pull_image = True
        tag = None

    try:
        configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            args=args,
            pull_image=pull_image,
        )
    except errors.APIError as e:
        sys.stderr.write('Can\'t run Docker container. Error: %s\n' % str(e))
        sys.exit(1)
Example 2
def log_event(service_config, desired_state):
    user = utils.get_username()
    host = socket.getfqdn()
    line = "Issued request to change state of %s to '%s' by %s@%s" % (
        service_config.get_instance(), desired_state, user, host)
    utils._log(
        service=service_config.get_service(),
        level='event',
        cluster=service_config.get_cluster(),
        instance=service_config.get_instance(),
        component='deploy',
        line=line,
    )
Example 3
def submit_performance_check_job(service, commit, image):
    performance_check_config = load_performance_check_config()
    payload = {
        'service': service,
        'commit': commit,
        'submitter': get_username(),
        'image': image,
    }
    r = requests.post(
        url=performance_check_config['endpoint'],
        data=payload,
    )
    print "Posted a submission to the PaaSTA performance-check service:"
    print r.text
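For context, a minimal standalone sketch of the request this helper issues; the endpoint URL and payload values below are illustrative assumptions, not the real performance-check configuration:

# Hypothetical usage sketch; the endpoint and all values are placeholders.
import requests

payload = {
    'service': 'example_service',
    'commit': '0123abc',
    'submitter': 'example_user',  # normally get_username()
    'image': 'example-registry/example_service:0123abc',
}
r = requests.post(url='https://performance-check.example.invalid/submit', data=payload)
print("Posted a submission to the PaaSTA performance-check service:")
print(r.text)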
Example 4
def build_and_push_docker_image(args):
    """
    Build an image if the default Spark service image is not preferred.
    The image needs to be pushed to a registry for the Spark executors
    to pull.
    """
    if not makefile_responds_to('cook-image'):
        paasta_print(
            "A local Makefile with a 'cook-image' target is required for --build",
            file=sys.stderr,
        )
        return None

    default_tag = '{}-{}'.format(DEFAULT_SPARK_DOCKER_IMAGE_PREFIX, get_username())
    docker_tag = os.environ.get('DOCKER_TAG', default_tag)
    os.environ['DOCKER_TAG'] = docker_tag

    cook_return = paasta_cook_image(
        args=None,
        service=args.service,
        soa_dir=args.yelpsoa_config_root,
    )
    if cook_return != 0:
        return None

    docker_url = f'{args.docker_registry}/{docker_tag}'
    command = f'docker tag {docker_tag} {docker_url}'
    paasta_print(PaastaColors.grey(command))
    retcode, _ = _run(command, stream=True)
    if retcode != 0:
        return None

    if args.docker_registry != DEFAULT_SPARK_DOCKER_REGISTRY:
        command = 'sudo -H docker push %s' % docker_url
    else:
        command = 'docker push %s' % docker_url

    paasta_print(PaastaColors.grey(command))
    retcode, output = _run(command, stream=True)
    if retcode != 0:
        return None

    return docker_url
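The tag and registry URL derived above can be traced in isolation; the image prefix and registry below are assumed placeholder values:

# Sketch of the tag/URL derivation in build_and_push_docker_image
# (constant and registry values are illustrative assumptions).
import getpass
import os

DEFAULT_SPARK_DOCKER_IMAGE_PREFIX = 'paasta-spark-run'   # assumed value
docker_registry = 'docker-registry.example.invalid:443'  # assumed registry

default_tag = '{}-{}'.format(DEFAULT_SPARK_DOCKER_IMAGE_PREFIX, getpass.getuser())
docker_tag = os.environ.get('DOCKER_TAG', default_tag)   # an explicit DOCKER_TAG wins
docker_url = f'{docker_registry}/{docker_tag}'
print(docker_url)  # e.g. docker-registry.example.invalid:443/paasta-spark-run-alice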
Example 5
def paasta_local_run(args):
    if args.pull or args.dry_run:
        build = False
    elif args.build:
        build = True
    else:
        build = local_makefile_present()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    cluster = guess_cluster(service=service, args=args)
    instance = guess_instance(service=service, cluster=cluster, args=args)
    docker_client = get_docker_client()

    if build:
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.dry_run:
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            dry_run=args.dry_run,
        )
    except errors.APIError as e:
        sys.stderr.write('Can\'t run Docker container. Error: %s\n' % str(e))
        return 1
Example 6
def paasta_cook_image(args, service=None, soa_dir=None):
    """Build a docker image"""
    if not service:
        service = args.service
    if service and service.startswith('services-'):
        service = service.split('services-', 1)[1]
    validate_service_name(service, soa_dir)

    run_env = os.environ.copy()
    default_tag = 'paasta-cook-image-%s-%s' % (service, get_username())
    tag = run_env.get('DOCKER_TAG', default_tag)
    run_env['DOCKER_TAG'] = tag

    if not makefile_responds_to('cook-image'):
        paasta_print('ERROR: local-run now requires a cook-image target to be present in the Makefile. See '
                     'http://paasta.readthedocs.io/en/latest/about/contract.html', file=sys.stderr)
        return 1

    try:
        cmd = 'make cook-image'
        returncode, output = _run(
            cmd,
            env=run_env,
            log=True,
            component='build',
            service=service,
            loglevel='debug'
        )
        if returncode != 0:
            _log(
                service=service,
                line='ERROR: make cook-image failed for %s.' % service,
                component='build',
                level='event',
            )
        return returncode

    except KeyboardInterrupt:
        paasta_print('\nProcess interrupted by the user. Cancelling.', file=sys.stderr)
        return 2
Example 7
def paasta_cook_image(args, service=None, soa_dir=None):
    """Build a docker image"""
    if not service:
        service = args.service
    if service and service.startswith('services-'):
        service = service.split('services-', 1)[1]
    validate_service_name(service, soa_dir)

    run_env = os.environ.copy()
    default_tag = 'paasta-cook-image-%s-%s' % (service, get_username())
    tag = run_env.get('DOCKER_TAG', default_tag)
    run_env['DOCKER_TAG'] = tag

    if not makefile_responds_to('cook-image'):
        sys.stderr.write('ERROR: local-run now requires a cook-image target to be present in the Makefile. See '
                         'http://paasta.readthedocs.io/en/latest/about/contract.html\n')
        return 1

    try:
        cmd = 'make cook-image'
        returncode, output = _run(
            cmd,
            env=run_env,
            log=True,
            component='build',
            service=service,
            loglevel='debug'
        )
        if returncode != 0:
            _log(
                service=service,
                line='ERROR: make cook-image failed for %s.' % service,
                component='build',
                level='event',
            )
        return returncode

    except KeyboardInterrupt:
        sys.stderr.write('\nProcess interrupted by the user. Cancelling.\n')
        return 2
Example 8
def paasta_local_run(args):
    validate_environment()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)

    base_docker_url = get_docker_host()

    docker_client = Client(base_url=base_docker_url)

    default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
    tag = os.environ.get('DOCKER_TAG', default_tag)
    os.environ['DOCKER_TAG'] = tag

    paasta_cook_image(None, service=service, soa_dir=args.yelpsoa_config_root)

    try:
        configure_and_run_docker_container(docker_client, tag, service, args)
    except errors.APIError as e:
        sys.stderr.write('Can\'t run Docker container. Error: %s\n' % str(e))
        sys.exit(1)
Example 9
def get_docker_cmd(args, instance_config, spark_conf_str):
    original_docker_cmd = args.cmd or instance_config.get_cmd()

    if args.mrjob:
        return original_docker_cmd + " " + spark_conf_str
    # Default cli options to start the jupyter notebook server.
    elif original_docker_cmd == "jupyter-lab":
        cull_opts = ("--MappingKernelManager.cull_idle_timeout=%s " %
                     args.cull_idle_timeout)
        if args.not_cull_connected is False:
            cull_opts += "--MappingKernelManager.cull_connected=True "

        return "SHELL=bash USER={} /source/virtualenv_run_jupyter/bin/jupyter-lab -y --ip={} {}".format(
            get_username(), socket.getfqdn(), cull_opts)
    elif original_docker_cmd == "history-server":
        return "start-history-server.sh"
    # Spark options are passed as options to pyspark and spark-shell.
    # For jupyter, environment variable SPARK_OPTS is set instead.
    else:
        return inject_spark_conf_str(original_docker_cmd, spark_conf_str)
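A quick check of the mrjob branch above, using stand-in objects; every value here is an assumption made for the sketch:

# With args.mrjob set, get_docker_cmd simply appends the Spark conf string.
from types import SimpleNamespace

args = SimpleNamespace(mrjob=True, cmd='python -m my_job')
instance_config = SimpleNamespace(get_cmd=lambda: None)
assert get_docker_cmd(args, instance_config, '--conf spark.cores.max=4') == \
    'python -m my_job --conf spark.cores.max=4'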
Example 10
def test_run_success(
    mock_log_audit,
    mock_run,
    mock_makefile_responds_to,
    mock_validate_service_name,
):
    mock_run.return_value = (0, 'Output')
    mock_makefile_responds_to.return_value = True
    mock_validate_service_name.return_value = True

    args = mock.MagicMock()
    args.service = 'fake_service'
    assert paasta_cook_image(args) == 0

    mock_log_audit.assert_called_once_with(
        action='cook-image',
        action_details={
            'tag': 'paasta-cook-image-fake_service-{}'.format(get_username())
        },
        service='fake_service',
    )
Example 11
def paasta_local_run(args):
    if args.action == 'build' and not makefile_responds_to('cook-image'):
        sys.stderr.write("A local Makefile with a 'cook-image' target is required for --build\n")
        sys.stderr.write("If you meant to pull the docker image from the registry, explicitly pass --pull\n")
        return 1

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    cluster = guess_cluster(service=service, args=args)
    instance = guess_instance(service=service, cluster=cluster, args=args)
    docker_client = get_docker_client()

    if args.action == 'build':
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.action == 'dry_run':
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            dry_run=args.action == 'dry_run',
        )
    except errors.APIError as e:
        sys.stderr.write('Can\'t run Docker container. Error: %s\n' % str(e))
        return 1
Example 12
def get_spark_app_name(original_docker_cmd: Union[Any, str, List[str]],
                       spark_ui_port: int) -> str:
    """Use submitted batch name as default spark_run job name"""
    docker_cmds = (shlex.split(original_docker_cmd) if isinstance(
        original_docker_cmd, str) else original_docker_cmd)
    spark_app_name = None
    after_spark_submit = False
    for arg in docker_cmds:
        if arg == "spark-submit":
            after_spark_submit = True
        elif after_spark_submit and arg.endswith(".py"):
            batch_name = arg.split("/")[-1].replace(".py", "")
            spark_app_name = "paasta_" + batch_name
            break
        elif arg == "jupyter-lab":
            spark_app_name = "paasta_jupyter"
            break

    if spark_app_name is None:
        spark_app_name = "paasta_spark_run"

    spark_app_name += "_{}_{}".format(get_username(), spark_ui_port)
    return spark_app_name
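The naming rules above work out to the following; the port number is arbitrary, and the suffix comes from get_username() and spark_ui_port:

# Illustrative behaviour of get_spark_app_name (port 4040 is arbitrary).
suffix = "_{}_{}".format(get_username(), 4040)
assert get_spark_app_name("spark-submit /code/jobs/my_batch.py", 4040) == "paasta_my_batch" + suffix
assert get_spark_app_name(["jupyter-lab"], 4040) == "paasta_jupyter" + suffix
assert get_spark_app_name("bash -c 'sleep 60'", 4040) == "paasta_spark_run" + suffix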
Example 13
def get_container_name():
    return 'paasta_local_run_%s_%s' % (get_username(), randint(1, 999999))
Example 14
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    volumes = list()
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('%s:%s:%s' %
                           (volume['hostPath'], volume['containerPath'],
                            volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume['hostPath'], ), )
    volumes.append('%s:%s:rw' % (os.getcwd(), DEFAULT_SPARK_WORK_DIR))
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    if args.cmd is None:
        docker_cmd = instance_config.get_cmd()
    else:
        docker_cmd = args.cmd

    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1
    # Changes to the docker ENTRYPOINT or CMD do not work.
    elif docker_cmd == 'jupyter':
        docker_cmd = 'jupyter notebook -y --ip=%s --notebook-dir=%s' % (
            socket.getfqdn(),
            DEFAULT_SPARK_WORK_DIR,
        )

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_%s_%s' % (get_username(), spark_ui_port)

    # Do not put memory and CPU limits on Spark driver for now.
    # Toree won't work with the default memory-swap setting.
    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_configuration(
            args,
            container_name,
            spark_ui_port,
            docker_img,
            system_paasta_config,
        ), )

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
Example 15
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    volumes = list()
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('%s:%s:%s' %
                           (volume['hostPath'], volume['containerPath'],
                            volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume['hostPath'], ), )

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_%s_%s' % (get_username(), spark_ui_port)

    spark_conf_str = get_spark_conf_str(
        args=args,
        container_name=container_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    if args.cmd is None:
        docker_cmd = instance_config.get_cmd()
    else:
        docker_cmd = args.cmd

    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1

    # Default cli options to start the jupyter notebook server.
    if docker_cmd == 'jupyter':
        docker_cmd = 'jupyter notebook -y --ip=%s --notebook-dir=%s' % (
            socket.getfqdn(),
            args.work_dir.split(':')[1],
        )
    # Spark options are passed as options to pyspark and spark-shell.
    # For jupyter, environment variable SPARK_OPTS is set instead.
    elif docker_cmd in ['pyspark', 'spark-shell']:
        docker_cmd = docker_cmd + ' ' + spark_conf_str
    elif docker_cmd.startswith('spark-submit'):
        docker_cmd = 'spark-submit ' + spark_conf_str + docker_cmd[
            len('spark-submit'):]

    environment = instance_config.get_env_dictionary()
    environment.update(get_spark_env(
        args,
        spark_conf_str,
    ), )

    paasta_print('\nSpark Monitoring URL http://%s:%d\n' %
                 (socket.getfqdn(), spark_ui_port))

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
Example 16
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    volumes = list()
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('{}:{}:{}'.format(volume['hostPath'],
                                             volume['containerPath'],
                                             volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume['hostPath'], ), )

    spark_ui_port = pick_random_port(args.service)
    container_name = 'paasta_spark_run_{}_{}'.format(get_username(),
                                                     spark_ui_port)

    spark_conf_str = get_spark_conf_str(
        args=args,
        container_name=container_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if docker_cmd is None:
        paasta_print(
            "A command is required, pyspark, spark-shell, spark-submit or jupyter",
            file=sys.stderr)
        return 1

    environment = instance_config.get_env_dictionary()
    environment.update(get_spark_env(
        args,
        spark_conf_str,
    ), )

    paasta_print('\nSpark Monitoring URL http://%s:%d\n' %
                 (socket.getfqdn(), spark_ui_port))

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
Example 17
def paasta_local_run(args):
    if args.action == 'build' and not makefile_responds_to('cook-image'):
        sys.stderr.write("A local Makefile with a 'cook-image' target is required for --build\n")
        sys.stderr.write("If you meant to pull the docker image from the registry, explicitly pass --pull\n")
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        sys.stdout.write(PaastaColors.yellow(
            "Warning: Couldn't load config files from '/etc/paasta'. This indicates\n"
            "PaaSTA is not configured locally on this host, and local-run may not behave\n"
            "the same way it would behave on a server configured for PaaSTA.\n"
        ))
        system_paasta_config = SystemPaastaConfig({"volumes": []}, '/etc/paasta')

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config['default_cluster']
        except KeyError:
            sys.stderr.write(PaastaColors.red(
                "PaaSTA on this machine has not been configured with a default cluster.\n"
                "Please pass one to local-run using '-c'.\n"))
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    if args.action == 'build':
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.action == 'dry_run':
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == 'dry_run',
        )
    except errors.APIError as e:
        sys.stderr.write('Can\'t run Docker container. Error: %s\n' % str(e))
        return 1
Example 18
def paasta_local_run(args):
    if args.action == "pull" and os.geteuid() != 0 and not docker_config_available():
        paasta_print("Re-executing paasta local-run --pull with sudo..")
        os.execvp("sudo", ["sudo", "-H"] + sys.argv)
    if args.action == "build" and not makefile_responds_to("cook-image"):
        paasta_print(
            "A local Makefile with a 'cook-image' target is required for --build",
            file=sys.stderr,
        )
        paasta_print(
            "If you meant to pull the docker image from the registry, explicitly pass --pull",
            file=sys.stderr,
        )
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates"
                "PaaSTA is not configured locally on this host, and local-run may not behave"
                "the same way it would behave on a server configured for PaaSTA."
            ),
            sep="\n",
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, "/etc/paasta")

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config["default_cluster"]
        except KeyError:
            paasta_print(
                PaastaColors.red(
                    "PaaSTA on this machine has not been configured with a default cluster."
                    "Please pass one to local-run using '-c'."
                ),
                sep="\n",
                file=sys.stderr,
            )
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    docker_sha = None
    docker_url = None

    if args.action == "build":
        default_tag = "paasta-local-run-{}-{}".format(service, get_username())
        docker_url = os.environ.get("DOCKER_TAG", default_tag)
        os.environ["DOCKER_TAG"] = docker_url
        pull_image = False
        cook_return = paasta_cook_image(
            args=None, service=service, soa_dir=args.yelpsoa_config_root
        )
        if cook_return != 0:
            return cook_return
    elif args.action == "dry_run":
        pull_image = False
        docker_url = None
        docker_sha = args.sha
    else:
        pull_image = True
        docker_url = None
        docker_sha = args.sha

    try:
        return configure_and_run_docker_container(
            docker_client=docker_client,
            docker_url=docker_url,
            docker_sha=docker_sha,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == "dry_run",
        )
    except errors.APIError as e:
        paasta_print("Can't run Docker container. Error: %s" % str(e), file=sys.stderr)
        return 1
Example 19
def paasta_cook_image(
    args: Optional[argparse.Namespace],
    service: Optional[str] = None,
    soa_dir: Optional[str] = None,
) -> int:
    """Build a docker image"""
    if not service:
        if args is None:
            print(
                "ERROR: No arguments or service passed to cook-image - unable to determine what service to cook an image for",
                file=sys.stderr,
            )
            return 1
        service = args.service
    if service and service.startswith("services-"):
        service = service.split("services-", 1)[1]
    if not soa_dir:
        if args is None:
            print(
                "ERROR: No arguments or soadir passed to cook-image - unable to determine where to look for soa-configs",
                file=sys.stderr,
            )
            return 1
        soa_dir = args.yelpsoa_config_root

    validate_service_name(service, soa_dir)

    run_env = os.environ.copy()
    if args is not None and args.commit is not None:
        # if we're given a commit, we're likely being called by Jenkins or someone
        # trying to push the cooked image to our registry - as such, we should tag
        # the cooked image as `paasta itest` would.
        tag = build_docker_tag(service, args.commit, args.image_version)
    else:
        default_tag = "paasta-cook-image-{}-{}".format(service, get_username())
        tag = run_env.get("DOCKER_TAG", default_tag)
    run_env["DOCKER_TAG"] = tag

    if not makefile_responds_to("cook-image"):
        print(
            "ERROR: local-run now requires a cook-image target to be present in the Makefile. See "
            "http://paasta.readthedocs.io/en/latest/about/contract.html.",
            file=sys.stderr,
        )
        return 1

    try:
        cmd = "make cook-image"
        returncode, output = _run(
            cmd,
            env=run_env,
            log=True,
            component="build",
            service=service,
            loglevel="debug",
        )
        if returncode != 0:
            _log(
                service=service,
                line="ERROR: make cook-image failed for %s." % service,
                component="build",
                level="event",
            )
        else:
            action_details = {"tag": tag}
            _log_audit(
                action="cook-image", action_details=action_details, service=service
            )
        return returncode

    except KeyboardInterrupt:
        print("\nProcess interrupted by the user. Cancelling.", file=sys.stderr)
        return 2
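Restated standalone, the tag selection at the top of this function gives an explicit DOCKER_TAG in the environment precedence over the generated default; the service and tag names here are illustrative:

# Sketch of the DOCKER_TAG precedence in paasta_cook_image.
import os

default_tag = "paasta-cook-image-{}-{}".format('example_service', 'alice')

run_env = dict(os.environ, DOCKER_TAG='my-custom-tag')   # env override present
assert run_env.get("DOCKER_TAG", default_tag) == 'my-custom-tag'

run_env = {k: v for k, v in os.environ.items() if k != 'DOCKER_TAG'}  # no override
assert run_env.get("DOCKER_TAG", default_tag) == 'paasta-cook-image-example_service-alice'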
Example 20
def get_container_name():
    return "paasta_local_run_{}_{}".format(get_username(), randint(1, 999999))
Example 21
def configure_and_run_docker_container(args, docker_img, instance_config,
                                       system_paasta_config):
    volumes = list()
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append("{}:{}:{}".format(volume["hostPath"],
                                             volume["containerPath"],
                                             volume["mode"].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]),
                file=sys.stderr,
            )

    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = "paasta_spark_run_{}".format(get_username())
    container_name = spark_app_name + "_" + str(spark_ui_port)
    original_docker_cmd = args.cmd or instance_config.get_cmd()
    if "jupyter" not in original_docker_cmd:
        spark_app_name = container_name

    access_key, secret_key = get_aws_credentials(args)
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict,
                                             is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key,
                      secret_key))

    webui_url = f"http://{socket.getfqdn()}:{spark_ui_port}"

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        paasta_print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd
             for c in ["pyspark", "spark-shell", "spark-submit"]):
        paasta_print(f"\nSpark monitoring URL {webui_url}\n")

    if clusterman_metrics and _should_emit_resource_requirements(
            docker_cmd, args.mrjob):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster,
                                       webui_url)
        except Boto3Error as e:
            paasta_print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                ))
            if args.suppress_clusterman_metrics_errors:
                paasta_print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
Example 22
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
):
    # User configurable Spark options
    user_args = {
        'spark.app.name': spark_app_name,
        'spark.cores.max': '4',
        'spark.executor.cores': '2',
        'spark.executor.memory': '4g',
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        'spark.mesos.constraints': 'pool:%s' % args.pool,
        'spark.mesos.executor.docker.forcePullImage': 'true',
    }

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    non_user_args = {
        'spark.master':
        'mesos://%s' % mesos_address,
        'spark.ui.port':
        spark_ui_port,
        'spark.executorEnv.PAASTA_SERVICE':
        args.service,
        'spark.executorEnv.PAASTA_INSTANCE':
        '{}_{}'.format(args.instance, get_username()),
        'spark.executorEnv.PAASTA_CLUSTER':
        args.cluster,
        'spark.mesos.executor.docker.parameters':
        'label=paasta_service={},label=paasta_instance={}_{}'.format(
            args.service,
            args.instance,
            get_username(),
        ),
        'spark.mesos.executor.docker.volumes':
        ','.join(volumes),
        'spark.mesos.executor.docker.image':
        docker_img,
        'spark.mesos.principal':
        'spark',
        'spark.mesos.secret':
        _load_mesos_secret(),
        # The derby.system.home property defaults to '.',
        # which requires directory permission changes.
        'spark.driver.extraJavaOptions':
        '-Dderby.system.home=/tmp/derby',
    }

    if not args.build and not args.image:
        non_user_args['spark.mesos.uris'] = 'file:///root/.dockercfg'

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split('=')
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." %
                        spark_arg, ),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0],
                            non_user_args[fields[0]],
                        ), ),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if int(user_args['spark.cores.max']) < int(
            user_args['spark.executor.cores']):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".
                format(
                    user_args['spark.cores.max'],
                    user_args['spark.executor.cores'],
                ), ),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args['spark.executor.memory']
    if exec_mem[-1] != 'g' or not exec_mem[:-1].isdigit() or int(
            exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args['spark.executor.memory'], ), ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args[
        'spark.mesos.executor.docker.parameters'] += ',cpus={}'.format(
            user_args['spark.executor.cores'])

    return dict(non_user_args, **user_args)
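The --spark-args handling above splits the string on whitespace and each token on '='; here is a standalone rerun of that merge, with an illustrative argument string:

# How user-supplied spark args override the defaults in get_spark_config.
user_args = {'spark.cores.max': '4', 'spark.executor.cores': '2',
             'spark.executor.memory': '4g'}
for spark_arg in 'spark.cores.max=8 spark.executor.memory=8g'.split():
    fields = spark_arg.split('=')
    assert len(fields) == 2, 'expected option=value'
    user_args[fields[0]] = fields[1]
assert user_args['spark.cores.max'] == '8'
assert user_args['spark.executor.memory'] == '8g'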
Example 23
def configure_and_run_docker_container(
    args,
    docker_img,
    instance_config,
    system_paasta_config,
):
    volumes = list()
    for volume in instance_config.get_volumes(
            system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('{}:{}:{}'.format(volume['hostPath'],
                                             volume['containerPath'],
                                             volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume['hostPath'], ), )

    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = 'paasta_spark_run_{}'.format(get_username())
    container_name = spark_app_name + "_" + str(spark_ui_port)
    original_docker_cmd = args.cmd or instance_config.get_cmd()
    if 'jupyter' not in original_docker_cmd:
        spark_app_name = container_name

    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict)

    # Spark client specific volumes
    volumes.append('%s:rw' % args.work_dir)
    volumes.append('/etc/passwd:/etc/passwd:ro')
    volumes.append('/etc/group:/etc/group:ro')

    environment = instance_config.get_env_dictionary()
    environment.update(get_spark_env(
        args,
        spark_conf_str,
        spark_ui_port,
    ), )

    webui_url = f'http://{socket.getfqdn()}:{spark_ui_port}'

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if 'history-server' in docker_cmd:
        paasta_print(f'\nSpark history server URL {webui_url}\n')
    elif any(c in docker_cmd
             for c in ['pyspark', 'spark-shell', 'spark-submit']):
        paasta_print(f'\nSpark monitoring URL {webui_url}\n')

    if clusterman_metrics and _should_emit_resource_requirements(docker_cmd):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster,
                                       webui_url)
        except Boto3Error as e:
            paasta_print(
                PaastaColors.red(
                    f'Encountered {e} while attempting to send resource requirements to Clusterman.'
                ), )
            if args.suppress_clusterman_metrics_errors:
                paasta_print(
                    'Continuing anyway since --suppress-clusterman-metrics-errors was passed'
                )
            else:
                raise

    return run_docker_container(
        container_name=container_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
    )
Example 24
def paasta_local_run(args):
    if args.action == 'build' and not makefile_responds_to('cook-image'):
        paasta_print("A local Makefile with a 'cook-image' target is required for --build", file=sys.stderr)
        paasta_print("If you meant to pull the docker image from the registry, explicitly pass --pull", file=sys.stderr)
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates"
                "PaaSTA is not configured locally on this host, and local-run may not behave"
                "the same way it would behave on a server configured for PaaSTA."
            ),
            sep='\n',
        )
        system_paasta_config = SystemPaastaConfig({"volumes": []}, '/etc/paasta')

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config['default_cluster']
        except KeyError:
            paasta_print(
                PaastaColors.red(
                    "PaaSTA on this machine has not been configured with a default cluster."
                    "Please pass one to local-run using '-c'."),
                sep='\n',
                file=sys.stderr,
            )
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    if args.action == 'build':
        default_tag = 'paasta-local-run-%s-%s' % (service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.action == 'dry_run':
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        return configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == 'dry_run',
        )
    except errors.APIError as e:
        paasta_print(
            'Can\'t run Docker container. Error: %s' % str(e),
            file=sys.stderr,
        )
        return 1
Example 25
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
    access_key,
    secret_key,
):
    # User configurable Spark options
    user_args = {
        "spark.app.name": spark_app_name,
        "spark.cores.max": "4",
        "spark.executor.cores": "2",
        "spark.executor.memory": "4g",
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        "spark.mesos.constraints": "pool:%s" % args.pool,
        "spark.mesos.executor.docker.forcePullImage": "true",
    }

    default_event_log_dir = get_default_event_log_dir(access_key, secret_key)
    if default_event_log_dir is not None:
        user_args["spark.eventLog.enabled"] = "true"
        user_args["spark.eventLog.dir"] = default_event_log_dir

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = "{}:{}".format(find_mesos_leader(cluster_fqdn),
                                   MESOS_MASTER_PORT)
    non_user_args = {
        "spark.master":
        "mesos://%s" % mesos_address,
        "spark.ui.port":
        spark_ui_port,
        "spark.executorEnv.PAASTA_SERVICE":
        args.service,
        "spark.executorEnv.PAASTA_INSTANCE":
        "{}_{}".format(args.instance, get_username()),
        "spark.executorEnv.PAASTA_CLUSTER":
        args.cluster,
        "spark.executorEnv.PAASTA_INSTANCE_TYPE":
        "spark",
        "spark.mesos.executor.docker.parameters":
        "label=paasta_service={},label=paasta_instance={}_{}".format(
            args.service, args.instance, get_username()),
        "spark.mesos.executor.docker.volumes":
        ",".join(volumes),
        "spark.mesos.executor.docker.image":
        docker_img,
        "spark.mesos.principal":
        "spark",
        "spark.mesos.secret":
        _load_mesos_secret(),
        # The derby.system.home property defaults to '.',
        # which requires directory permission changes.
        "spark.driver.extraJavaOptions":
        "-Dderby.system.home=/tmp/derby",
    }

    if not args.build and not args.image:
        non_user_args["spark.mesos.uris"] = "file:///root/.dockercfg"

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split("=")
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." %
                        spark_arg),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0], non_user_args[fields[0]])),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if "spark.sql.shuffle.partitions" not in user_args:
        num_partitions = str(2 * int(user_args["spark.cores.max"]))
        user_args["spark.sql.shuffle.partitions"] = num_partitions
        paasta_print(
            PaastaColors.yellow(
                f"Warning: spark.sql.shuffle.partitions has been set to"
                f" {num_partitions} to be equal to twice the number of "
                f"requested cores, but you should consider setting a "
                f"higher value if necessary."))

    if int(user_args["spark.cores.max"]) < int(
            user_args["spark.executor.cores"]):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".
                format(user_args["spark.cores.max"],
                       user_args["spark.executor.cores"])),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args["spark.executor.memory"]
    if exec_mem[-1] != "g" or not exec_mem[:-1].isdigit() or int(
            exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args["spark.executor.memory"])),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args[
        "spark.mesos.executor.docker.parameters"] += ",cpus={}".format(
            user_args["spark.executor.cores"])

    return dict(non_user_args, **user_args)