Example #1
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster = spark_client.get_cluster(args.cluster_id)
    cluster_config = spark_client.get_cluster_config(args.cluster_id)
    ssh_conf = SshConfig()

    ssh_conf.merge(
        cluster_id=args.cluster_id,
        username=args.username,
        job_ui_port=args.jobui,
        job_history_ui_port=args.jobhistoryui,
        web_ui_port=args.webui,
        host=args.host,
        connect=args.connect,
        internal=args.internal)

    log.info("-------------------------------------------")
    utils.log_property("spark cluster id", ssh_conf.cluster_id)
    utils.log_property("open webui", "{0}{1}".format(http_prefix, ssh_conf.web_ui_port))
    utils.log_property("open jobui", "{0}{1}".format(http_prefix, ssh_conf.job_ui_port))
    utils.log_property("open jobhistoryui", "{0}{1}".format(http_prefix, ssh_conf.job_history_ui_port))
    print_plugin_ports(cluster_config)
    utils.log_property("ssh username", ssh_conf.username)
    utils.log_property("connect", ssh_conf.connect)
    log.info("-------------------------------------------")

    try:
        shell_out_ssh(spark_client, ssh_conf)
    except OSError:
        # no ssh client is found, falling back to pure python
        native_python_ssh_into_master(spark_client, cluster, ssh_conf, args.password)
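
All of these snippets are excerpted from the aztk CLI, so their shared import preamble is elided. A plausible reconstruction for the examples below, assuming the aztk_cli package layout these excerpts come from (treat the exact module paths as an assumption; individual examples additionally pull in names such as batch_error, Toolkit and TOOLKIT_MAP):

import typing

import aztk.spark
from aztk.spark.models import ClusterConfiguration, UserConfiguration
from aztk_cli import config, log, utils
from aztk_cli.config import SshConfig, load_aztk_spark_config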
Example #2
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_ids = args.cluster_ids

    for cluster_id in cluster_ids:
        if not args.force:
            if not args.keep_logs:
                log.warning(
                    "All logs persisted for this cluster will be deleted.")

            confirmation_cluster_id = input(
                "Please confirm the id of the cluster you wish to delete [{}]: "
                .format(cluster_id))

            if confirmation_cluster_id != cluster_id:
                log.error(
                    "Confirmation cluster id does not match. Please try again."
                )
                return

        if spark_client.cluster.delete(id=cluster_id,
                                       keep_logs=args.keep_logs):
            log.info("Deleting cluster %s", cluster_id)
        else:
            log.error(
                "Cluster with id '%s' doesn't exist or was already deleted.",
                cluster_id)
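
Examples #2, #10 and #11 all repeat the same confirm-by-retyping guard before a destructive call. A minimal sketch of that guard factored into a reusable helper (the helper is ours, not part of aztk):

def confirm_deletion(resource_id: str, force: bool = False) -> bool:
    """Return True only if the user retypes the exact resource id (or --force was given)."""
    if force:
        return True
    typed = input("Please confirm the id of the resource you wish to delete [{}]: ".format(resource_id))
    return typed == resource_id

Each delete endpoint would then reduce to a single `if confirm_deletion(...)` check before calling the client.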
Example #3
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    if spark_client.stop_job_app(args.job_id, args.app_name):
        log.info("Stopped app {0}".format(args.app_name))
    else:
        log.error("App with name {0} does not exist or was already deleted")
Example #4
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    if spark_client.job.stop_application(args.job_id, args.app_name):
        log.info("Stopped app %s", args.app_name)
    else:
        log.error("App with name %s does not exist or was already deleted",
                  args.app_name)
Example #5
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config() if args.cluster_path is None \
        else config.read_cluster_config(args.cluster_path)
    cluster_conf.merge(file_config)

    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username,
                                                 password=args.password),
        ))

    if cluster_conf.toolkit:
        if args.docker_repo:
            cluster_conf.toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            cluster_conf.toolkit.docker_run_options = args.docker_run_options

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_configuration.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_configuration,
        )
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.cluster.create(
            cluster_configuration=cluster_conf,
            vm_ver=args.vm_os_ver,
            wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #6
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        size=args.size,
        size_low_priority=args.size_low_priority,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        )))

    if args.docker_repo and cluster_conf.toolkit:
        cluster_conf.toolkit.docker_repo = args.docker_repo

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #7
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster = spark_client.get_cluster(args.cluster_id)
    cluster_config = spark_client.get_cluster_config(args.cluster_id)
    ssh_conf = SshConfig()

    ssh_conf.merge(cluster_id=args.cluster_id,
                   username=args.username,
                   job_ui_port=args.jobui,
                   job_history_ui_port=args.jobhistoryui,
                   web_ui_port=args.webui,
                   jupyter_port=args.jupyter,
                   name_node_ui_port=args.namenodeui,
                   rstudio_server_port=args.rstudioserver,
                   host=args.host,
                   connect=args.connect)

    log.info("-------------------------------------------")
    utils.log_property("spark cluster id", ssh_conf.cluster_id)
    utils.log_property("open webui", "{0}{1}".format(http_prefix,
                                                     ssh_conf.web_ui_port))
    utils.log_property("open jobui", "{0}{1}".format(http_prefix,
                                                     ssh_conf.job_ui_port))
    utils.log_property(
        "open jobhistoryui", "{0}{1}".format(http_prefix,
                                             ssh_conf.job_history_ui_port))
    print_plugin_ports(cluster_config)
    utils.log_property("ssh username", ssh_conf.username)
    utils.log_property("connect", ssh_conf.connect)
    log.info("-------------------------------------------")

    # get ssh command
    try:
        ssh_cmd = utils.ssh_in_master(
            client=spark_client,
            cluster_id=ssh_conf.cluster_id,
            webui=ssh_conf.web_ui_port,
            jobui=ssh_conf.job_ui_port,
            jobhistoryui=ssh_conf.job_history_ui_port,
            username=ssh_conf.username,
            host=ssh_conf.host,
            connect=ssh_conf.connect)

        if not ssh_conf.connect:
            log.info("")
            log.info(
                "Use the following command to connect to your spark head node:"
            )
            log.info("\t%s", ssh_cmd)

    except batch_error.BatchErrorException as e:
        if e.error.code == "PoolNotFound":
            raise aztk.error.AztkError(
                "The cluster you are trying to connect to does not exist.")
        else:
            raise
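
The except-clause above translates the Batch service's PoolNotFound code into a friendlier AztkError and re-raises everything else unchanged. The same translate-or-reraise pattern in a self-contained form (the exception classes are stand-ins for batch_error.BatchErrorException and aztk.error.AztkError):

class BackendError(Exception):
    """Stand-in for a service error carrying a machine-readable code."""
    def __init__(self, code: str):
        super().__init__(code)
        self.code = code

class FriendlyError(Exception):
    """Stand-in for aztk.error.AztkError."""

def run_translated(action):
    try:
        return action()
    except BackendError as e:
        if e.code == "PoolNotFound":
            # Known failure: replace it with an actionable message.
            raise FriendlyError("The cluster you are trying to connect to does not exist.")
        raise  # unknown backend errors propagate unchanged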
Example #8
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_id = args.cluster_id
    cluster = spark_client.get_cluster(cluster_id)
    utils.print_cluster(spark_client, cluster)

    configuration = spark_client.get_cluster_config(cluster_id)
    if configuration and args.show_config:
        log.info("-------------------------------------------")
        log.info("Cluster configuration:")
        utils.print_cluster_conf(configuration, False)
Example #9
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(
        cluster_conf,
        wait=wait
    )

    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #10
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_id = args.cluster_id

    if not args.force:
        confirmation_cluster_id = input("Please confirm the id of the cluster you wish to delete: ")

        if confirmation_cluster_id != cluster_id:
            log.error("Confirmation cluster id does not match. Please try again.")
            return

    if spark_client.delete_cluster(cluster_id):
        log.info("Deleting cluster %s", cluster_id)
    else:
        log.error("Cluster with id '%s' doesn't exist or was already deleted.", cluster_id)
Example #11
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    job_id = args.job_id

    if not args.force:
        # check if job exists before prompting for confirmation
        spark_client.get_job(job_id)

        confirmation_job_id = input("Please confirm the id of the job you wish to delete: ")

        if confirmation_job_id != job_id:
            log.error("Confirmation job id does not match. Please try again.")
            return

    if spark_client.delete_job(job_id):
        log.info("Deleting Job %s", job_id)
    else:
        log.error("Job with id '%s' doesn't exist or was already deleted.", job_id)
Example #12
def execute(args: typing.NamedTuple):
    if not args.toolkit_software:
        return print_available_softwares()

    if not validate_software(args.toolkit_software):
        return None

    if not args.version:
        return print_available_software_version(args.toolkit_software)
    if not args.environment:
        print_available_environments(args.toolkit_software)

    toolkit = Toolkit(software=args.toolkit_software,
                      version=args.version,
                      environment=args.environment)

    toolkit.validate()
    log.info("Docker image picked for this toolkit: %s",
             toolkit.get_docker_repo(args.gpu))
    return None
Example #13
def shell_out_ssh(spark_client, cluster_configuration, ssh_conf):
    try:
        ssh_cmd = utils.ssh_in_master(
            client=spark_client,
            cluster_id=ssh_conf.cluster_id,
            cluster_configuration=cluster_configuration,
            webui=ssh_conf.web_ui_port,
            jobui=ssh_conf.job_ui_port,
            jobhistoryui=ssh_conf.job_history_ui_port,
            username=ssh_conf.username,
            host=ssh_conf.host,
            connect=ssh_conf.connect,
            internal=ssh_conf.internal,
        )

        if not ssh_conf.connect:
            log.info("")
            log.info(
                "Use the following command to connect to your spark head node:"
            )
            log.info("\t%s", ssh_cmd)

    except batch_error.BatchErrorException as e:
        if e.error.code == "PoolNotFound":
            raise aztk.error.AztkError(
                "The cluster you are trying to connect to does not exist.")
        else:
            raise
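
shell_out_ssh is the helper that Example #1 wraps in try/except OSError, falling back to a pure-Python SSH client when no ssh binary is installed. A self-contained sketch of that detect-and-fall-back pattern using only the standard library (host and commands are placeholders):

import shutil
import subprocess

def shell_out(host: str):
    """Prefer the system ssh client; raise OSError when none is installed."""
    if shutil.which("ssh") is None:
        raise OSError("no ssh client found")
    subprocess.run(["ssh", host], check=True)

def connect(host: str):
    try:
        shell_out(host)
    except OSError:
        # Fall back to a pure-Python implementation, as Example #1 does
        # with native_python_ssh_into_master.
        print("no ssh client found, falling back to pure python for", host)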
Example #14
def print_plugin_ports(cluster_config: ClusterConfiguration):

    if cluster_config and cluster_config.plugins:
        plugins = cluster_config.plugins
        has_ports = False
        for plugin in plugins:
            for port in plugin.ports:
                if port.expose_publicly:
                    has_ports = True
                    break

        if has_ports:
            log.info("plugins:")
            for plugin in plugins:
                for port in plugin.ports:
                    if port.expose_publicly:
                        label = "  - open {}".format(plugin.name)

                        if port.name:
                            label += " {}".format(port.name)

                        url = "{0}{1}".format(http_prefix, port.public_port)
                        utils.log_property(label, url)
Example #15
def print_plugin_ports(cluster_configuration: ClusterConfiguration):
    if cluster_configuration and cluster_configuration.plugins:
        plugins = cluster_configuration.plugins
        has_ports = False
        plugin_ports = {}
        for plugin in plugins:
            plugin_ports[plugin.name] = []
            for port in plugin.ports:
                if port.expose_publicly:
                    has_ports = True
                    plugin_ports[plugin.name].append(port)

        if has_ports:
            log.info("plugins:")

        for plugin in plugin_ports:
            if plugin_ports[plugin]:
                log.info(" %s ", plugin)
                for port in plugin_ports[plugin]:
                    label = "    - open"
                    if port.name:
                        label += " {}".format(port.name)
                    url = "{0}{1}".format(http_prefix, port.public_port)
                    utils.log_property(label, url)
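
Example #15 improves on #14 by grouping public ports per plugin in a single pass instead of scanning the list twice. The same group-then-print idea in a self-contained form (the Port and Plugin types are stand-ins for aztk's plugin model):

from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class Port:
    public_port: int
    name: str = ""
    expose_publicly: bool = True

@dataclass
class Plugin:
    name: str
    ports: List[Port] = field(default_factory=list)

def public_ports_by_plugin(plugins: List[Plugin]) -> Dict[str, List[Port]]:
    """Group publicly exposed ports under their owning plugin's name."""
    return {
        plugin.name: [port for port in plugin.ports if port.expose_publicly]
        for plugin in plugins
    }

# e.g. public_ports_by_plugin([Plugin("jupyter", [Port(8888)])])
#      -> {"jupyter": [Port(public_port=8888, ...)]}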
Example #16
def print_available_software_version(software: str):
    toolkit_def = TOOLKIT_MAP.get(software)
    log.info("Available version for %s: ", software)
    for version in toolkit_def.versions:
        log.info("  - %s", version)
Example #17
def log_application(args, jars, py_files, files):
    log.info("-------------------------------------------")
    log.info("Spark cluster id:        %s", args.cluster_id)
    log.info("Spark app name:          %s", args.name)
    log.info("Wait for app completion: %s", args.wait)
    if args.main_class is not None:
        log.info("Entry point class:       %s", args.main_class)
    if jars:
        log.info("JARS:                    %s", jars)
    if py_files:
        log.info("PY_Files:                %s", py_files)
    if files:
        log.info("Files:                   %s", files)
    if args.driver_java_options is not None:
        log.info("Driver java options:     %s", args.driver_java_options)
    if args.driver_library_path is not None:
        log.info("Driver library path:     %s", args.driver_library_path)
    if args.driver_class_path is not None:
        log.info("Driver class path:       %s", args.driver_class_path)
    if args.driver_memory is not None:
        log.info("Driver memory:           %s", args.driver_memory)
    if args.executor_memory is not None:
        log.info("Executor memory:         %s", args.executor_memory)
    if args.driver_cores is not None:
        log.info("Driver cores:            %s", args.driver_cores)
    if args.executor_cores is not None:
        log.info("Executor cores:          %s", args.executor_cores)
    log.info("Application:             %s", args.app)
    log.info("Application arguments:   %s", args.app_args)
    log.info("-------------------------------------------")
Example #18
def execute(args: typing.NamedTuple):
    if not args.wait and args.output:
        raise aztk.error.AztkError("--output flag requires --wait flag")

    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    jars = []
    py_files = []
    files = []

    if args.jars is not None:
        jars = args.jars.replace(' ', '').split(',')

    if args.py_files is not None:
        py_files = args.py_files.replace(' ', '').split(',')

    if args.files is not None:
        files = args.files.replace(' ', '').split(',')

    log.info("-------------------------------------------")
    log.info("Spark cluster id:        %s", args.cluster_id)
    log.info("Spark app name:          %s", args.name)
    log.info("Wait for app completion: %s", args.wait)
    if args.main_class is not None:
        log.info("Entry point class:       %s", args.main_class)
    if jars:
        log.info("JARS:                    %s", jars)
    if py_files:
        log.info("PY_Files:                %s", py_files)
    if files:
        log.info("Files:                   %s", files)
    if args.driver_java_options is not None:
        log.info("Driver java options:     %s", args.driver_java_options)
    if args.driver_library_path is not None:
        log.info("Driver library path:     %s", args.driver_library_path)
    if args.driver_class_path is not None:
        log.info("Driver class path:       %s", args.driver_class_path)
    if args.driver_memory is not None:
        log.info("Driver memory:           %s", args.driver_memory)
    if args.executor_memory is not None:
        log.info("Executor memory:         %s", args.executor_memory)
    if args.driver_cores is not None:
        log.info("Driver cores:            %s", args.driver_cores)
    if args.executor_cores is not None:
        log.info("Executor cores:          %s", args.executor_cores)
    log.info("Application:             %s", args.app)
    log.info("Application arguments:   %s", args.app_args)
    log.info("-------------------------------------------")

    spark_client.submit(cluster_id=args.cluster_id,
                        application=aztk.spark.models.ApplicationConfiguration(
                            name=args.name,
                            application=args.app,
                            application_args=args.app_args,
                            main_class=args.main_class,
                            jars=jars,
                            py_files=py_files,
                            files=files,
                            driver_java_options=args.driver_java_options,
                            driver_library_path=args.driver_library_path,
                            driver_class_path=args.driver_class_path,
                            driver_memory=args.driver_memory,
                            executor_memory=args.executor_memory,
                            driver_cores=args.driver_cores,
                            executor_cores=args.executor_cores,
                            max_retry_count=args.max_retry_count),
                        remote=args.remote,
                        wait=False)

    if args.wait:
        if not args.output:
            exit_code = utils.stream_logs(client=spark_client,
                                          cluster_id=args.cluster_id,
                                          application_name=args.name)
        else:
            with utils.Spinner():
                spark_client.wait_until_application_done(
                    cluster_id=args.cluster_id, task_id=args.name)
                application_log = spark_client.get_application_log(
                    cluster_id=args.cluster_id, application_name=args.name)
                with open(os.path.abspath(os.path.expanduser(args.output)),
                          "w",
                          encoding="UTF-8") as f:
                    f.write(application_log.log)
                exit_code = application_log.exit_code

        sys.exit(exit_code)
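
The three `replace(' ', '').split(',')` expressions above all parse a comma-separated file list from a CLI flag. A small sketch of that parsing factored into one helper (the helper name is ours):

def parse_file_list(raw: str) -> list:
    """Turn 'a.jar, b.jar' into ['a.jar', 'b.jar']; missing or empty input yields []."""
    if not raw:
        return []
    return [item for item in raw.replace(" ", "").split(",") if item]

assert parse_file_list("a.jar, b.jar") == ["a.jar", "b.jar"]
assert parse_file_list(None) == []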
Example #19
def execute(args: typing.NamedTuple):
    plugins = plugin_manager.plugins
    log.info("------------------------------------------------------")
    log.info("                   Plugins (%i available)", len(plugins))
    log.info("------------------------------------------------------")
    for name, plugin in plugins.items():
        log.info("- %s", name)
        args = plugin_manager.get_args_for(plugin)
        if args:
            log.info("    Arguments:")
            for arg in args.values():
                log.info("      - %s", arg_str(arg))
        else:
            log.info("    Arguments: None")
        log.info("")
Example #20
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    log.info('-------------------------------------------')
    log.info('spark cluster id:    {}'.format(args.cluster_id))
    log.info('username:            {}'.format(args.username))
    log.info('-------------------------------------------')

    if args.ssh_key:
        ssh_key = args.ssh_key
    else:
        ssh_key = spark_client.secrets_config.ssh_pub_key

    ssh_key, password = utils.get_ssh_key_or_prompt(
        ssh_key, args.username, args.password, spark_client.secrets_config)

    spark_client.create_user(cluster_id=args.cluster_id,
                             username=args.username,
                             password=password,
                             ssh_key=ssh_key)

    if password:
        log.info('password:            %s', '*' * len(password))
    elif ssh_key:
        log.info('ssh public key:      %s', ssh_key)

    log.info('-------------------------------------------')
Example #21
def print_available_softwares():
    log.info("Available toolkits: ")
    for toolkit in TOOLKIT_MAP:
        log.info("  - %s", toolkit)
Example #22
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    log.info("-------------------------------------------")
    log.info("spark cluster id:    %s", args.cluster_id)
    log.info("username:            %s", args.username)
    log.info("-------------------------------------------")

    if args.ssh_key:
        ssh_key = args.ssh_key
    else:
        ssh_key = spark_client.secrets_configuration.ssh_pub_key

    ssh_key, password = utils.get_ssh_key_or_prompt(
        ssh_key, args.username, args.password,
        spark_client.secrets_configuration)

    spark_client.cluster.create_user(id=args.cluster_id,
                                     username=args.username,
                                     password=password,
                                     ssh_key=ssh_key)

    if password:
        log.info("password:            %s", "*" * len(password))
    elif ssh_key:
        log.info("ssh public key:      %s", ssh_key)

    log.info("-------------------------------------------")
Example #23
def print_available_environments(software: str):
    toolkit_def = TOOLKIT_MAP.get(software)

    log.info("Available environment for %s: ", software)
    for env in toolkit_def.environments:
        log.info("  - %s", env)