Example #1
def execute(args: typing.NamedTuple):
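    # Collect diagnostic output from the cluster and save it to the output
    # directory (default: ./debug-<cluster_id>-<timestamp>).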
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    timestr = time.strftime("%Y%m%d-%H%M%S")

    if not args.output:
        args.output = os.path.join(os.getcwd(), "debug-{0}-{1}".format(args.cluster_id, timestr))
    with utils.Spinner():
        spark_client.cluster.diagnostics(id=args.cluster_id, output_directory=args.output, brief=args.brief)
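All of these execute functions follow the same CLI-endpoint pattern: a command module parses its flags into an args namespace and hands it to execute(args). Below is a minimal sketch of how Example #1 could be wired to argparse; the flag spellings and the setup_parser helper are illustrative assumptions, not part of the snippet above.

import argparse

def setup_parser(parser: argparse.ArgumentParser):
    # Hypothetical flag registration covering the attributes Example #1 reads.
    parser.add_argument("--id", dest="cluster_id", required=True, help="id of the spark cluster")
    parser.add_argument("--output", help="directory to write the diagnostic output to")
    parser.add_argument("--brief", action="store_true", help="collect only a brief diagnostic set")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="cluster-debug")
    setup_parser(parser)
    # argparse.Namespace exposes the same attributes the execute() above expects
    execute(parser.parse_args())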
Example #2
def execute(args: typing.NamedTuple):
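    # Copy a local file to the destination path on every node in the cluster,
    # log the per-node results, and exit non-zero if any copy failed.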
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    with utils.Spinner():
        copy_output = spark_client.cluster.copy(
            id=args.cluster_id, source_path=args.source_path, destination_path=args.dest_path, internal=args.internal)
    for node_output in copy_output:
        utils.log_node_copy_output(node_output)
    sys.exit(1 if any(node_output.error for node_output in copy_output) else 0)
Example #3
def execute(args: typing.NamedTuple):
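    # Create a Spark cluster: merge cluster.yaml settings with CLI arguments,
    # resolve SSH credentials, then provision through the cluster API.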
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config() if args.cluster_path is None \
        else config.read_cluster_config(args.cluster_path)
    cluster_conf.merge(file_config)

    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username,
                                                 password=args.password),
        ))

    if cluster_conf.toolkit:
        if args.docker_repo:
            cluster_conf.toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            cluster_conf.toolkit.docker_run_options = args.docker_run_options

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_configuration.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_configuration,
        )
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.cluster.create(cluster_configuration=cluster_conf, wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #4
def execute(args: typing.NamedTuple):
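    # Submit a Spark application to a cluster; optionally wait for completion,
    # streaming the logs or writing them to the --output file.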
    if not args.wait and args.output:
        raise aztk.error.AztkError("--output flag requires --wait flag")

    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    jars = []
    py_files = []
    files = []

    if args.jars is not None:
        jars = args.jars.replace(" ", "").split(",")

    if args.py_files is not None:
        py_files = args.py_files.replace(" ", "").split(",")

    if args.files is not None:
        files = args.files.replace(" ", "").split(",")

    log_application(args, jars, py_files, files)

    spark_client.cluster.submit(
        id=args.cluster_id,
        application=aztk.spark.models.ApplicationConfiguration(
            name=args.name,
            application=args.app,
            application_args=args.app_args,
            main_class=args.main_class,
            jars=jars,
            py_files=py_files,
            files=files,
            driver_java_options=args.driver_java_options,
            driver_library_path=args.driver_library_path,
            driver_class_path=args.driver_class_path,
            driver_memory=args.driver_memory,
            executor_memory=args.executor_memory,
            driver_cores=args.driver_cores,
            executor_cores=args.executor_cores,
            max_retry_count=args.max_retry_count,
        ),
        remote=args.remote,
        internal=args.internal,
        wait=False,
    )

    if args.wait:
        if not args.output:
            exit_code = utils.stream_logs(client=spark_client, cluster_id=args.cluster_id, application_name=args.name)
        else:
            with utils.Spinner():
                spark_client.cluster.wait(id=args.cluster_id, application_name=args.name)
                application_log = spark_client.cluster.get_application_log(
                    id=args.cluster_id, application_name=args.name)
                with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f:
                    f.write(application_log.log)
                exit_code = application_log.exit_code

        sys.exit(exit_code)
Example #5
def execute(args: typing.NamedTuple):
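    # Run a shell command on a single node (when --node-id is given) or on
    # every node in the cluster, then log each node's result.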
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    with utils.Spinner():
        if args.node_id:
            results = [spark_client.node_run(args.cluster_id, args.node_id, args.command, args.host, args.internal)]
        else:
            results = spark_client.cluster_run(args.cluster_id, args.command, args.host, args.internal)

    for node_id, result in results:
        utils.log_execute_result(node_id, result)
Example #6
def execute(args: typing.NamedTuple):
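    # Create a Spark cluster (an older variant of Example #3 that handles the
    # deprecated --size-low-pri flag and uses the older client API).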
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        size=args.size,
        size_low_priority=args.size_low_priority,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        )))

    if args.docker_repo and cluster_conf.toolkit:
        cluster_conf.toolkit.docker_repo = args.docker_repo

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #7
def execute(args: typing.NamedTuple):
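    # Fetch the log of a job's application and either write it to the --output
    # file or print it to the console.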
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    app_log = spark_client.get_job_application_log(args.job_id, args.app_name)
    if args.output:
        with utils.Spinner():
            with open(os.path.abspath(os.path.expanduser(args.output)),
                      "w",
                      encoding="UTF-8") as f:
                f.write(app_log.log)
    else:
        log.print(app_log.log)
Example #8
def execute(args: typing.NamedTuple):
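    # Create a Spark cluster (an older variant using vm_count/vm_low_pri_count
    # and a docker_repo field directly on ClusterConfiguration).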
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #9
def execute(args: typing.NamedTuple):
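    # Copy a file to every node (older cluster_copy API); print the per-node
    # results and exit non-zero if any copy failed.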
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    with utils.Spinner():
        copy_output = spark_client.cluster_copy(
            cluster_id=args.cluster_id,
            source_path=args.source_path,
            destination_path=args.dest_path,
            internal=args.internal)
    for node_id, result, err in copy_output:
        print_copy_result(node_id, result, err)
    sys.exit(0 if all(result for _, result, _ in copy_output) else 1)
Example #10
def execute(args: typing.NamedTuple):
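    # Stream an application's logs live (--tail) or fetch the full log and
    # write it to the --output file or the console.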
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    if args.tail:
        utils.stream_logs(client=spark_client, cluster_id=args.cluster_id, application_name=args.app_name)
    else:
        app_log = spark_client.cluster.get_application_log(id=args.cluster_id, application_name=args.app_name)
        if args.output:
            with utils.Spinner():
                with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f:
                    f.write(app_log.log)
        else:
            log.print(app_log.log)
Example #11
def execute(args: typing.NamedTuple):
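    # Submit a Spark application (an older variant of Example #4 with inline
    # parameter logging and the older client API).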
    if not args.wait and args.output:
        raise aztk.error.AztkError("--output flag requires --wait flag")

    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    jars = []
    py_files = []
    files = []

    if args.jars is not None:
        jars = args.jars.replace(' ', '').split(',')

    if args.py_files is not None:
        py_files = args.py_files.replace(' ', '').split(',')

    if args.files is not None:
        files = args.files.replace(' ', '').split(',')

    log.info("-------------------------------------------")
    log.info("Spark cluster id:        %s", args.cluster_id)
    log.info("Spark app name:          %s", args.name)
    log.info("Wait for app completion: %s", args.wait)
    if args.main_class is not None:
        log.info("Entry point class:       %s", args.main_class)
    if jars:
        log.info("JARS:                    %s", jars)
    if py_files:
        log.info("PY_Files:                %s", py_files)
    if files:
        log.info("Files:                   %s", files)
    if args.driver_java_options is not None:
        log.info("Driver java options:     %s", args.driver_java_options)
    if args.driver_library_path is not None:
        log.info("Driver library path:     %s", args.driver_library_path)
    if args.driver_class_path is not None:
        log.info("Driver class path:       %s", args.driver_class_path)
    if args.driver_memory is not None:
        log.info("Driver memory:           %s", args.driver_memory)
    if args.executor_memory is not None:
        log.info("Executor memory:         %s", args.executor_memory)
    if args.driver_cores is not None:
        log.info("Driver cores:            %s", args.driver_cores)
    if args.executor_cores is not None:
        log.info("Executor cores:          %s", args.executor_cores)
    log.info("Application:             %s", args.app)
    log.info("Application arguments:   %s", args.app_args)
    log.info("-------------------------------------------")

    spark_client.submit(cluster_id=args.cluster_id,
                        application=aztk.spark.models.ApplicationConfiguration(
                            name=args.name,
                            application=args.app,
                            application_args=args.app_args,
                            main_class=args.main_class,
                            jars=jars,
                            py_files=py_files,
                            files=files,
                            driver_java_options=args.driver_java_options,
                            driver_library_path=args.driver_library_path,
                            driver_class_path=args.driver_class_path,
                            driver_memory=args.driver_memory,
                            executor_memory=args.executor_memory,
                            driver_cores=args.driver_cores,
                            executor_cores=args.executor_cores,
                            max_retry_count=args.max_retry_count),
                        remote=args.remote,
                        wait=False)

    if args.wait:
        if not args.output:
            exit_code = utils.stream_logs(client=spark_client,
                                          cluster_id=args.cluster_id,
                                          application_name=args.name)
        else:
            with utils.Spinner():
                spark_client.wait_until_application_done(
                    cluster_id=args.cluster_id, task_id=args.name)
                application_log = spark_client.get_application_log(
                    cluster_id=args.cluster_id, application_name=args.name)
                with open(os.path.abspath(os.path.expanduser(args.output)),
                          "w",
                          encoding="UTF-8") as f:
                    f.write(application_log.log)
                exit_code = application_log.exit_code

        sys.exit(exit_code)