Example #1
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    job_conf = JobConfig()

    job_conf.merge(args.job_id, args.job_conf)

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=job_conf.applications,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        toolkit=job_conf.toolkit,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        plugins=job_conf.plugins,
        worker_on_master=job_conf.worker_on_master,
        scheduling_target=job_conf.scheduling_target,
    )

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.job.submit(job_configuration)
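
This execute() function is typically driven by an argparse-based CLI that fills the args namespace it reads (args.job_id and args.job_conf). The sketch below shows one way such wiring could look; the flag names and the setup_parser helper are illustrative assumptions, not taken from the example itself.

import argparse

def setup_parser(parser: argparse.ArgumentParser):
    # Hypothetical flags; the only requirement is that the resulting namespace
    # exposes the attributes execute() reads: job_id and job_conf.
    parser.add_argument("--id", dest="job_id", help="id of the job to submit")
    parser.add_argument("--configuration", "-c", dest="job_conf",
                        help="path to a job configuration (job.yaml) file")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="submit a Spark job (sketch)")
    setup_parser(parser)
    execute(parser.parse_args())  # execute() as defined in Example #1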
Example #2
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    job_conf = JobConfig()

    job_conf.merge(args.job_id, args.job_conf)

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=job_conf.applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        docker_repo=job_conf.docker_repo,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        worker_on_master=job_conf.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.submit_job(job_configuration)
Example #3
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config() if args.cluster_path is None \
        else config.read_cluster_config(args.cluster_path)
    cluster_conf.merge(file_config)

    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(username=args.username,
                                                 password=args.password),
        ))

    if cluster_conf.toolkit:
        if args.docker_repo:
            cluster_conf.toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            cluster_conf.toolkit.docker_run_options = args.docker_run_options

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_configuration.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_configuration,
        )
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.cluster.create(
            cluster_configuration=cluster_conf,
            wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #4
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        size=args.size,
        size_low_priority=args.size_low_priority,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        )))

    if args.docker_repo and cluster_conf.toolkit:
        cluster_conf.toolkit.docker_repo = args.docker_repo

    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)
    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #5
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(
        cluster_conf,
        wait=wait
    )

    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #6
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_secrets())
    job_conf = JobConfig()

    job_conf.merge(args.job_id, args.job_conf)

    aztk_applications = []
    for application in job_conf.applications:
        aztk_applications.append(
            aztk.spark.models.ApplicationConfiguration(
                name=application.get('name'),
                application=application.get('application'),
                application_args=application.get('application_args'),
                main_class=application.get('main_class'),
                jars=[],
                py_files=[],
                files=[],
                driver_java_options=application.get('driver_java_options'),
                driver_library_path=application.get('driver_library_path'),
                driver_class_path=application.get('driver_class_path'),
                driver_memory=application.get('driver_memory'),
                executor_memory=application.get('executor_memory'),
                driver_cores=application.get('driver_cores'),
                executor_cores=application.get('executor_cores')))

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=aztk_applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        docker_repo=job_conf.docker_repo,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        worker_on_master=job_conf.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.submit_job(job_configuration)