Example #1
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    if spark_client.stop_job_app(args.job_id, args.app_name):
        log.info("Stopped app {0}".format(args.app_name))
    else:
        log.error("App with name {0} does not exist or was already deleted")
Example #2
File: main.py  Project: IDCH/lha
    def add_file(self, p):
        """ Adds the specified file to the import queue.
        """
        log = self._app.log

        # Sanity checks
        if not p.exists():
            raise IOError((errno.ENOENT, "Cannot add file: does not exist.", p))
        if not p.isfile():
            raise IOError((errno.ENOTDIR, "Cannot add file: not a file.", p))
        if self.rootpath.relpathto(p).startswith(".."):
            raise ValueError("The supplied file is not a child of the document root.")

        if p.ext not in FILETYPES:
            log.debug("Skipping file %s. Filetype not supported." % (p))
            return

        if self.has_been_imported(p):
            # TODO check datestamp to see if it needs to be updated
            log.debug("Skipping file %s. Already processed.")
            return

        log.info("Queueing file for import: '%s'" % (p))
        parser = DocumentFileParser(p, self.rootpath, log)
        parser.parse()
        self.import_queue.append(parser)
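The add_file method above expects path.py-style Path objects (p.ext, p.isfile(), relpathto) and a FILETYPES collection of supported extensions. A hypothetical caller, under those assumptions, would simply walk the document root and let add_file filter and queue each file:

# Hypothetical caller; FILETYPES and the path.py walkfiles() usage are
# assumptions for illustration, not code from the IDCH/lha project.
FILETYPES = {".xml", ".txt"}

def scan_document_root(importer):
    # Queue every file under the importer's document root; unsupported or
    # already-imported files are skipped inside add_file itself.
    for f in importer.rootpath.walkfiles():
        importer.add_file(f)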
Example #3
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(
        ClusterConfiguration(cluster_id=args.cluster_id,
                             vm_count=args.size,
                             vm_low_pri_count=args.size_low_pri,
                             vm_size=args.vm_size,
                             subnet_id=args.subnet_id,
                             user_configuration=UserConfiguration(
                                 username=args.username,
                                 password=args.password,
                             ),
                             docker_repo=args.docker_repo))
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            user_configuration.username, user_configuration.password,
            spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    print_cluster_conf(cluster_conf, wait)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(cluster_conf, wait=wait)

    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #4
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    cluster_id = args.cluster_id

    if not args.force:
        confirmation_cluster_id = input(
            "Please confirm the id of the cluster you wish to delete: ")

        if confirmation_cluster_id != cluster_id:
            log.error(
                "Confirmation cluster id does not match. Please try again.")
            return

    if spark_client.delete_cluster(cluster_id):
        log.info("Deleting cluster %s", cluster_id)
    else:
        log.error("Cluster with id '%s' doesn't exist or was already deleted.",
                  cluster_id)
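The force/confirmation pattern above (prompt for the resource id and abort on mismatch) repeats across the delete commands. A generic helper in the same spirit, purely illustrative and not part of the aztk CLI, might look like this:

# Illustrative confirm-before-delete helper; not part of aztk.
def confirm_id(expected_id, resource="cluster"):
    typed = input("Please confirm the id of the {0} you wish to delete: ".format(resource))
    return typed == expected_id

# Usage sketch: if args.force or confirm_id(cluster_id): proceed with the delete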
Example #5
File: delete.py  Project: skepticatgit/aztk
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    job_id = args.job_id

    if not args.force:
        # check if job exists before prompting for confirmation
        spark_client.get_job(job_id)

        confirmation_job_id = input(
            "Please confirm the id of the job you wish to delete: ")

        if confirmation_job_id != job_id:
            log.error(
                "Confirmation job id does not match. Please try again.")
            return

    if spark_client.delete_job(job_id):
        log.info("Deleting Job %s", job_id)
    else:
        log.error("Job with id '%s' doesn't exist or was already deleted.",
                  job_id)
Example #6
def execute(args: typing.NamedTuple):
    spark_client = load_spark_client()
    jars = []
    py_files = []
    files = []

    if args.jars is not None:
        jars = args.jars.replace(' ', '').split(',')

    if args.py_files is not None:
        py_files = args.py_files.replace(' ', '').split(',')

    if args.files is not None:
        files = args.files.replace(' ', '').split(',')

    log.info("-------------------------------------------")
    log.info("Spark cluster id:        %s", args.cluster_id)
    log.info("Spark app name:          %s", args.name)
    log.info("Wait for app completion: %s", args.wait)
    if args.main_class is not None:
        log.info("Entry point class:       %s", args.main_class)
    if jars:
        log.info("JARS:                    %s", jars)
    if py_files:
        log.info("PY_Files:                %s", py_files)
    if files:
        log.info("Files:                   %s", files)
    if args.driver_java_options is not None:
        log.info("Driver java options:     %s", args.driver_java_options)
    if args.driver_library_path is not None:
        log.info("Driver library path:     %s", args.driver_library_path)
    if args.driver_class_path is not None:
        log.info("Driver class path:       %s", args.driver_class_path)
    if args.driver_memory is not None:
        log.info("Driver memory:           %s", args.driver_memory)
    if args.executor_memory is not None:
        log.info("Executor memory:         %s", args.executor_memory)
    if args.driver_cores is not None:
        log.info("Driver cores:            %s", args.driver_cores)
    if args.executor_cores is not None:
        log.info("Executor cores:          %s", args.executor_cores)
    log.info("Application:             %s", args.app)
    log.info("Application arguments:   %s", args.app_args)
    log.info("-------------------------------------------")

    spark_client.submit(cluster_id=args.cluster_id,
                        application=aztk.spark.models.Application(
                            name=args.name,
                            application=args.app,
                            application_args=args.app_args,
                            main_class=args.main_class,
                            jars=jars,
                            py_files=py_files,
                            files=files,
                            driver_java_options=args.driver_java_options,
                            driver_library_path=args.driver_library_path,
                            driver_class_path=args.driver_class_path,
                            driver_memory=args.driver_memory,
                            executor_memory=args.executor_memory,
                            driver_cores=args.driver_cores,
                            executor_cores=args.executor_cores,
                            max_retry_count=args.max_retry_count),
                        wait=False)

    if args.wait:
        utils.stream_logs(client=spark_client,
                          cluster_id=args.cluster_id,
                          application_name=args.name)
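The --jars, --py-files, and --files options above arrive as single comma-separated strings; stripping spaces before splitting means values with or without whitespace around the commas parse identically. A quick illustration:

# How the comma-separated option parsing above behaves (illustration only).
raw = "deps/a.jar, deps/b.jar"
jars = raw.replace(' ', '').split(',')
assert jars == ['deps/a.jar', 'deps/b.jar']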
Example #7
def print_cluster_conf(cluster_conf: ClusterConfiguration, wait: bool):
    user_configuration = cluster_conf.user_configuration

    log.info("-------------------------------------------")
    log.info("spark cluster id:        %s", cluster_conf.cluster_id)
    log.info("spark cluster size:      %s",
             cluster_conf.vm_count + cluster_conf.vm_low_pri_count)
    log.info(">        dedicated:      %s", cluster_conf.vm_count)
    log.info(">     low priority:      %s", cluster_conf.vm_low_pri_count)
    log.info("spark cluster vm size:   %s", cluster_conf.vm_size)
    log.info(
        "custom scripts:          %s",
        len(cluster_conf.custom_scripts) if cluster_conf.custom_scripts else 0)
    log.info("subnet ID:               %s", cluster_conf.subnet_id)
    log.info(
        "file shares:             %s",
        len(cluster_conf.file_shares)
        if cluster_conf.file_shares is not None else 0)
    log.info("docker repo name:        %s", cluster_conf.docker_repo)
    log.info("wait for cluster:        %s", wait)
    log.info("username:                %s", user_configuration.username)
    if user_configuration.password:
        log.info("Password: %s", '*' * len(user_configuration.password))
    log.info("-------------------------------------------")
Example #8
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    # read cluster.yaml configuration file, overwrite values with args
    cluster_conf = ClusterConfig()

    cluster_conf.merge(uid=args.cluster_id,
                       size=args.size,
                       size_low_pri=args.size_low_pri,
                       vm_size=args.vm_size,
                       subnet_id=args.subnet_id,
                       wait=args.wait,
                       username=args.username,
                       password=args.password,
                       docker_repo=args.docker_repo)

    if cluster_conf.custom_scripts:
        custom_scripts = []
        for custom_script in cluster_conf.custom_scripts:
            custom_scripts.append(
                aztk.spark.models.CustomScript(script=custom_script['script'],
                                               run_on=custom_script['runOn']))
    else:
        custom_scripts = None

    if cluster_conf.file_shares:
        file_shares = []
        for file_share in cluster_conf.file_shares:
            file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path']))
    else:
        file_shares = None

    if cluster_conf.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key, cluster_conf.username,
            cluster_conf.password, spark_client.secrets_config)
        user_conf = aztk.spark.models.UserConfiguration(
            username=cluster_conf.username, password=password, ssh_key=ssh_key)
    else:
        user_conf = None

    print_cluster_conf(cluster_conf)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(
        aztk.spark.models.ClusterConfiguration(
            cluster_id=cluster_conf.uid,
            vm_count=cluster_conf.size,
            vm_low_pri_count=cluster_conf.size_low_pri,
            vm_size=cluster_conf.vm_size,
            subnet_id=cluster_conf.subnet_id,
            custom_scripts=custom_scripts,
            file_shares=file_shares,
            docker_repo=cluster_conf.docker_repo,
            spark_configuration=load_aztk_spark_config(),
            user_configuration=user_conf),
        wait=cluster_conf.wait)

    spinner.stop()

    if cluster_conf.wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
Example #9
def print_cluster_conf(cluster_conf):
    log.info("-------------------------------------------")
    log.info("spark cluster id:        %s", cluster_conf.uid)
    log.info("spark cluster size:      %s",
             cluster_conf.size + cluster_conf.size_low_pri)
    log.info(">        dedicated:      %s", cluster_conf.size)
    log.info(">     low priority:      %s", cluster_conf.size_low_pri)
    log.info("spark cluster vm size:   %s", cluster_conf.vm_size)
    log.info("custom scripts:          %s", cluster_conf.custom_scripts)
    log.info("subnet ID:               %s", cluster_conf.subnet_id)
    log.info(
        "file shares:             %s",
        len(cluster_conf.file_shares)
        if cluster_conf.file_shares is not None else 0)
    log.info("docker repo name:        %s", cluster_conf.docker_repo)
    log.info("wait for cluster:        %s", cluster_conf.wait)
    log.info("username:                %s", cluster_conf.username)
    if cluster_conf.password:
        log.info("Password: %s", '*' * len(cluster_conf.password))
    log.info("-------------------------------------------")
Example #10
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    log.info('-------------------------------------------')
    log.info('spark cluster id:    {}'.format(args.cluster_id))
    log.info('username:            {}'.format(args.username))
    log.info('-------------------------------------------')

    if args.ssh_key:
        ssh_key = args.ssh_key
    else:
        ssh_key = spark_client.secrets_config.ssh_pub_key

    ssh_key, password = utils.get_ssh_key_or_prompt(ssh_key, args.username, args.password, spark_client.secrets_config)

    spark_client.create_user(
        cluster_id=args.cluster_id,
        username=args.username,
        password=password,
        ssh_key=ssh_key
    )

    if password:
        log.info('password:            %s', '*' * len(password))
    elif ssh_key:
        log.info('ssh public key:      %s', ssh_key)

    log.info('-------------------------------------------')
Example #11
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    ssh_conf = SshConfig()

    ssh_conf.merge(cluster_id=args.cluster_id,
                   username=args.username,
                   job_ui_port=args.jobui,
                   job_history_ui_port=args.jobhistoryui,
                   web_ui_port=args.webui,
                   jupyter_port=args.jupyter,
                   name_node_ui_port=args.namenodeui,
                   rstudio_server_port=args.rstudioserver,
                   host=args.host,
                   connect=args.connect)

    http_prefix = 'http://localhost:'
    log.info("-------------------------------------------")
    log.info("spark cluster id:    %s", ssh_conf.cluster_id)
    log.info("open webui:          %s%s", http_prefix, ssh_conf.web_ui_port)
    log.info("open jobui:          %s%s", http_prefix, ssh_conf.job_ui_port)
    log.info("open jobhistoryui:   %s%s", http_prefix,
             ssh_conf.job_history_ui_port)
    log.info("open jupyter:        %s%s", http_prefix, ssh_conf.jupyter_port)
    log.info("open namenodeui:     %s%s", http_prefix,
             ssh_conf.name_node_ui_port)
    log.info("open rstudio server: %s%s", http_prefix,
             ssh_conf.rstudio_server_port)
    log.info("ssh username:        %s", ssh_conf.username)
    log.info("connect:             %s", ssh_conf.connect)
    log.info("-------------------------------------------")

    # get ssh command
    try:
        ssh_cmd = utils.ssh_in_master(
            client=spark_client,
            cluster_id=ssh_conf.cluster_id,
            webui=ssh_conf.web_ui_port,
            jobui=ssh_conf.job_ui_port,
            jobhistoryui=ssh_conf.job_history_ui_port,
            namenodeui=ssh_conf.name_node_ui_port,
            jupyter=ssh_conf.jupyter_port,
            rstudioserver=ssh_conf.rstudio_server_port,
            username=ssh_conf.username,
            host=ssh_conf.host,
            connect=ssh_conf.connect)

        if not ssh_conf.connect:
            log.info("")
            log.info(
                "Use the following command to connect to your spark head node:"
            )
            log.info("\t%s", ssh_cmd)

    except batch_error.BatchErrorException as e:
        if e.error.code == "PoolNotFound":
            raise aztk.error.AztkError(
                "The cluster you are trying to connect to does not exist.")
        else:
            raise
Example #12
def execute(args: typing.NamedTuple):
    spark_client = load_spark_client()

    # read cluster.yaml configuration file, overwrite values with args
    cluster_conf = ClusterConfig()

    cluster_conf.merge(uid=args.cluster_id,
                       size=args.size,
                       size_low_pri=args.size_low_pri,
                       vm_size=args.vm_size,
                       wait=args.wait,
                       username=args.username,
                       password=args.password,
                       docker_repo=args.docker_repo)

    print_cluster_conf(cluster_conf)

    if cluster_conf.custom_scripts:
        custom_scripts = []
        for custom_script in cluster_conf.custom_scripts:
            custom_scripts.append(
                aztk.spark.models.CustomScript(script=custom_script['script'],
                                               run_on=custom_script['runOn']))
    else:
        custom_scripts = None

    if cluster_conf.file_shares:
        file_shares = []
        for file_share in cluster_conf.file_shares:
            file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path']))
    else:
        file_shares = None

    jars_src = aztk.utils.constants.DEFAULT_SPARK_JARS_SOURCE

    # create spark cluster
    cluster = spark_client.create_cluster(
        aztk.spark.models.ClusterConfiguration(
            cluster_id=cluster_conf.uid,
            vm_count=cluster_conf.size,
            vm_low_pri_count=cluster_conf.size_low_pri,
            vm_size=cluster_conf.vm_size,
            custom_scripts=custom_scripts,
            file_shares=file_shares,
            docker_repo=cluster_conf.docker_repo,
            spark_configuration=aztk.spark.models.SparkConfiguration(
                spark_defaults_conf=os.path.join(
                    aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE,
                    'spark-defaults.conf'),
                spark_env_sh=os.path.join(
                    aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE,
                    'spark-env.sh'),
                core_site_xml=os.path.join(
                    aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE,
                    'core-site.xml'),
                jars=[
                    os.path.join(jars_src, path)
                    for path in os.listdir(jars_src)
                ])),
        wait=cluster_conf.wait)

    if cluster_conf.username:
        ssh_key = spark_client.secrets_config.ssh_pub_key

        ssh_key, password = utils.get_ssh_key_or_prompt(
            ssh_key, cluster_conf.username, cluster_conf.password,
            spark_client.secrets_config)

        spark_client.create_user(cluster_id=cluster_conf.uid,
                                 username=cluster_conf.username,
                                 password=password,
                                 ssh_key=ssh_key)

    if cluster_conf.wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)