Example #1
def uninstall_cli(
        api_client,
        cluster_id,
        all,
        jar,
        egg,
        maven_coordinates,
        maven_repo,  # noqa
        maven_exclusion,
        pypi_package,
        pypi_repo,
        cran_package,
        cran_repo):
    """
    Mark libraries on a cluster to be uninstalled. Libraries marked to be uninstalled
    stay attached until the cluster is restarted (see `databricks clusters restart -h`).
    """
    if all:
        library_statuses = _cluster_status(api_client, cluster_id).get(
            'library_statuses', [])
        libraries = [l_status['library'] for l_status in library_statuses]
        LibrariesApi(api_client).uninstall_libraries(cluster_id, libraries)
        _uninstall_cli_exit_help(cluster_id)
        return
    library = _get_library_from_options(jar, egg, maven_coordinates,
                                        maven_repo, maven_exclusion,
                                        pypi_package, pypi_repo, cran_package,
                                        cran_repo)
    LibrariesApi(api_client).uninstall_libraries(cluster_id, [library])
    _uninstall_cli_exit_help(cluster_id)
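A minimal end-to-end sketch of the same uninstall-everything flow driven directly against the client classes used above; the import paths assume the legacy databricks-cli package layout, and the host, token, and cluster ID are placeholders.

from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.libraries.api import LibrariesApi
from databricks_cli.clusters.api import ClusterApi

# Placeholder credentials and cluster ID; substitute real values.
api_client = ApiClient(host="https://<workspace-url>", token="<personal-access-token>")
cluster_id = "0123-456789-abcdef"

# Mark every attached library for uninstall, then restart so the change takes
# effect, mirroring the docstring above.
statuses = LibrariesApi(api_client).cluster_status(cluster_id).get("library_statuses", [])
libraries = [s["library"] for s in statuses]
if libraries:
    LibrariesApi(api_client).uninstall_libraries(cluster_id, libraries)
    ClusterApi(api_client).restart_cluster(cluster_id)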
Example #2
def install_cli(
        api_client,
        cluster_id,
        jar,
        egg,
        maven_coordinates,
        maven_repo,
        maven_exclusion,  # noqa
        pypi_package,
        pypi_repo,
        cran_package,
        cran_repo):
    """
    Install a library on a cluster. Libraries must first be uploaded to DBFS or S3
    (see `dbfs cp -h`). Unlike the API, only one library can be installed for each execution of
    `databricks libraries install`.

    Users should only provide one of
    [--jar, --egg, --maven-coordinates, --pypi-package, --cran-package].
    """
    library = _get_library_from_options(jar, egg, maven_coordinates,
                                        maven_repo, maven_exclusion,
                                        pypi_package, pypi_repo, cran_package,
                                        cran_repo)
    LibrariesApi(api_client).install_libraries(cluster_id, [library])
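The body of _get_library_from_options is not shown here, but the specs it returns are the per-library dictionaries the Libraries API expects. The jar and pypi shapes below match Example #4; the maven shape is an assumption, and all paths and package names are placeholders.

# Placeholder library specs in the dictionary format passed to install_libraries.
jar_lib = {"jar": "dbfs:/FileStore/jars/my_lib.jar"}
pypi_lib = {"pypi": {"package": "requests"}}
maven_lib = {"maven": {"coordinates": "com.example:my-lib:1.0.0"}}  # assumed shape

LibrariesApi(api_client).install_libraries(cluster_id, [jar_lib, pypi_lib, maven_lib])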
Example #3
def __init__(self, logger, **kwargs):
    """
    :param **kwargs:
        keyword arguments passed through to ApiClient;
        keys should only include: token, host
    :type **kwargs: dict
    """
    self.api_client = ApiClient(**kwargs)
    self.cluster_client = ClusterApi(self.api_client)
    self.libraries_client = LibrariesApi(self.api_client)
    self.logger = logger
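A usage sketch for this constructor, assuming it belongs to a small wrapper class (named DatabricksClient here purely for illustration) and that host and token are the only keys passed through to ApiClient, as the docstring states; all values are placeholders.

import logging

logger = logging.getLogger(__name__)

# DatabricksClient is a hypothetical name for the class this __init__ belongs to.
client = DatabricksClient(logger, host="https://<workspace-url>", token="<personal-access-token>")
statuses = client.libraries_client.cluster_status("0123-456789-abcdef")
client.logger.info("Library statuses: %s", statuses)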
Example #4
def prepare_for_operationalization(cluster_id, api_client, dbfs_path,
                                   overwrite, spark_version):
    """
    Installs appropriate versions of several libraries to support operationalization.

    Args:
        cluster_id (str): cluster_id representing the cluster to prepare for operationalization
        api_client (ApiClient): the ApiClient object used to authenticate to the workspace
        dbfs_path (str): the path on dbfs to upload libraries to
        overwrite (bool): whether to overwrite existing files on dbfs with new files of the same name
        spark_version (str): version of Spark installed on the Databricks cluster

    Returns:
        A dictionary of libraries installed
    """
    print("Preparing for operationlization...")

    cosmosdb_jar_url = COSMOSDB_JAR_FILE_OPTIONS[spark_version]

    # download the cosmosdb jar
    local_jarname = os.path.basename(cosmosdb_jar_url)
    # only download if you need it:
    if overwrite or not os.path.exists(local_jarname):
        print("Downloading {}...".format(cosmosdb_jar_url))
        local_jarname, _ = urlretrieve(cosmosdb_jar_url, local_jarname)
    else:
        print("File {} already downloaded.".format(local_jarname))

    # upload jar to dbfs:
    upload_path = Path(dbfs_path, local_jarname).as_posix()
    print("Uploading CosmosDB driver to databricks at {}".format(upload_path))
    if dbfs_file_exists(api_client, upload_path) and overwrite:
        print("Overwriting file at {}".format(upload_path))
    DbfsApi(api_client).cp(recursive=False,
                           src=local_jarname,
                           dst=upload_path,
                           overwrite=overwrite)

    # setup the list of libraries to install:
    # jar library setup
    libs2install = [{"jar": upload_path}]
    # setup libraries to install:
    libs2install.extend([{"pypi": {"package": i}} for i in PYPI_O16N_LIBS])
    print(
        "Installing jar and pypi libraries required for operationalization...")
    LibrariesApi(api_client).install_libraries(cluster_id, libs2install)
    return libs2install
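An illustrative call of the function above; the valid spark_version values are the keys of COSMOSDB_JAR_FILE_OPTIONS, which is defined elsewhere in the module, so the "2.4" below is only a placeholder, as are the cluster ID and DBFS path.

# Placeholder arguments; spark_version must be a key of COSMOSDB_JAR_FILE_OPTIONS.
installed_libs = prepare_for_operationalization(
    cluster_id="0123-456789-abcdef",
    api_client=api_client,
    dbfs_path="dbfs:/FileStore/jars",
    overwrite=True,
    spark_version="2.4",
)
print(installed_libs)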
Example #5
def get_library_state(profile, cluster_id):
    """Get the state of the library installation on the remote cluster

    Args:
        profile (str): Databricks CLI profile string; its host and token are read from the CLI config
        cluster_id (str): Cluster ID

    Returns:
        list: list of installation statuses, one for each custom library
    """
    try:
        apiclient = connect(profile)
        client = LibrariesApi(apiclient)
        libraries = client.cluster_status(cluster_id)
    except Exception as ex:
        print_error(ex)
        return None

    if libraries.get("library_statuses", None) is None:
        return []
    else:
        return [lib["status"] for lib in libraries["library_statuses"]]
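A small polling helper built on get_library_state; the status strings ("INSTALLED", "FAILED") follow the Databricks Libraries API status values, and the profile and cluster ID are supplied by the caller.

import time

def wait_for_libraries(profile, cluster_id, timeout=600, interval=15):
    """Poll get_library_state until every library settles, or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        states = get_library_state(profile, cluster_id)
        if states is None:
            return False   # status call failed
        if not states:
            return True    # no custom libraries to wait for
        if all(s in ("INSTALLED", "FAILED") for s in states):
            return all(s == "INSTALLED" for s in states)
        time.sleep(interval)
    return False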
Example #6
def install_cli(
        api_client,
        cluster_id,
        jar,
        egg,
        whl,
        maven_coordinates,
        maven_repo,  # noqa
        maven_exclusion,
        pypi_package,
        pypi_repo,
        cran_package,
        cran_repo):
    """
    Install a library on a cluster. Libraries must first be uploaded to DBFS or S3
    (see `dbfs cp -h`). Unlike the API, only one library can be installed for each execution of
    `databricks libraries install`.

    Users should only provide one of
    [--jar, --egg, --whl, --maven-coordinates, --pypi-package, --cran-package].

    Installing a whl library on clusters running Databricks Runtime 4.2 or higher effectively runs
    the pip command against the wheel file directly on the driver and executors. The library must
    satisfy the wheel file name convention.
    To install multiple wheel files, pass a .wheelhouse.zip file that includes all the wheel files
    to the --whl option.

    Installing a wheel library on clusters running Databricks Runtime lower than 4.2 just adds the
    file to the PYTHONPATH variable, without installing its dependencies.
    More information is available here:
    https://docs.databricks.com/api/latest/libraries.html#managedlibrariesmanagedlibraryserviceinstalllibraries
    """
    library = _get_library_from_options(jar, egg, whl, maven_coordinates,
                                        maven_repo, maven_exclusion,
                                        pypi_package, pypi_repo, cran_package,
                                        cran_repo)
    LibrariesApi(api_client).install_libraries(cluster_id, [library])
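Programmatically, the --whl option corresponds to a whl library spec; the shape below is assumed by analogy with the jar and egg specs used elsewhere in these examples, and both DBFS paths are placeholders. Per the docstring, a .wheelhouse.zip bundling several wheels can be passed the same way.

# Assumed whl spec shape; paths are placeholders.
whl_lib = {"whl": "dbfs:/FileStore/jars/my_package-1.0.0-py3-none-any.whl"}
wheelhouse_lib = {"whl": "dbfs:/FileStore/jars/wheelhouse.zip"}

LibrariesApi(api_client).install_libraries(cluster_id, [whl_lib])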
Example #7
def _cluster_status(api_client, cluster_id):
    click.echo(
        pretty_format(LibrariesApi(api_client).cluster_status(cluster_id)))
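The pretty-printed payload has, per Examples #1 and #5, a library_statuses list whose entries carry a library spec and its status; the sketch below shows that shape for illustration only, with placeholder values.

# Illustrative response shape; every value is a placeholder.
example_status = {
    "cluster_id": "0123-456789-abcdef",
    "library_statuses": [
        {"library": {"pypi": {"package": "requests"}}, "status": "INSTALLED"},
        {"library": {"jar": "dbfs:/FileStore/jars/my_lib.jar"}, "status": "PENDING"},
    ],
}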
Example #8
def _all_cluster_statuses(config):
    click.echo(pretty_format(LibrariesApi(config).all_cluster_statuses()))
Example #9
                                               status["state"])))
        sys.exit()

    # install the library and its dependencies
    print("Installing the reco_utils module onto databricks cluster {}".format(
        args.cluster_id))
    libs2install = [{"egg": upload_path}]
    # PYPI dependencies:
    libs2install.extend([{"pypi": {"package": i}} for i in PYPI_RECO_LIB_DEPS])

    # add mmlspark if selected.
    if args.mmlspark:
        print("Installing MMLSPARK package...")
        libs2install.extend([MMLSPARK_INFO])
    print(libs2install)
    LibrariesApi(my_api_client).install_libraries(args.cluster_id,
                                                  libs2install)

    # prepare for operationalization if desired:
    if args.prepare_o16n:
        prepare_for_operationalization(
            cluster_id=args.cluster_id,
            api_client=my_api_client,
            dbfs_path=args.dbfs_path,
            overwrite=args.overwrite,
            spark_version=status["spark_version"][0],
        )

    # restart the cluster for new installation(s) to take effect.
    print("Restarting databricks cluster {}".format(args.cluster_id))
    ClusterApi(my_api_client).restart_cluster(args.cluster_id)
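A hedged follow-up to the restart: polling until the cluster reports RUNNING again before relying on the newly installed libraries. get_cluster and its "state" field are assumptions about the clusters API response here.

import time

# Wait for the restarted cluster to come back up before using it.
cluster_client = ClusterApi(my_api_client)
for _ in range(60):
    state = cluster_client.get_cluster(args.cluster_id).get("state")
    if state == "RUNNING":
        print("Cluster {} is running.".format(args.cluster_id))
        break
    time.sleep(10)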