Ejemplo n.º 1
0
def uninstall_cli(
        api_client,
        cluster_id,
        all,
        jar,
        egg,
        whl,
        maven_coordinates,
        maven_repo,  # noqa
        maven_exclusion,
        pypi_package,
        pypi_repo,
        cran_package,
        cran_repo):
    """
    Mark libraries on a cluster to be uninstalled. Libraries which are marked to be uninstalled
    will stay attached until the cluster is restarted. (see `databricks clusters restart -h`).
    """
    # Single-library mode: build the library spec from the jar/egg/whl/maven/
    # pypi/cran options and mark just that one for removal.
    if not all:
        selected = _get_library_from_options(
            jar, egg, whl, maven_coordinates, maven_repo, maven_exclusion,
            pypi_package, pypi_repo, cran_package, cran_repo)
        LibrariesApi(api_client).uninstall_libraries(cluster_id, [selected])
        _uninstall_cli_exit_help(cluster_id)
        return

    # "All" mode: mark every library the cluster status currently reports.
    client = LibrariesApi(api_client)
    status_response = client.cluster_status(cluster_id)
    attached = [
        entry['library']
        for entry in status_response.get('library_statuses', [])
    ]
    # Nothing reported on the cluster: return without calling the API or
    # printing the exit help.
    if attached:
        client.uninstall_libraries(cluster_id, attached)
        _uninstall_cli_exit_help(cluster_id)
def get_library_state(profile, cluster_id):
    """Get the installation status of every library on the remote cluster

    Args:
        profile: value handed to ``connect`` to obtain the API client
        cluster_id (str): Cluster ID

    Returns:
        list: the ``status`` field of each entry under ``library_statuses``
            in the cluster status response; an empty list when that key is
            missing or ``None``.
        None: when connecting or querying raised; the exception is passed
            to ``print_error`` first.
    """
    try:
        status_response = LibrariesApi(connect(profile)).cluster_status(cluster_id)
    except Exception as ex:
        print_error(ex)
        return None

    entries = status_response.get("library_statuses")
    if entries is None:
        return []
    return [entry["status"] for entry in entries]
Ejemplo n.º 3
0
class ClusterManagement:
    """Create/update Databricks clusters and reconcile their libraries.

    Bundles a ``ClusterApi`` and a ``LibrariesApi`` client built on one shared
    ``ApiClient`` together with the logger used for progress reporting.
    """

    # States in which the cluster is busy; the only option is to wait.
    _TRANSITIONAL_STATES = ("RESTARTING", "RESIZING", "TERMINATING")
    # States in which the cluster is not up yet (stopped or still starting).
    _NOT_UP_STATES = ("TERMINATED", "PENDING")

    def __init__(self, logger, **kwargs):
        """
        :param logger: logger used for all progress and error messages
        :param **kwargs:
            forwarded unchanged to ``ApiClient``;
            keys should only include: token, host
        :type **kwargs: dict
        """
        self.api_client = ApiClient(**kwargs)
        self.cluster_client = ClusterApi(self.api_client)
        self.libraries_client = LibrariesApi(self.api_client)
        self.logger = logger

    def create_cluster(self, cluster_specs):
        """function to build/edit cluster and start

        The cluster is looked up by name. If the lookup fails a new cluster is
        created; if it succeeds and the spec is not a subset of the existing
        cluster's settings, the cluster is edited. Afterwards the cluster
        state is polled until it has left the busy / not-up states, starting
        the cluster whenever it is ``TERMINATED``.

        :param cluster_specs: cluster specs in clusterconf.yaml
        :type cluster_specs: dict
        :return: id of the existing or newly created cluster
        :rtype: str
        :raises RuntimeError: if the cluster status cannot be retrieved
            while waiting for the cluster
        """
        # Only the lookup is guarded: any lookup failure is treated as
        # "cluster does not exist yet". A failing edit of an existing
        # cluster must NOT end up here, otherwise a duplicate cluster with
        # the same name would be created.
        try:
            cluster = self.cluster_client.get_cluster_by_name(
                cluster_specs["cluster_name"])
        except Exception:
            cluster = None

        if cluster is None:
            cluster = self.cluster_client.create_cluster(cluster_specs)
            self.logger.info(f"the cluster {cluster} is being created")
            # Give the freshly requested cluster time before polling it.
            time.sleep(30)
        else:
            self.logger.info(f"cluster {cluster['cluster_name']} exists "
                             f"with id {cluster['cluster_id']}")
            self.logger.debug(cluster_specs)
            self.logger.debug(cluster)

            # The spec "matches" when every key/value pair of the spec is
            # present in the existing cluster description.
            if cluster_specs.items() <= cluster.items():
                self.logger.info("cluster spec matches")
            else:
                self.logger.warning(
                    "cluster spec doesn't match existing cluster")
                # Send a copy carrying the id so the caller's spec dict is
                # left untouched.
                self.cluster_client.edit_cluster(
                    dict(cluster_specs, cluster_id=cluster['cluster_id']))

        cluster_id = cluster['cluster_id']
        state = self._cluster_state(cluster_id)

        while state in self._TRANSITIONAL_STATES:
            self.logger.info(f"waiting for the cluster. status {state}")
            time.sleep(10)
            state = self._cluster_state(cluster_id)

        while state in self._NOT_UP_STATES:
            self.logger.info(f"cluster status {state}")
            if state == "TERMINATED":
                self.logger.info(f"starting cluster, status {state}")
                self.cluster_client.start_cluster(cluster_id)

            time.sleep(10)
            state = self._cluster_state(cluster_id)

        self.logger.info(f"cluster is up. final status: {state}")

        return cluster_id

    def install_cluster_library(self, cluster_id, cluster_libraries):
        """function to install libraries on cluster

        Compares the libraries reported for the cluster with the requested
        ones: requested-but-missing libraries are installed, present-but-not-
        requested libraries are uninstalled. Errors are logged and not
        re-raised.

        :param cluster_id: id of cluster in Databricks to install libs on
        :type cluster_id: str
        :param cluster_libraries: clusterlib.yaml
        :type cluster_libraries: list(dict)
        """
        try:
            if not isinstance(cluster_libraries, list):
                raise ValueError(
                    f"cluster_libraries is not a list: {cluster_libraries}")

            current_libs = self.libraries_client.cluster_status(cluster_id)

            # parse the libs to match the yaml
            parsed_currentlibs = [
                lib["library"]
                for lib in current_libs.get("library_statuses") or []
            ]

            install_libs = [
                x for x in cluster_libraries if x not in parsed_currentlibs
            ]
            self.logger.info(f"install libraries: {install_libs}")
            # Skip the API round trip when there is nothing to install.
            if install_libs:
                self.libraries_client.install_libraries(cluster_id,
                                                        install_libs)

            uninstall_libs = [
                x for x in parsed_currentlibs if x not in cluster_libraries
            ]
            self.logger.warning(f"uninstall libraries: {uninstall_libs}")
            # Skip the API round trip when there is nothing to uninstall.
            if uninstall_libs:
                self.libraries_client.uninstall_libraries(cluster_id,
                                                          uninstall_libs)

        except Exception as error:
            # Best effort: library problems are reported, not propagated.
            self.logger.error(f"install_cluster_library error: {error!r}")

    def _cluster_status(self, cluster_id):
        """internal method to get cluster status

        :param cluster_id: id of databricks cluster
        :type cluster_id: str
        :return: the response of ``get_cluster``, or ``None`` when the call
            raised (the error is logged)
        """
        try:
            return self.cluster_client.get_cluster(cluster_id)
        except Exception as error:
            self.logger.error(f"cluster status error: {error}")
            return None

    def _cluster_state(self, cluster_id):
        """internal method to get the ``state`` field of the cluster status

        :param cluster_id: id of databricks cluster
        :type cluster_id: str
        :raises RuntimeError: if the status could not be retrieved
        """
        status = self._cluster_status(cluster_id)
        if status is None:
            # Fail with a clear message instead of an opaque
            # "'NoneType' object is not subscriptable".
            raise RuntimeError(
                f"could not retrieve status of cluster {cluster_id}")
        return status['state']

    def delete_unmanaged_clusters(self, cluster_config):
        """function to delete clusters that are not in clusterconf.yaml

        Clusters whose ``cluster_source`` is ``JOB`` (case-insensitive) are
        never considered. Every other listed cluster whose name does not
        appear in ``cluster_config`` is permanently deleted.

        :param cluster_config: clusterconf.yaml
        :type cluster_config: list(dict)
        """
        response = self.cluster_client.list_clusters()
        # A missing or empty "clusters" entry means there is nothing to
        # inspect; never iterate over the response dict itself.
        existing_clusters = [
            c for c in response.get("clusters") or []
            if c["cluster_source"].upper() != "JOB"
        ]
        self.logger.debug(existing_clusters)

        managed_names = {c["cluster_name"] for c in cluster_config}
        remove_cluster = [(c["cluster_name"], c["cluster_id"])
                          for c in existing_clusters
                          if c["cluster_name"] not in managed_names]

        self.logger.warning("removing unmanaged clusters:")
        self.logger.warning(remove_cluster)

        for _name, cluster_id in remove_cluster:
            self.logger.debug(f"deleting {cluster_id}")
            self.cluster_client.permanent_delete(cluster_id)

    def main(self, cluster_specs, cluster_libraries):
        """main method to build/edit clusters and install libs

        :param cluster_specs: cluster spec in clusterconf.yaml
        :type cluster_specs: dict
        :param cluster_libraries: clusterlib.yaml
        :type cluster_libraries: list(dict)
        """
        self.logger.info(
            f"create/update cluster: {cluster_specs['cluster_name']}")
        cluster_id = self.create_cluster(cluster_specs)

        self.logger.info("installing libraries")
        self.install_cluster_library(cluster_id, cluster_libraries)