Example #1
0
def main():
    """Provision or terminate a Databricks cluster based on CLI arguments.

    Connection settings are read from the environment:

    * ``DATABRICKS_DOMAIN`` and ``DATABRICKS_ACCESS_TOKEN`` (``None`` when
      unset),
    * ``DATABRICKS_CLUSTER_VMTYPE`` (defaults to ``"Standard_D3_v2"``),
    * ``DATABRICKS_CLUSTER_ID`` (unset or empty both become ``None``).

    The cluster is provisioned when ``args.terminate`` is exactly ``False``;
    for any other value it is terminated, with ``args.permanent`` forwarded
    to the termination call. The process then exits via
    ``sys.exit(cluster.id)``.
    """
    args = parse_arguments()

    env = os.environ
    domain = env.get("DATABRICKS_DOMAIN")
    access_token = env.get("DATABRICKS_ACCESS_TOKEN")
    vm_type = env.get("DATABRICKS_CLUSTER_VMTYPE", "Standard_D3_v2")

    # Environment values are strings, so an empty value is the only falsy
    # one; treat it the same as an unset variable.
    cluster_id = env.get("DATABRICKS_CLUSTER_ID") or None

    name_suffix = get_cluster_name()
    cluster = DatabricksCluster(cluster_id, name_suffix, domain)

    # The library manifest is expected to sit next to this script.
    libraries_config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'library.json'
    )

    if args.terminate is not False:
        terminate_cluster(cluster, access_token, libraries_config_path,
                          args.permanent)
    else:
        provision_cluster(cluster, access_token, libraries_config_path,
                          vm_type)

    # NOTE(review): sys.exit() with a string argument prints it to stderr
    # and exits with status 1; with None it exits with status 0. Presumably
    # the caller captures the cluster id from this output -- confirm that a
    # non-zero exit status on success is intended.
    sys.exit(cluster.id)
Example #2
0
def terminate_cluster(cluster: DatabricksCluster, databricks_access_token,
                      libraries_config_path, permanent: bool):
    """Terminate the given cluster and report the outcome on stdout.

    Args:
        cluster: Cluster to terminate; its ``id`` must not be ``None``.
        databricks_access_token: Token forwarded to ``cluster.terminate``.
        libraries_config_path: Path forwarded to ``cluster.terminate``.
        permanent: Forwarded to ``cluster.terminate``. Only the exact value
            ``False`` yields the "terminated" message; any other value
            yields the "permanently deleted" message.

    The process exits with status 1 when the cluster has no id or when
    ``cluster.terminate`` raises ``ClusterManagementException``.
    """
    # Guard: without an id there is nothing to terminate. sys.exit raises
    # SystemExit, so nothing below runs in that case.
    if cluster.id is None:
        print("[Error]:Clusterid is required")
        sys.exit(1)

    try:
        cluster.terminate(databricks_access_token, libraries_config_path,
                          permanent)
    except ClusterManagementException as err:
        print(str(err))
        sys.exit(1)

    if permanent is False:
        message = "Cluster %s is terminated"
    else:
        message = "Cluster %s is permanently deleted"
    print(message % (cluster.id))
Example #3
0
def provision_cluster(
    cluster: DatabricksCluster,
    databricks_access_token,
    libraries_config_path,
    databricks_cluster_vmtype
):
    """Bring a cluster up and install its libraries, reporting progress.

    Steps, in order:

    1. Start the cluster when ``cluster.id`` is truthy; otherwise create a
       new one (passing the VM type only when it is not ``None``).
    2. Poll every 30 seconds while ``cluster.state`` is ``'PENDING'``.
    3. Request library installation from ``libraries_config_path``.
    4. Poll every 30 seconds while the library status is ``'INSTALLING'``.

    Args:
        cluster: Cluster object to create or start.
        databricks_access_token: Token forwarded to every cluster call.
        libraries_config_path: Path forwarded to
            ``cluster.install_libraries``.
        databricks_cluster_vmtype: VM type for a newly created cluster, or
            ``None`` to call ``cluster.create`` with the token only.

    The process exits with status 1 when any step raises
    ``ClusterManagementException``.
    """
    try:
        if cluster.id:
            cluster.start(databricks_access_token)
            print("Requested to start the cluster with id %s" % (cluster.id))
        else:
            # Pass the VM type through only when one was supplied.
            create_args = [databricks_access_token]
            if databricks_cluster_vmtype is not None:
                create_args.append(databricks_cluster_vmtype)
            cluster.create(*create_args)
            print("Requested to create the cluster...")

        # NOTE(review): the loop ends on any state other than 'PENDING';
        # the resulting state is not inspected before continuing.
        while cluster.state == 'PENDING':
            print("Cluster %s is pending..." % (cluster.id))
            time.sleep(30)
            cluster.get_state(databricks_access_token)

        cluster.install_libraries(databricks_access_token,
                                  libraries_config_path)
        print("Installing libraries on %s..." % (cluster.id))

        # NOTE(review): likewise, any status other than 'INSTALLING' is
        # reported as success below.
        while True:
            status = cluster.check_libraries(databricks_access_token)
            if status != 'INSTALLING':
                break
            time.sleep(30)
            print("Installing libraries on %s..." % (cluster.id))

        print("Libraries installed and verified on cluster %s" % (cluster.id))
        print("Cluster %s is ready" % (cluster.id))
    except ClusterManagementException as err:
        print(str(err))
        sys.exit(1)