Code Example #1
    def __init__(self, host, connection_info, python_path, sudo, timeout, env,
                 no_spark):
        self._logger = setup_logging("DatabricksKernel")

        self.no_spark = no_spark

        self.dbjl_env = dict([e.split("=") for e in env[0].split(" ")])
        self._logger.debug("Environment = %s", self.dbjl_env)
        self.profile = self.dbjl_env.get("DBJL_PROFILE", None)
        self.host, self.token = get_db_config(self.profile)

        self.cluster_id = self.dbjl_env.get("DBJL_CLUSTER", None)

        self.python_command = None
        self.scala_context_id = None
        if not no_spark:
            # create remote executions context and retrieve its python path
            python_path = self.create_execution_context()
        self._logger.info("Remote python path: %s", python_path)

        super().__init__(
            host,
            connection_info,
            python_path,
            sudo=sudo,
            timeout=timeout,
            env=env,
            logger=self._logger,
        )

        self.kernel_status = DatabricksKernelStatus(connection_info,
                                                    self._logger)
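The constructor expects env to be a one-element list of space-separated KEY=VALUE pairs. A minimal sketch of that parsing step, with invented profile and cluster values purely for illustration:

# Sketch of the env parsing above; the profile and cluster values are invented.
env = ["DBJL_PROFILE=demo DBJL_CLUSTER=0123-456789-abcdef"]
dbjl_env = dict(e.split("=") for e in env[0].split(" "))
# dbjl_env == {"DBJL_PROFILE": "demo", "DBJL_CLUSTER": "0123-456789-abcdef"}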
Code Example #2
    def start_cluster(self, profile, cluster_id, kernel_id):
        """Start cluster in a separate thread
        
        Args:
            profile (str): Databricks CLI profile string
            cluster_id (str): Cluster ID
            kernel_id (str): Internal jupyter kernel ID
        """
        global_status = KernelHandler.status

        if global_status.installing(profile, cluster_id):
            _logger.info("DbStartHandler cluster %s:%s already starting" %
                         (profile, cluster_id))
        else:
            _logger.info("DbStartHandler cluster %s:%s start triggered" %
                         (profile, cluster_id))
            global_status.set_installing(profile, cluster_id)

            host, token = get_db_config(profile)
            cluster_id, public_ip, cluster_name, dummy = get_cluster(
                profile, host, token, cluster_id, global_status)
            if cluster_name is None:
                global_status.set_status(profile, cluster_id,
                                         "ERROR: Cluster could not be found")
                return

            global_status.set_status(profile, cluster_id, "Configuring SSH")
            prepare_ssh_config(cluster_id, profile, public_ip)
            if not is_reachable(public_dns=public_ip):
                global_status.set_status(profile, cluster_id, "UNREACHABLE")
            else:
                global_status.set_status(profile, cluster_id,
                                         "Installing driver libs")
                result = install_libs(cluster_id, host, token)
                if result[0] == 0:
                    _logger.info("DbStartHandler: installations done")
                else:
                    _logger.error("DbStartHandler: installations failed")
                    global_status.set_status(profile, cluster_id, "ERROR")

                time.sleep(1)
                kernel = self.get_kernel(kernel_id)
                kernel.restart_kernel(now=True)
                global_status.set_status(profile, cluster_id, "Running")
            global_status.unset_installing(profile, cluster_id)
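The installing()/set_installing()/unset_installing() calls act as a per-(profile, cluster) guard so that a second start request does not trigger the driver-library installation while one is already in flight. A minimal sketch of a status object with those assumed semantics (an illustration, not the project's actual KernelHandler.status implementation):

class InstallGuard:
    """Hypothetical per-cluster 'installing' flag; assumed semantics only."""

    def __init__(self):
        self._installing = set()

    def installing(self, profile, cluster_id):
        # True while a start/install for this profile and cluster is in progress
        return (profile, cluster_id) in self._installing

    def set_installing(self, profile, cluster_id):
        self._installing.add((profile, cluster_id))

    def unset_installing(self, profile, cluster_id):
        self._installing.discard((profile, cluster_id))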
Code Example #3
    def setup_method(self):
        self.profile = get_profile()
        self.host, self.token = get_db_config(self.profile)
        self.log = logging.getLogger("TestEnd2End")
        self.log.info("Using %s on %s", EXE, ("AWS" if is_aws() else "Azure"))
Code Example #4
    def kernel_customize(self):
        if self.no_spark:
            self._logger.info("This kernel will have no Spark Session (--no-spark)")
            return None

        self._logger.debug("Create Spark Session")

        profile = self.dbjl_env.get("DBJL_PROFILE", None)
        if profile is None:
            self._logger.error("Environment variable DBJL_PROFILE is not set")
            return None

        cluster_id = self.dbjl_env.get("DBJL_CLUSTER", None)
        if cluster_id is None:
            self._logger.error("Environment variable DBJL_CLUSTER is not set")
            return None

        host, token = get_db_config(profile)
        self._logger.debug("profile=%s, host=%s, cluster_id=%s", profile, host, cluster_id)

        try:
            self.command = Command(url=host, cluster_id=cluster_id, token=token)
        except DatabricksApiException as ex:
            self._logger.error(str(ex))
            return None

        self._logger.info("Gateway created for cluster '%s'", cluster_id)

        # Fetch auth_token and gateway port ...
        #
        try:
            cmd = (
                "c=sc._gateway.client.gateway_client; "
                + 'print(c.gateway_parameters.auth_token + "|" + str(c.port))'
            )
            result = self.command.execute(cmd)
        except Exception as ex:  # pylint: disable=broad-except
            result = (-1, str(ex))

        if result[0] != 0:
            self._logger.error("error %s: %s", *result)
            return None

        gw_token, gw_port = result[1].split("|")
        gw_port = int(gw_port)
        self._logger.debug("Gateway token=%s, port=%s", gw_token, gw_port)

        cmd = (
            "from databrickslabs_jupyterlab.connect import dbcontext; "
            + "dbcontext(progressbar=True, gw_port={gw_port}, gw_token='{gw_token}')".format(
                gw_port=gw_port, gw_token=gw_token
            )
        )
        try:
            result = self.kc.execute_interactive(
                cmd, silent=True, store_history=False, user_expressions={"spark": "spark.version"}
            )
            if result["content"]["status"] != "error":
                self.kernel_status.set_spark_running()
            else:
                self._logger.error("Error: Cluster unreachable")
                self.kernel_status.set_unreachable()

        except Exception as ex:  # pylint: disable=broad-except
            self._logger.error("Error: %s", str(ex))
            self.kernel_status.set_connect_failed()
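The remote command prints the Py4J gateway's auth token and port as a single "<token>|<port>" string, which is then split locally before being passed to dbcontext. A tiny illustration of that round trip (token and port values are invented):

# Hypothetical payload produced by the remote command; values are invented.
payload = "0abc123def456|40101"
gw_token, gw_port = payload.split("|")
gw_port = int(gw_port)
# gw_token == "0abc123def456", gw_port == 40101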
Code Example #5
def create_cluster(client, cluster_conf):
    try:
        response = client.create_cluster(cluster_conf)
        return response
    except Exception as ex:  # pylint: disable=broad-except
        print(ex)
        return None


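# Illustration only: a minimal cluster_conf that create_cluster() above could
# receive. The keys follow the Databricks Clusters API "create" payload; the
# values are invented placeholders, not the configuration this script builds.
example_cluster_conf = {
    "cluster_name": "dbjl-test-cluster",
    "spark_version": "7.3.x-scala2.12",
    "node_type_id": "i3.xlarge",
    "num_workers": 1,
}
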
assert os.environ.get("CLOUD") is not None

profile = get_profile()
instances = get_instances()
spark_versions = get_spark_versions()

host, token = get_db_config(profile)
with open(expanduser("~/.ssh/id_{}.pub".format(profile))) as ssh_key_file:
    ssh_key = ssh_key_file.read()

try:
    apiclient = connect(profile)
    client = ClusterApi(apiclient)
except Exception as ex:  # pylint: disable=broad-except
    print(ex)
    sys.exit(1)

random.seed(42)

cluster_ids = {}

for spark_version in spark_versions: