Ejemplo n.º 1
0
class CmCluster(Cluster):
    """A Cluster whose services are discovered and reached through the CM API.

    The instance keeps the CM API handle in 'cm', the selected CM cluster in
    'cm_cluster', and a per-host pool of idle SSH clients that shell() draws from.
    """

    def __init__(self,
                 host_name,
                 port=None,
                 user="******",
                 password="******",
                 cluster_name=None,
                 ssh_user=None,
                 ssh_port=None,
                 ssh_key_file=None,
                 use_tls=False):
        """Connect to the CM API at 'host_name' and select the cluster to work with.

        If 'port' is None it defaults to CM_TLS_PORT when 'use_tls' is set and to
        CM_CLEAR_PORT otherwise. If 'cluster_name' is given, the cluster with that
        name is selected; otherwise CM must report exactly one cluster. The ssh_*
        values are stored on the instance for use when SSH clients are created.

        Raises an Exception if CM reports no clusters, if 'cluster_name' does not
        match any cluster, or if no name was given and CM reports several clusters.
        """
        # Initialize strptime() to workaround https://bugs.python.org/issue7980. Apparently
        # something in the CM API uses strptime().
        strptime("2015", "%Y")

        Cluster.__init__(self)
        # IMPALA-5455: If the caller doesn't specify port, default it based on use_tls
        if port is None:
            port = CM_TLS_PORT if use_tls else CM_CLEAR_PORT
        self.cm = CmApiResource(host_name,
                                server_port=port,
                                username=user,
                                password=password,
                                use_tls=use_tls)
        clusters = self.cm.get_all_clusters()
        if not clusters:
            raise Exception("No clusters found in CM at %s" % host_name)
        if cluster_name:
            clusters_by_name = dict((c.name, c) for c in clusters)
            if cluster_name not in clusters_by_name:
                # The two sentences are separated by a space so the message reads
                # correctly once the literals are concatenated.
                raise Exception(("No clusters named %s found in CM at %s. "
                                 "Available clusters are %s.") %
                                (cluster_name, host_name,
                                 ", ".join(sorted(clusters_by_name))))
            self.cm_cluster = clusters_by_name[cluster_name]
        else:
            if len(clusters) > 1:
                raise Exception(
                    ("Too many clusters found in CM at %s;"
                     " a cluster name must be provided") % host_name)
            # Exactly one cluster is left at this point.
            self.cm_cluster = clusters[-1]

        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_key_file = ssh_key_file
        # Guards every access to the client pool below.
        self._ssh_client_lock = Lock()
        # Maps a host name to the list of idle (not currently borrowed) SSH clients.
        self._ssh_clients_by_host_name = defaultdict(list)

    def shell(self, cmd, host_name, timeout_secs=DEFAULT_TIMEOUT):
        """Run 'cmd' on 'host_name' through a pooled SSH client and return whatever
        the client's shell() call returns.
        """
        with self._ssh_client(host_name) as client:
            return client.shell(cmd, timeout_secs=timeout_secs)

    @contextmanager
    def _ssh_client(self, host_name):
        """Yields an SSH client for 'host_name' for use in a 'with' block.

        An idle pooled client is reused if one exists; otherwise a new client is
        created and connected. When the 'with' block exits without an Exception the
        client is put back into the pool for reuse. If an Exception propagates out
        of the block, the client is not returned to the pool.
        """
        with self._ssh_client_lock:
            clients = self._ssh_clients_by_host_name[host_name]
            if clients:
                client = clients.pop()
            else:
                LOG.debug("Creating new SSH client for %s", host_name)
                client = SshClient()
                # NOTE(review): self.ssh_port is stored by __init__ but is not passed
                # to connect() here -- confirm whether that is intended.
                client.connect(host_name,
                               username=self.ssh_user,
                               key_filename=self.ssh_key_file)
        error_occurred = False
        try:
            yield client
        except Exception:
            error_occurred = True
            raise
        finally:
            if not error_occurred:
                with self._ssh_client_lock:
                    self._ssh_clients_by_host_name[host_name].append(client)

    def _init_local_hadoop_conf_dir(self):
        """Fetch the HIVE service's client configuration archive from CM and unpack
        it into a new temporary directory stored in '_local_hadoop_conf_dir'.

        Directory entries in the archive are skipped and every file is written
        directly into the temporary directory under its base name, so the archive's
        directory structure is flattened.
        """
        self._local_hadoop_conf_dir = mkdtemp()
        data = StringIO(
            self.cm.get(
                "/clusters/%s/services/%s/clientConfig" %
                (self.cm_cluster.name, self._find_service("HIVE").name)))
        zip_file = ZipFile(data)
        for name in zip_file.namelist():
            if name.endswith("/"):
                continue
            extract_path = os.path.join(self._local_hadoop_conf_dir,
                                        os.path.basename(name))
            with open(extract_path, "w") as conf_file:
                # ZipFile.read() returns the member's content and does not leave an
                # open member file object behind.
                conf_file.write(zip_file.read(name))

    def _find_service(self, service_type):
        """Find a service by its CM API service type. An exception will be raised if
        no service is found or multiple services are found. See the CM API
        documentation for more details about the service type.
        """
        services = [
            s for s in self.cm_cluster.get_all_services()
            if s.type == service_type
        ]
        if not services:
            raise Exception("No service of type %s found in cluster %s" %
                            (service_type, self.cm_cluster.name))
        if len(services) > 1:
            # The format arguments must be a tuple; len() itself takes one argument.
            raise Exception(
                "Found %s services in cluster %s; only one is expected." %
                (len(services), self.cm_cluster.name))
        return services[0]

    def _find_role(self, role_type, service_type):
        """Find a role by its CM API role and service type. An exception will be
        raised if no roles are found; if several are found the first one is
        returned. See the CM API documentation for more details about the service
        and role types.
        """
        service = self._find_service(service_type)
        roles = service.get_roles_by_type(role_type)
        if not roles:
            raise Exception("No roles of type %s found in service %s" %
                            (role_type, service.name))
        return roles[0]

    def _init_hdfs(self):
        """Create the Hdfs helper for this cluster and store it in '_hdfs'."""
        self._hdfs = Hdfs(self, "hdfs")

    def _init_hive(self):
        """Locate the HIVESERVER2 role of the HIVE service and store a Hive helper
        pointing at its host and thrift port in '_hive'.
        """
        hs2 = self._find_role("HIVESERVER2", "HIVE")
        host = self.cm.get_host(hs2.hostRef.hostId)
        config = hs2.get_config(view="full")["hs2_thrift_address_port"]
        # Fall back to the config's default when no explicit value is set.
        self._hive = Hive(self, str(host.hostname),
                          int(config.value or config.default))

    def _init_impala(self):
        """Create the CmImpala helper for the IMPALA service and store it in
        '_impala'.
        """
        self._impala = CmImpala(self, self._find_service("IMPALA"))
Ejemplo n.º 2
0
class CmCluster(Cluster):
  """A Cluster whose services are discovered and reached through the CM API.

     The instance keeps the CM API handle in 'cm', the selected CM cluster in
     'cm_cluster', and a per-host pool of idle SSH clients that shell() draws from.
  """

  def __init__(self, host_name, port=None, user="******", password="******",
               cluster_name=None, ssh_user=None, ssh_port=None, ssh_key_file=None,
               use_tls=False):
    """Connect to the CM API at 'host_name' and select the cluster to work with.

       If 'port' is None it defaults to CM_TLS_PORT when 'use_tls' is set and to
       CM_CLEAR_PORT otherwise. If 'cluster_name' is given, the cluster with that name
       is selected; otherwise CM must report exactly one cluster. The ssh_* values are
       stored on the instance for use when SSH clients are created.

       Raises an Exception if CM reports no clusters, if 'cluster_name' does not match
       any cluster, or if no name was given and CM reports several clusters.
    """
    # Initialize strptime() to workaround https://bugs.python.org/issue7980. Apparently
    # something in the CM API uses strptime().
    strptime("2015", "%Y")

    Cluster.__init__(self)
    # IMPALA-5455: If the caller doesn't specify port, default it based on use_tls
    if port is None:
      port = CM_TLS_PORT if use_tls else CM_CLEAR_PORT
    self.cm = CmApiResource(host_name, server_port=port, username=user, password=password,
                            use_tls=use_tls)
    clusters = self.cm.get_all_clusters()
    if not clusters:
      raise Exception("No clusters found in CM at %s" % host_name)
    if cluster_name:
      clusters_by_name = dict((c.name, c) for c in clusters)
      if cluster_name not in clusters_by_name:
        # The two sentences are separated by a space so the message reads correctly
        # once the literals are concatenated.
        raise Exception(("No clusters named %s found in CM at %s. "
            "Available clusters are %s.")
            % (cluster_name, host_name, ", ".join(sorted(clusters_by_name))))
      self.cm_cluster = clusters_by_name[cluster_name]
    else:
      if len(clusters) > 1:
        raise Exception(("Too many clusters found in CM at %s;"
            " a cluster name must be provided")
            % host_name)
      # Exactly one cluster is left at this point.
      self.cm_cluster = clusters[-1]

    self.ssh_user = ssh_user
    self.ssh_port = ssh_port
    self.ssh_key_file = ssh_key_file
    # Guards every access to the client pool below.
    self._ssh_client_lock = Lock()
    # Maps a host name to the list of idle (not currently borrowed) SSH clients.
    self._ssh_clients_by_host_name = defaultdict(list)

  def shell(self, cmd, host_name, timeout_secs=DEFAULT_TIMEOUT):
    """Run 'cmd' on 'host_name' through a pooled SSH client and return whatever the
       client's shell() call returns.
    """
    with self._ssh_client(host_name) as client:
      return client.shell(cmd, timeout_secs=timeout_secs)

  @contextmanager
  def _ssh_client(self, host_name):
    """Yields an SSH client for 'host_name' for use in a 'with' block.

       An idle pooled client is reused if one exists; otherwise a new client is created
       and connected. When the 'with' block exits without an Exception the client is put
       back into the pool for reuse. If an Exception propagates out of the block, the
       client is not returned to the pool.
    """
    with self._ssh_client_lock:
      clients = self._ssh_clients_by_host_name[host_name]
      if clients:
        client = clients.pop()
      else:
        # IMPALA-7460: Insulate this import away from the global context so as to avoid
        # requiring Paramiko unless it's absolutely needed.
        from tests.util.ssh_util import SshClient
        LOG.debug("Creating new SSH client for %s", host_name)
        client = SshClient()
        # NOTE(review): self.ssh_port is stored by __init__ but is not passed to
        # connect() here -- confirm whether that is intended.
        client.connect(host_name, username=self.ssh_user, key_filename=self.ssh_key_file)
    error_occurred = False
    try:
      yield client
    except Exception:
      error_occurred = True
      raise
    finally:
      if not error_occurred:
        with self._ssh_client_lock:
          self._ssh_clients_by_host_name[host_name].append(client)

  def _init_local_hadoop_conf_dir(self):
    """Fetch the HIVE service's client configuration archive from CM and unpack it into
       a new temporary directory stored in '_local_hadoop_conf_dir'.

       Directory entries in the archive are skipped and every file is written directly
       into the temporary directory under its base name, so the archive's directory
       structure is flattened.
    """
    self._local_hadoop_conf_dir = mkdtemp()
    data = StringIO(self.cm.get("/clusters/%s/services/%s/clientConfig"
      % (self.cm_cluster.name, self._find_service("HIVE").name)))
    zip_file = ZipFile(data)
    for name in zip_file.namelist():
      if name.endswith("/"):
        continue
      extract_path = os.path.join(self._local_hadoop_conf_dir, os.path.basename(name))
      with open(extract_path, "w") as conf_file:
        # ZipFile.read() returns the member's content and does not leave an open member
        # file object behind.
        conf_file.write(zip_file.read(name))

  def _find_service(self, service_type):
    """Find a service by its CM API service type. An exception will be raised if no
       service is found or multiple services are found. See the CM API documentation for
       more details about the service type.
    """
    services = [s for s in self.cm_cluster.get_all_services() if s.type == service_type]
    if not services:
      raise Exception("No service of type %s found in cluster %s"
          % (service_type, self.cm_cluster.name))
    if len(services) > 1:
      # The format arguments must be a tuple; len() itself takes one argument.
      raise Exception("Found %s services in cluster %s; only one is expected."
        % (len(services), self.cm_cluster.name))
    return services[0]

  def _find_role(self, role_type, service_type):
    """Find a role by its CM API role and service type. An exception will be raised if
       no roles are found; if several are found the first one is returned. See the CM
       API documentation for more details about the service and role types.
    """
    service = self._find_service(service_type)
    roles = service.get_roles_by_type(role_type)
    if not roles:
      raise Exception("No roles of type %s found in service %s"
          % (role_type, service.name))
    return roles[0]

  def _init_hdfs(self):
    """Create the Hdfs helper for this cluster and store it in '_hdfs'."""
    self._hdfs = Hdfs(self, "hdfs")

  def _init_hive(self):
    """Locate the HIVESERVER2 role of the HIVE service and store a Hive helper pointing
       at its host and thrift port in '_hive'.
    """
    hs2 = self._find_role("HIVESERVER2", "HIVE")
    host = self.cm.get_host(hs2.hostRef.hostId)
    config = hs2.get_config(view="full")["hs2_thrift_address_port"]
    # Fall back to the config's default when no explicit value is set.
    self._hive = Hive(self, str(host.hostname), int(config.value or config.default))

  def _init_impala(self):
    """Create the CmImpala helper for the IMPALA service and store it in '_impala'."""
    self._impala = CmImpala(self, self._find_service("IMPALA"))
Ejemplo n.º 3
0
# Exports the deployment description returned by the CM API to an S3 object.
#
# Usage: <script> <manager_host> <aws_access_id> <aws_secret_key> <export_bucket>
#        <export_key>
import json
import sys

from cm_api.api_client import ApiException
from cm_api.api_client import ApiResource
from boto.s3.connection import S3Connection
from boto.s3.key import Key

CMD_TIMEOUT = 180

# Exit with a usage message instead of an IndexError traceback when arguments are
# missing. Extra arguments are ignored.
if len(sys.argv) < 6:
    sys.exit("Usage: %s <manager_host> <aws_access_id> <aws_secret_key> "
             "<export_bucket> <export_key>" % sys.argv[0])

manager_host = sys.argv[1]
awsAccessId = sys.argv[2]
awsSecretKey = sys.argv[3]
exportBucket = sys.argv[4]
exportKey = sys.argv[5]

# retrieve cluster configuration

api = ApiResource(manager_host,
                  username="******",
                  password="******",
                  use_tls=False,
                  version=4)

cluster_config = api.get('cm/deployment')

# store retrieved configuration to s3

conn = S3Connection(awsAccessId, awsSecretKey)
bucket = conn.get_bucket(exportBucket)

# The configuration is serialized as JSON and written to the object named by exportKey.
k = Key(bucket)
k.key = exportKey
k.set_contents_from_string(json.dumps(cluster_config))
Ejemplo n.º 4
0
#!/usr/bin/env python
import sys
import json
from cm_api.api_client import ApiResource
from cm_api.api_client import ApiException
from boto.s3.connection import S3Connection
from boto.s3.key import Key

CMD_TIMEOUT = 180

# Expected arguments: <manager_host> <aws_access_id> <aws_secret_key> <export_bucket>
# <export_key>. Exit with a usage message instead of an IndexError traceback when
# arguments are missing. Extra arguments are ignored.
if len(sys.argv) < 6:
    sys.exit("Usage: %s <manager_host> <aws_access_id> <aws_secret_key> "
             "<export_bucket> <export_key>" % sys.argv[0])

manager_host = sys.argv[1]
awsAccessId = sys.argv[2]
awsSecretKey = sys.argv[3]
exportBucket = sys.argv[4]
exportKey = sys.argv[5]

# retrieve cluster configuration

api = ApiResource(manager_host, username="******", password="******", use_tls=False, version=4)

cluster_config = api.get('cm/deployment')

# store retrieved configuration to s3

conn = S3Connection(awsAccessId, awsSecretKey)
bucket = conn.get_bucket(exportBucket)

# The configuration is serialized as JSON and written to the object named by exportKey.
k = Key(bucket)
k.key = exportKey
k.set_contents_from_string(json.dumps(cluster_config))