Example #1
class ImpalaCluster(object):
  def __init__(self, cm_host, cm_cluster_name, username, password):
    self.cm_api = ApiResource(cm_host, username=username, password=password)
    self.hosts = dict()
    self.services = list()
    self.cluster = self.cm_api.get_cluster(cm_cluster_name)
    if self.cluster is None:
      raise RuntimeError('Cluster name "%s" not found' % cm_cluster_name)

    self.__load_hosts()
    self.__impala_service = ImpalaService(self)

  def _get_all_services(self):
    return self.cluster.get_all_services()

  def get_impala_service(self):
    return self.__impala_service

  def __load_hosts(self):
    self.hosts = dict()
    # Search for all hosts that are in the target cluster.
    # There is no API that provides the list of hosts in a given cluster, so to find them
    # we must loop through all the hosts and check that the cluster name matches.
    for host_info in self.cm_api.get_all_hosts():
      # host_info doesn't include a link to the roleRef, so we need to do another lookup
      # based on the hostId.
      host = self.cm_api.get_host(host_info.hostId)
      for roleRef in host.roleRefs:
        if roleRef.get('clusterName') == self.cluster.name:
          self.hosts[host_info.hostId] = Host(host)
          break
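
A minimal usage sketch for the class above, assuming cm_api and the ImpalaService/Host helpers referenced by the snippet are importable; the Cloudera Manager host, cluster name and credentials below are placeholders.

# Hypothetical driver for ImpalaCluster, not part of the original snippet.
if __name__ == '__main__':
  cluster = ImpalaCluster('cm-host.example.com', 'Cluster 1',
                          username='admin', password='admin')
  print 'Hosts in cluster: %d' % len(cluster.hosts)
  impala = cluster.get_impala_service()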
Example #2
def get_cluster_info(manager_host, server_port=7180, username='******',
                     password='******'):
    cm_api = ApiResource(manager_host, username=username, password=password,
                         server_port=server_port, version=9)
    host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
    cluster = list(cm_api.get_all_clusters())[0]
    yarn = filter(lambda x: x.type == 'YARN',
                  list(cluster.get_all_services()))[0]
    hive = filter(lambda x: x.type == 'HIVE',
                  list(cluster.get_all_services()))[0]
    impala = filter(lambda x: x.type == 'IMPALA',
                    list(cluster.get_all_services()))[0]
    hive_hs2 = hive.get_roles_by_type('HIVESERVER2')[0]
    hive_host = cm_api.get_host(hive_hs2.hostRef.hostId).hostname
    hive_port = int(
        hive_hs2.get_config('full')['hs2_thrift_address_port'].default)
    impala_hs2 = impala.get_roles_by_type('IMPALAD')[0]
    impala_host = cm_api.get_host(impala_hs2.hostRef.hostId).hostname
    impala_port = int(impala_hs2.get_config('full')['hs2_port'].default)
    return {'num_worker_nodes': len(yarn.get_roles_by_type('NODEMANAGER')),
            'node_cores': host.numCores, 'node_memory': host.totalPhysMemBytes,
            'hive_host': hive_host, 'hive_port': hive_port,
            'impala_host': impala_host, 'impala_port': impala_port}
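
The helper above only needs a reachable Cloudera Manager host; a hedged usage sketch (hostname and credentials are placeholders) that prints the discovered endpoints:

# Hypothetical caller of get_cluster_info, not part of the original snippet.
if __name__ == '__main__':
    info = get_cluster_info('cm-host.example.com',
                            username='admin', password='admin')
    print 'Impala: %s:%d' % (info['impala_host'], info['impala_port'])
    print 'Hive:   %s:%d' % (info['hive_host'], info['hive_port'])
    print '%d worker nodes, %d cores each' % (info['num_worker_nodes'],
                                              info['node_cores'])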
Example #3
class cm_utils(object):


    def __init__(self,service,role,host,list):
        self.service = service.lower()
        self.role = role.lower()
        self.host = host.lower()
        self.list = list.lower()
        cm_host = '10.7.177.234'
        self.api = ApiResource(cm_host, username="******", password="******")
        # "ALL" if service == "None" else  service
        # "ALL" if role == "None" else role
        # "ALL" if host == "None" else host


    def main(self):

#
        s_filter = None
        for c in self.api.get_all_clusters():
            print c
            for s in c.get_all_services():
                print "SERVICE : " + s.displayName + "==============="
#                if (s.displayName.lower() == self.service) or (self.service == "all"):
                if ( self.service in s.displayName.lower() ) or (self.service == "all"):
                    s_filter = s
                    for r in s_filter.get_all_roles():
#                        print "ROLE : " + r.type + "================"
                        if (self.role in r.type.lower()) or (self.role == "all"):
                            h = r.hostRef.hostId
                            hostname,ipAddress,healthSummary = self._get_host_info(h)
                            if (self.host in hostname) or (self.host in ipAddress) or (self.host in h) or (self.host == "all"):
                                if self.list == "yes":
                                    print ipAddress
                                else:
                                    print "[" + r.type + "]" + hostname + " " + ipAddress + " " + healthSummary




    def _get_host_info(self,hostid):
        host = self.api.get_host(hostid)
#        self.hostname = host.hostname
#        self.host_ip = host.ipAddress
#        self.host_status = host.healthSummary

        return host.hostname,host.ipAddress,host.healthSummary
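
A hedged sketch of a command-line entry point for the class above; the argument names simply mirror the constructor and are not part of the original snippet.

# Hypothetical driver for cm_utils.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='List CM roles filtered by service/role/host')
    parser.add_argument('--service', default='all')
    parser.add_argument('--role', default='all')
    parser.add_argument('--host', default='all')
    parser.add_argument('--list', dest='list_only', default='no')
    args = parser.parse_args()
    cm_utils(args.service, args.role, args.host, args.list_only).main()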
Example #4
def main(cm_fqhn, cm_user_name, cm_user_password, cm_cluster_name,
         cm_tls_enabled, cm_tls_cafile):
    #print  cm_fqhn, cm_user_name, cm_user_password, cm_cluster_name, cm_tls_enabled, cm_tls_cafile
    if cm_tls_enabled == 'false':
        api = ApiResource(server_host=cm_fqhn,
                          username=cm_user_name,
                          password=cm_user_password)
    else:
        #context = ssl.create_default_context(cafile='/opt/cloudera/security/certs/ChainedCA.cert.pem')
        context = ssl.create_default_context(cafile=cm_tls_cafile)
        api = ApiResource(server_host=cm_fqhn,
                          username=cm_user_name,
                          password=cm_user_password,
                          use_tls=True,
                          ssl_context=context)

    # Get a list of all clusters
    cdh_cluster = None

    for c in api.get_all_clusters():
        if c.name == cm_cluster_name:
            print '\nCluster:', c
            cdh_cluster = c
            for x in cdh_cluster.list_hosts():
                HOST_NAME2ID_MAP[api.get_host(x.hostId).hostname] = x.hostId
                HOST_ID2NAME_MAP[x.hostId] = api.get_host(x.hostId).hostname
            print '\nHostName to HostId Mapping:'
            for x in HOST_NAME2ID_MAP:
                print x, HOST_NAME2ID_MAP[x]
            print '\nHostId to HostName Mapping:'
            for x in HOST_ID2NAME_MAP:
                print x, HOST_ID2NAME_MAP[x]
            print '\nServices:'
            for x in cdh_cluster.get_all_services():
                print x.type

            #ZooKeeper
            #zk_client_port = getKeyValueByServiceTypeAndRoleType(cdh_cluster,
            #                                    SERVICE_TYPE_MAP['zookeeper'],
            #                                    SERVICE_ROLE_TYPE_MAP['zookeeper'],
            #                                    'clientPort');
            zk_service = getServiceByServiceType(cdh_cluster,
                                                 SERVICE_TYPE_MAP['zookeeper'])
            zk_server_rcg = getRCGByServiceAndRoleType(
                zk_service, SERVICE_ROLE_TYPE_MAP['zookeeper_server'])
            zk_client_port = geValueByKeyInRCG(
                zk_server_rcg, CONFIG_PROPERTY_MAP['zk_client_port'])
            if zk_client_port != None:
                CONFIG_KEY_VALUE_MAP['ZOOKEEPER_PORT'] = zk_client_port
            zk_hosts = getHostsByServiceAndRoleType(
                zk_service, SERVICE_ROLE_TYPE_MAP['zookeeper_server'])
            #print 'ZOOKEEPER HOSTS:', zk_hosts
            if len(zk_hosts) > 0:
                CONFIG_KEY_VALUE_MAP['ZOOKEEPER_QUORUM'] = ' '.join(zk_hosts)

            #HDFS
            hdfs_service = getServiceByServiceType(cdh_cluster,
                                                   SERVICE_TYPE_MAP['hdfs'])
            hdfs_nn_rcg = getRCGByServiceAndRoleType(
                hdfs_service, SERVICE_ROLE_TYPE_MAP['namenode'])
            #inspectKVsInRCG(hdfs_nn_rcg)
            hdfs_nn_ns = geValueByKeyInRCG(hdfs_nn_rcg,
                                           CONFIG_PROPERTY_MAP['hdf_nn_ns'])
            #print 'HDFS NAMENODE NAMESERVICE:', hdfs_nn_ns
            hdfs_nn_port = geValueByKeyInRCG(
                hdfs_nn_rcg, CONFIG_PROPERTY_MAP['hdf_nn_port'])
            #print 'HDFS NAMENODE PORT:', hdfs_nn_port
            if hdfs_nn_port == None:
                hdfs_nn_port = CONFIG_KEY_VALUE_MAP['NAME_NODE_PORT']
            else:
                CONFIG_KEY_VALUE_MAP['NAME_NODE_PORT'] = hdfs_nn_port
            nn_hosts = None
            if hdfs_nn_ns == None:
                nn_hosts = getHostsByServiceAndRoleType(
                    hdfs_service, SERVICE_ROLE_TYPE_MAP['namenode'])
                #print 'HDFS NAMENODE HOSTS:', nn_hosts
                CONFIG_KEY_VALUE_MAP[
                    'NAME_NODE'] = 'hdfs://' + nn_hosts[0] + ':' + hdfs_nn_port
            else:
                CONFIG_KEY_VALUE_MAP['NAME_NODE'] = hdfs_nn_ns

            #YARN
            yarn_service = getServiceByServiceType(cdh_cluster,
                                                   SERVICE_TYPE_MAP['yarn'])
            #inspectRolesByService(yarn_service)
            #inspectRCGs(yarn_service)
            yarn_jt_rcg = getRCGByServiceAndRoleType(
                yarn_service, SERVICE_ROLE_TYPE_MAP['resourcemanager'])
            #inspectKVsInRCG(yarn_jt_rcg)
            yarn_rm_address = geValueByKeyInRCG(
                yarn_jt_rcg, CONFIG_PROPERTY_MAP['yarn_rm_address'])
            if yarn_rm_address == None:
                yarn_rm_address = CONFIG_KEY_VALUE_MAP[
                    'RESOURCEMANAGER_ADDRESS']
            else:
                CONFIG_KEY_VALUE_MAP[
                    'RESOURCEMANAGER_ADDRESS'] = yarn_rm_address
            rm_hosts = getHostsByServiceAndRoleType(
                yarn_service, SERVICE_ROLE_TYPE_MAP['resourcemanager'])
            #print 'YARN RESOURCEMANGER HOSTS:', rm_hosts
            CONFIG_KEY_VALUE_MAP[
                'JOB_TRACKER'] = rm_hosts[0] + ':' + yarn_rm_address

            #OOZIE
            oozie_service = getServiceByServiceType(cdh_cluster,
                                                    SERVICE_TYPE_MAP['oozie'])
            #inspectConfigByService(oozie_service)
            oozie_use_ssl = getValueByKeyServiceConfig(
                oozie_service, CONFIG_PROPERTY_MAP['oozie_use_ssl'])
            #print 'OOZIE TLS/SSL:', oozie_use_ssl
            if oozie_use_ssl == 'true':
                CONFIG_KEY_VALUE_MAP['OOZIE_USE_SSL'] = 'true'
            oozie_LB = getValueByKeyServiceConfig(
                oozie_service, CONFIG_PROPERTY_MAP['oozie_load_balancer'])

            #inspectRolesByService(oozie_service)
            #inspectRCGs(oozie_service)
            oozie_server_rcg = getRCGByServiceAndRoleType(
                oozie_service, SERVICE_ROLE_TYPE_MAP['oozie_server'])
            #inspectKVsInRCG(oozie_server_rcg)
            oozie_http_port = geValueByKeyInRCG(
                oozie_server_rcg, CONFIG_PROPERTY_MAP['oozie_http_port'])
            oozie_https_port = geValueByKeyInRCG(
                oozie_server_rcg, CONFIG_PROPERTY_MAP['oozie_https_port'])
            if oozie_http_port == None:
                oozie_http_port = CONFIG_KEY_VALUE_MAP['OOZIE_HTTP_PORT']
            if oozie_https_port == None:
                oozie_https_port = CONFIG_KEY_VALUE_MAP['OOZIE_HTTPS_PORT']
            #print 'OOOZIE http(s) ports:', oozie_http_port, oozie_https_port
            oozie_hosts = getHostsByServiceAndRoleType(
                oozie_service, SERVICE_ROLE_TYPE_MAP['oozie_server'])
            #print oozie_hosts
            if CONFIG_KEY_VALUE_MAP['OOZIE_USE_SSL'] == 'true':
                if oozie_LB != None:
                    CONFIG_KEY_VALUE_MAP['OOZIE_URL'] = 'https://' + oozie_LB
                else:
                    CONFIG_KEY_VALUE_MAP[
                        'OOZIE_URL'] = 'https://' + oozie_hosts[
                            0] + ':' + CONFIG_KEY_VALUE_MAP[
                                'OOZIE_HTTPS_PORT'] + '/oozie'
            else:
                if oozie_LB != None:
                    CONFIG_KEY_VALUE_MAP['OOZIE_URL'] = 'http://' + oozie_LB
                else:
                    CONFIG_KEY_VALUE_MAP[
                        'OOZIE_URL'] = 'http://' + oozie_hosts[
                            0] + ':' + CONFIG_KEY_VALUE_MAP[
                                'OOZIE_HTTP_PORT'] + '/oozie'

            #HBASE
            hbase_service = getServiceByServiceType(cdh_cluster,
                                                    SERVICE_TYPE_MAP['hbase'])
            #inspectConfigByService(hbase_service)
            #inspectRolesByService(hbase_service)
            hbase_rs_rcg = getRCGByServiceAndRoleType(
                hbase_service, SERVICE_ROLE_TYPE_MAP['hbase_restserver'])
            #inspectKVsInRCG(hbase_rs_rcg)
            hbase_rs_port = geValueByKeyInRCG(
                hbase_rs_rcg, CONFIG_PROPERTY_MAP['hbase_rs_port'])
            if hbase_rs_port != None:
                CONFIG_KEY_VALUE_MAP['HBASE_REST_PORT'] = hbase_rs_port
            hbase_rs_hosts = getHostsByServiceAndRoleType(
                hbase_service, SERVICE_ROLE_TYPE_MAP['hbase_restserver'])
            CONFIG_KEY_VALUE_MAP['HBASE_REST_IP'] = hbase_rs_hosts[0]

            #KAFKA
            kafka_service = getServiceByServiceType(cdh_cluster,
                                                    SERVICE_TYPE_MAP['kafka'])
            #inspectConfigByService(kafka_service)
            #inspectRolesByService(kafka_service)
            kafka_broker_rcg = getRCGByServiceAndRoleType(
                kafka_service, SERVICE_ROLE_TYPE_MAP['kafka_broker'])
            #inspectKVsInRCG(kafka_broker_rcg)
            kafka_client_security_protocol = geValueByKeyInRCG(
                kafka_broker_rcg,
                CONFIG_PROPERTY_MAP['kafka_client_security_protocol'])
            if kafka_client_security_protocol != None:
                CONFIG_KEY_VALUE_MAP[
                    'KAFKA_SECURITY_PROTOCOL'] = kafka_client_security_protocol
            kafka_broker_hosts = getHostsByServiceAndRoleType(
                kafka_service, SERVICE_ROLE_TYPE_MAP['kafka_broker'])
            if len(kafka_broker_hosts) > 0:
                CONFIG_KEY_VALUE_MAP['KAFKA_BROKER'] = ' '.join(kafka_broker_hosts)

            # Print all
            print '\nOUTPUT:\n', CONFIG_KEY_VALUE_MAP
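
main() above leans on helper functions and lookup maps that are defined elsewhere in the original script. A minimal sketch of how two of those helpers could be written, assuming they are thin wrappers over cm_api and over the HOST_ID2NAME_MAP that main() populates:

def getServiceByServiceType(cluster, service_type):
    # Return the first service of the requested type, or None if absent.
    for service in cluster.get_all_services():
        if service.type == service_type:
            return service
    return None

def getHostsByServiceAndRoleType(service, role_type):
    # Resolve the hostnames of all roles of the given type via the
    # hostId-to-hostname map built in main().
    return [HOST_ID2NAME_MAP[role.hostRef.hostId]
            for role in service.get_roles_by_type(role_type)]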
Example #5
hdfs_service = None
hdfs_service_role_list = None
hdfs_host_list = []
yarn_service = None
yarn_service_role_list = None
yarn_host_list = []
oozie_service = None
oozie_service_role_list = None
oozie_host_list = []

for c in api.get_all_clusters():
    #  if c.name == cluster_name:
    print c
    cdh_cluster = c
    for x in cdh_cluster.list_hosts():
        host_name2id_map[api.get_host(x.hostId).hostname] = x.hostId
        host_id2name_map[x.hostId] = api.get_host(x.hostId).hostname
    for x in cdh_cluster.get_all_services():
        print x.type
    for x in host_name2id_map:
        print x, host_name2id_map[x]
    for x in host_id2name_map:
        print x, host_id2name_map[x]

for s in cdh_cluster.get_all_services():
    if s.type == zk_service_type:
        print 'SERVICE:', s.type, s.get_config()
        zk_service = s
        zk_service_role_group_list = zk_service.get_all_role_config_groups()
        for x in zk_service_role_group_list:
            print 'ROLE_GROUP:', x.roleType,
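
The loop above stops at printing the role config group types; a hedged continuation that reads one concrete key out of the SERVER group (clientPort, the property already referenced in the previous example), using cm_api's full config view where each entry exposes .value and .default:

# Hypothetical continuation of the ZooKeeper inspection above.
for rcg in zk_service_role_group_list:
    if rcg.roleType == 'SERVER':
        full_config = rcg.get_config(view='full')
        client_port = full_config['clientPort']
        print 'clientPort:', client_port.value or client_port.default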
Example #6
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handle all the steps required in creating a cluster. All the functions are built to be
    idempotent, so you should be able to resume from any failed step by re-running
    __class__.setup().
    """
    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license: either trial mode is started or a full license is
        entered and registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(
                        self.module,
                        'License should be provided or trial should be specified'
                    )

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner,
                                                    _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(
                           cluster_config['name']))
            self.cluster = self.api.create_cluster(
                cluster_config['name'], cluster_config['version'],
                cluster_config['fullVersion'])

        cluster_hosts = [
            self.api.get_host(host.hostId).hostname
            for host in self.cluster.list_hosts()
        ]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None
                    and 'is not currently available for execution'
                    in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(
                       cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT",
                       msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(
                role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(
                self.module,
                "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster,
                                                              service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on other services
        # and it is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
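
A hedged sketch of how the orchestration class above might be driven; the shape of the config dict is inferred from the attribute accesses in the methods (a cm block with host/credentials, a cluster block, parcels and services), and module stands in for whatever object fail() and print_json() expect in the original code.

# Hypothetical driver for ClouderaManager; values are placeholders.
config = {
    'cm': {'host': 'cm-host.example.com',
           'username': 'admin', 'password': 'admin'},
    'cluster': {'name': 'Cluster 1', 'version': 'CDH5', 'fullVersion': '5.8.2',
                'hosts': ['worker-1.example.com', 'worker-2.example.com']},
    'parcels': [{'product': 'CDH', 'version': '5.8.2', 'repo': None}],
    'services': {'MGMT': {'roles': []}},
}

cm = ClouderaManager(module=None, config=config, trial=True)
cm.setup()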
Example #7
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=9)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster', 'create_snapshot_policy', 'deploy_configuration']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '268435456',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create management roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg="Specified version {0} doesn't appear in {1} or appears twice".format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the 'ACTIVATING' stage then
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init the zookeeper and kafka ones,
    # which need IDs specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # Hue requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # also, this spawns 3 journal node and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_dir_a = module.params.get('jn_dir', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': jn_dir_a, 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there are two roles matching this name, HA for YARN is already enabled
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare it directly because
        # after enabling HA on YARN, roles will have random strings in their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # deploy configuration - it always returns changed
    elif action_a == 'deploy_configuration':
      service_a = module.params.get('service', None)
      service_name = SERVICE_MAP[service_a]
      service = cluster.get_service(service_name)

      # deploying client configuration
      command = service.deploy_client_config()
      if command.wait().success == False:
        module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      module.exit_json(changed=True, msg='Configuration deployed')
        
    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(service))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this only can start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this only can restart
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload=loads(value_a)
      # checking if the policy already exists. An exception is expected when configuring for the first time.
      try: 
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
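
The module above assumes some boilerplate that the snippet does not show: a flat MODULE_ARGUMENTS list that is turned into an all-string argument_spec, and the usual Ansible entry point. A hedged sketch of that scaffolding (the real argument list in the original is longer):

# Hypothetical subset of the boilerplate referenced by main() above.
from ansible.module_utils.basic import AnsibleModule

MODULE_ARGUMENTS = ['action', 'name', 'version', 'license', 'host', 'service',
                    'role', 'entity', 'state', 'value', 'nn_host', 'sn_host',
                    'jn_dir', 'jn1_host', 'jn2_host', 'jn3_host']

if __name__ == '__main__':
  main()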
Example #8
cm_host = "localhost"
api = ApiResource(cm_host, username="******", password="******")

print "*** CLUSTERS ***"

clusters = None
# List clusters
for c in api.get_all_clusters():
    print "Cluster \"%s\" is version %s" % (c.name, c.version)
    clusters = c

print "*** HOSTS ***"

for host_ref in c.list_hosts():
    host = api.get_host(host_ref.hostId)
    print host.hostname

print "*** SERVICES ***"

hdfs = None
# List services & health info
for s in clusters.get_all_services():
    print "Service \"%s\" -- state \"%s\" -- health \"%s\"" % (
        s.name, s.serviceState, s.healthSummary)
    # Get HDFS service
    if 'hdfs' in s.type.lower():
        hdfs = s

print "*** HDFS Service checks (" + hdfs.serviceUrl + ") ***"
class RemoteDataLoad(object):
    """This is an implementation of the process to load a test-warehouse snapshot on
    a remote CM managed cluster. This script assumes that the warehouse snapshot was
    already downloaded and was either passed in as a parameter, or can be found by
    either inspecting the SNAPSHOT_DIR environment variable, or based on the WORKSPACE
    environment variable on a Jenkins build slave.

    The reason for the additional setup code is that in the local development
    environment it is assumed that $USER is HDFS superuser, which is not the case for
    remote deployments.
    """

    def __init__(self, cm_host, options):
        logger.info("Starting remote data load...")
        self.options = options
        self.cm_host = cm_host

        # Gateway host can be used if the CM host is not configured as a Hadoop gateway
        self.gateway = options.gateway if options.gateway else cm_host
        self.impala_home = os.environ["IMPALA_HOME"]
        self.api = ApiResource(self.cm_host, username=options.cm_user,
                               password=options.cm_pass)

        # The API returns a list of clusters managed by the CM host. We're assuming
        # that this CM host was set up for the purpose of Impala testing on one
        # cluster, so the list should only have one value.
        self.cluster = self.api.get_all_clusters()[0]
        self.services = self.get_services()

        self.config = self.get_service_client_configurations()
        logger.info("Retrieved service configuration")
        logger.info(str(self.config))
        self.prepare()
        logger.info("IMPALA_HOME: {0}".format(self.impala_home))

    def get_hostname_for_ref(self, host_ref):
        """Translate the HostRef instance into the hostname."""
        return self.api.get_host(host_ref.hostId).hostname

    @staticmethod
    def get_or_default(config):
        return config.value if config.value else config.default

    def get_services(self):
        """Confirm that all services are running, and return service dict."""
        services = dict((s.type, s) for s in self.cluster.get_all_services())

        if set(REQUIRED_SERVICES) != set(services.keys()):
            missing_services = set(REQUIRED_SERVICES) - set(services.keys())
            logger.error("Services not installed: {0}".format(list(missing_services)))
            raise RuntimeError("Cluster not ready.")

        if not all(services[s].serviceState == 'STARTED' for s in services):
            stopped = [s for s in services if services[s].serviceState != "STARTED"]
            logger.error("Not all services started: {0}".format(stopped))
            raise RuntimeError("Cluster not ready.")

        return services

    @timing
    def download_client_config(self, cluster, service):
        """Download the client configuration zip for a particular cluster and service.

        Since cm_api does not provide a way to download the archive, we build the URL
        manually and download the file. Once the file is downloaded, the archive is
        extracted and its content is copied to the Hadoop configuration directories
        defined by Impala.
        """
        logger.info("Downloading client configuration for {0}".format(service.name))
        url = "http://{0}:7180/api/{1}/clusters/{2}/services/{3}/clientConfig".format(
            self.cm_host, CM_API_VERSION, urlquote(cluster.name), urlquote(service.name))
        path = mkdtemp()
        sh.curl(url, o=os.path.join(path, "clientConfig.zip"), _out=tee, _err=tee)
        current = os.getcwd()
        os.chdir(path)
        sh.unzip("clientConfig.zip")
        for root, _, file_names in os.walk("."):
            for filename in fnmatch.filter(file_names, "*.xml"):
                src = os.path.join(root, filename)
                dst = os.path.join(self.impala_home, "fe", "src", "test", "resources")
                logger.debug("Copying {0} to {1}".format(src, dst))
                shutil.copy(src, dst)
        os.chdir(current)

    # TODO: this may be available in tests/comparison/cluster.py
    def set_hive_warehouse_dir(self, cluster, service):
        logger.info("Setting the Hive Warehouse Dir")
        for service in self.api.get_all_clusters()[0].get_all_services():
            logger.info(service)
            if service.type == "HIVE":
              hive_config = { "hive_warehouse_directory" : HIVE_WAREHOUSE_DIR }
              service.update_config(hive_config)

    # TODO: This functionality should be more generally available to other infrastructure
    # code, rather than being quarantined in this script. See IMPALA-4367.
    @timing
    def get_service_client_configurations(self):
        """Download the client configurations necessary to upload data to the remote
        cluster. Unfortunately, the CM API does not allow downloading it so we have to
        iterate over the services and download the config for all of them.

        In addition, returns an options dictionary with settings required for data loading
        like the HS2 server, Impala hosts, Name node etc.

        Returns:
            A client-configuration dictionary, e.g.:

            {
                'hive_warehouse_directory': '/test-warehouse',
                'hs2': 'impala-test-cluster-1.gce.cloudera.com:10000',
                'impalad': ['impala-test-cluster-4.gce.cloudera.com:21000',
                            'impala-test-cluster-2.gce.cloudera.com:21000',
                            'impala-test-cluster-3.gce.cloudera.com:21000'],
                'metastore': 'impala-test-cluster-1.gce.cloudera.com:9083',
                'namenode': 'impala-test-cluster-1.gce.cloudera.com',
                'namenode_http': 'impala-test-cluster-1.gce.cloudera.com:20101',
                'kudu_master': 'impala-test-cluster-1.gce.cloudera.com'
            }
        """
        # Iterate over the services and find the information we need
        result = {}
        for service_type, service in self.services.iteritems():
            if service_type == "IMPALA":
                roles = service.get_roles_by_type("IMPALAD")
                impalads = []
                for r in roles:
                    rc_config = r.get_config("full")
                    hostname = self.get_hostname_for_ref(r.hostRef)
                    hs2_port = self.get_or_default(rc_config["beeswax_port"])
                    impalads.append("{0}:{1}".format(hostname, hs2_port))
                    result["impalad"] = impalads
            elif service_type == "HBASE":
                self.download_client_config(self.cluster, service)
            elif service_type == "HDFS":
                self.download_client_config(self.cluster, service)
                role = service.get_roles_by_type("NAMENODE")
                config = role[0].get_config("full")
                namenode = self.get_hostname_for_ref(role[0].hostRef)
                result["namenode"] = namenode
                result["namenode_http"] = "{0}:{1}".format(
                    namenode,
                    self.get_or_default(config["dfs_http_port"])
                )
            elif service_type == "HIVE":
                self.set_hive_warehouse_dir(self.cluster, service)
                self.download_client_config(self.cluster, service)
                hs2 = service.get_roles_by_type("HIVESERVER2")[0]
                rc_config = hs2.get_config("full")
                result["hive_warehouse_directory"] = self.get_or_default(
                    service.get_config("full")[0]["hive_warehouse_directory"])
                hostname = self.get_hostname_for_ref(hs2.hostRef)
                result["hs2"] = "{0}:{1}".format(hostname, self.get_or_default(
                    rc_config["hs2_thrift_address_port"]))

                # Get Metastore information
                ms = service.get_roles_by_type("HIVEMETASTORE")[0]
                rc_config = ms.get_config("full")
                result["metastore"] = "{0}:{1}".format(
                    self.get_hostname_for_ref(ms.hostRef),
                    self.get_or_default(rc_config["hive_metastore_port"])
                )
            elif service_type == "KUDU":
                # Service KUDU does not require a client configuration
                result["kudu_master"] = self.cm_host

        return result

    # TODO: This functionality should be more generally available to other infrastructure
    # code, rather than being quarantined in this script. See IMPALA-4367.
    @staticmethod
    def find_snapshot_file(snapshot_dir):
        """Given snapshot_directory, walks the directory tree until it finds a file
        matching the test-warehouse archive pattern."""
        for root, _, file_names in os.walk(snapshot_dir):
            for filename in fnmatch.filter(file_names, "test-warehouse-*-SNAPSHOT.tar.gz"):
                logger.info("Found Snapshot file {0}".format(filename))
                return os.path.join(root, filename)

    @timing
    def prepare(self):
        """Populate the environment of the process with the necessary values.

        In addition, it creates helper objects to run shell and SSH processes.
        """
        # Populate environment with required variables
        os.environ["HS2_HOST_PORT"] = self.config["hs2"]
        os.environ["HDFS_NN"] = self.config["namenode"]
        os.environ["IMPALAD"] = self.config["impalad"][0]
        os.environ["REMOTE_LOAD"] = "1"
        os.environ["HADOOP_USER_NAME"] = "hdfs"
        os.environ["TEST_WAREHOUSE_DIR"] = self.config["hive_warehouse_directory"]
        os.environ["KUDU_MASTER"] = self.config["kudu_master"]

        if self.options.snapshot_file is None:
            if "SNAPSHOT_DIR" in os.environ:
                snapshot_dir = os.environ["SNAPSHOT_DIR"]
            else:
                snapshot_dir = "{0}/testdata/test-warehouse-SNAPSHOT".format(
                    os.getenv("WORKSPACE"))
            if not os.path.isdir(snapshot_dir):
                err_msg = 'Snapshot directory "{0}" is not a valid directory'
                logger.error(err_msg.format(snapshot_dir))
                raise OSError("Could not find test-warehouse snapshot file.")

            logger.info("Snapshot directory: {0}".format(snapshot_dir))
            self.snapshot_file = self.find_snapshot_file(snapshot_dir)
        else:
            self.snapshot_file = self.options.snapshot_file

        # Prepare shortcuts for connecting to remote services
        self.gtw_ssh = ssh.bake("{0}@{1}".format(self.options.ssh_user, self.gateway),
                                "-oStrictHostKeyChecking=no",
                                "-oUserKnownHostsFile=/dev/null",
                                t=True, _out=tee, _err=tee)

        self.beeline = sh.beeline.bake(silent=False, outputformat="csv2", n="impala",
                                       u="jdbc:hive2://{0}/default".format(
                                           self.config["hs2"]))

        self.load_test_warehouse = sh.Command(
            "{0}/testdata/bin/load-test-warehouse-snapshot.sh".format(
                self.impala_home)).bake(
            _out=tee, _err=tee)

        self.create_load_data = sh.Command(
            "{0}/testdata/bin/create-load-data.sh".format(self.impala_home))

        self.main_impalad = self.config["impalad"][0]
        self.impala_shell = sh.Command("impala-shell.sh").bake(i=self.main_impalad,
                                                               _out=tee, _err=tee)

        self.python = sh.Command("impala-python").bake(u=True)
        self.compute_stats = sh.Command(
            "{0}/testdata/bin/compute-table-stats.sh".format(self.impala_home)).bake(
            _out=tee, _err=tee)

    @timing
    def load(self):
        """This method performs the actual data load. First it removes any known artifacts
        from the remote location. Next it drops potentially existing database from the
        Hive Metastore. Now, it invokes the load-test-warehouse-snapshot.sh and
        create-load-data.sh scripts with the appropriate parameters. The most important
        paramters are implicitly passed to the scripts as environment variables pointing
        to the remote HDFS, Hive and Impala.
        """
        exploration_strategy = self.options.exploration_strategy

        logger.info("Removing other databases")
        dblist = self.beeline(e="show databases;", _err=tee).stdout
        database_list = dblist.split()[1:]  # The first element is the header string
        for db in database_list:
            if db.strip() != "default":
                logger.debug("Dropping database %s", db)
                self.impala_shell(q="drop database if exists {0} cascade;".format(db))

        logger.info("Invalidating metadata in Impala")
        self.impala_shell(q="invalidate metadata;")

        logger.info("Removing previous remote {0}".format(
            self.config["hive_warehouse_directory"]))
        r = sh.hdfs.dfs("-rm", "-r", "-f", "{0}".format(
            self.config["hive_warehouse_directory"]))

        logger.info("Expunging HDFS trash")
        r = sh.hdfs.dfs("-expunge")

        logger.info("Uploading test warehouse snapshot")
        self.load_test_warehouse(self.snapshot_file)

        # TODO: We need to confirm that if we change any permissions, that we don't
        # affect any running tests. See IMPALA-4375.
        logger.info("Changing warehouse ownership")
        r = sh.hdfs.dfs("-chown", "-R", "impala:hdfs", "{0}".format(
            self.config["hive_warehouse_directory"]))
        sh.hdfs.dfs("-chmod", "-R", "g+rwx", "{0}".format(
            self.config["hive_warehouse_directory"]))
        sh.hdfs.dfs("-chmod", "1777", "{0}".format(
            self.config["hive_warehouse_directory"]))

        logger.info("Calling create_load_data.sh")
        # The $USER variable is used in the create-load-data.sh script for beeline
        # impersonation.
        new_env = os.environ.copy()
        new_env["LOGNAME"] = "impala"
        new_env["USER"] = "******"
        new_env["USERNAME"] = "******"

        # Regardless of whether we are in fact skipping the snapshot load or not,
        # we nonetheless always pass -skip_snapshot_load to create-load-data.sh.
        # This is because we have already loaded the snapshot earlier in this
        # script, so we don't want create-load-data.sh to invoke
        # load-test-warehouse-snapshot.sh again.
        #
        # It would actually be nice to be able to skip the snapshot load, but
        # because of the existing messiness of create-load-data.sh, we can't.
        # This invocation...
        #
        #    $ create-load-data.sh -skip_snapshot_load -exploration_strategy core
        #
        # ...results in this error:
        #
        #    Creating /test-warehouse HDFS directory \
        #    (logging to create-test-warehouse-dir.log)... FAILED
        #    'hadoop fs -mkdir /test-warehouse' failed. Tail of log:
        #    Log for command 'hadoop fs -mkdir /test-warehouse'
        #    mkdir: `/test-warehouse': File exists
        #
        # Similarly, even though we might pass in "core" as the exploration strategy,
        # because we aren't loading a metadata snapshot (i.e., -skip_metadata_load is
        # false), an exhaustive dataload will always be done. This again is the result
        # of logic in create-load-data.sh, which itself ignores the value passed in
        # for -exploration_strategy.
        #
        # See IMPALA-4399: "create-load-data.sh has bitrotted to some extent, and needs
        #                   to be cleaned up"
        create_load_data_args = ["-skip_snapshot_load", "-cm_host", self.cm_host,
                                 "-snapshot_file", self.snapshot_file,
                                 "-exploration_strategy", exploration_strategy]

        self.create_load_data(*create_load_data_args, _env=new_env, _out=tee, _err=tee)

        sh.hdfs.dfs("-chown", "-R", "impala:hdfs", "{0}".format(
            self.config["hive_warehouse_directory"]))

        logger.info("Re-load HBase data")
        # Manually load the HBase data last.
        self.python("{0}/bin/load-data.py".format(self.impala_home),
                    "--hive_warehouse_dir={0}".format(
                        self.config["hive_warehouse_directory"]),
                    "--table_formats=hbase/none",
                    "--hive_hs2_hostport={0}".format(self.config["hs2"]),
                    "--hdfs_namenode={0}".format(self.config["namenode"]),
                    "--exploration_strategy={0}".format(exploration_strategy),
                    workloads="functional-query",
                    force=True,
                    impalad=self.main_impalad,
                    _env=new_env,
                    _out=tee,
                    _err=tee)

        self.compute_stats()
        logger.info("Load data finished")

    # TODO: Should this be refactored out of this script? It has nothing to do with
    # data loading per se. If tests rely on the environment on the client being set
    # a certain way -- as in the prepare() method -- we may need to find another way
    # to deal with that. See IMPALA-4376.
    @timing
    def test(self):
        """Execute Impala's end-to-end tests against a remote cluster. All configuration
        paramters are picked from the cluster configuration that was fetched via the
        CM API."""

        # TODO: Running tests via run-tests.py is currently not working against a remote
        # cluster (although running directly via py.test seems to work). This method
        # may be refactored out of this file under IMPALA-4376, so for the time being,
        # raise a NotImplementedError.
        raise NotImplementedError

        # Overwrite the username to match the service user on the remote system and deal
        # with the assumption that in the local development environment the current user
        # is HDFS superuser as well.
        new_env = os.environ.copy()
        new_env["LOGNAME"] = "impala"
        new_env["USER"] = "******"
        new_env["USERNAME"] = "******"

        strategy = self.options.exploration_strategy
        logger.info("Running tests with exploration strategy {0}".format(strategy))
        run_tests = sh.Command("{0}/tests/run-tests.py".format(self.impala_home))
        run_tests("--skip_local_tests",
                  "--exploration_strategy={0}".format(strategy),
                  "--workload_exploration_strategy=functional-query:{0}".format(strategy),
                  "--namenode_http_address={0}".format(self.config["namenode_http"]),
                  "--hive_server2={0}".format(self.config["hs2"]),
                  "--metastore_server={0}".format(self.config["metastore"]),
                  "query_test",
                  maxfail=10,
                  impalad=",".join(self.config["impalad"]),
                  _env=new_env,
                  _out=tee,
                  _err=tee)
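
The prepare() method above relies on the sh library's bake() to pre-bind arguments and output callbacks once, so each later call only supplies what varies. A minimal sketch of that pattern; the hdfs command and paths here are illustrative, not taken from the script:

# A sketch of the bake() pattern from prepare(); the command and paths are
# illustrative and assume the hdfs binary is on PATH.
import sys
import sh

def tee(line):
    # Forward subprocess output to our own stdout, like the tee callback used above.
    sys.stdout.write(line)

# Pre-bind the constant parts once...
hdfs_dfs = sh.Command("hdfs").bake("dfs", _out=tee, _err=tee)

# ...so later calls only add what changes between invocations.
hdfs_dfs("-ls", "/test-warehouse")
hdfs_dfs("-du", "-h", "/test-warehouse")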
Ejemplo n.º 10
0
class RemoteDataLoad(object):
    """This is an implementation of the process to load a test-warehouse snapshot on
    a remote CM managed cluster. This script assumes that the warehouse snapshot was
    already downloaded and was either passed in as a parameter, or can be found by
    either inspecting the SNAPSHOT_DIR environment variable, or based on the WORKSPACE
    environment variable on a Jenkins build slave.

    The reason for the additional setup code is that in the local development
    environment it is assumed that $USER is HDFS superuser, which is not the case for
    remote deployments.
    """
    def __init__(self, cm_host, options):
        logger.info("Starting remote data load...")
        self.options = options
        self.cm_host = cm_host

        # Gateway host can be used if the CM host is not configured as a Hadoop gateway
        self.gateway = options.gateway if options.gateway else cm_host
        self.impala_home = os.environ["IMPALA_HOME"]
        self.api = ApiResource(self.cm_host,
                               username=options.cm_user,
                               password=options.cm_pass)

        # The API returns a list of clusters managed by the CM host. We're assuming
        # that this CM host was set up for the purpose of Impala testing on one
        # cluster, so the list should only have one value.
        self.cluster = self.api.get_all_clusters()[0]
        self.services = self.get_services()

        self.config = self.get_service_client_configurations()
        logger.info("Retrieved service configuration")
        logger.info(str(self.config))
        self.prepare()
        logger.info("IMPALA_HOME: {0}".format(self.impala_home))

    def get_hostname_for_ref(self, host_ref):
        """Translate the HostRef instance into the hostname."""
        return self.api.get_host(host_ref.hostId).hostname

    @staticmethod
    def get_or_default(config):
        return config.value if config.value else config.default

    def get_services(self):
        """Confirm that all services are running, and return service dict."""
        services = dict((s.type, s) for s in self.cluster.get_all_services())

        if set(REQUIRED_SERVICES) != set(services.keys()):
            missing_services = set(REQUIRED_SERVICES) - set(services.keys())
            logger.error("Services not installed: {0}".format(
                list(missing_services)))
            raise RuntimeError("Cluster not ready.")

        if not all(services[s].serviceState == 'STARTED' for s in services):
            stopped = [
                s for s in services if services[s].serviceState != "STARTED"
            ]
            logger.error("Not all services started: {0}".format(stopped))
            raise RuntimeError("Cluster not ready.")

        return services

    @timing
    def download_client_config(self, cluster, service):
        """Download the client configuration zip for a particular cluster and service.

        Since cm_api does not provide a way to download the archive, we build the URL
        manually and download the file. Once the file is downloaded, the archive is
        extracted and its contents are copied to the Hadoop configuration directories
        defined by Impala.
        """
        logger.info("Downloading client configuration for {0}".format(
            service.name))
        url = "http://{0}:7180/api/{1}/clusters/{2}/services/{3}/clientConfig".format(
            self.cm_host, CM_API_VERSION, urlquote(cluster.name),
            urlquote(service.name))
        path = mkdtemp()
        sh.curl(url,
                o=os.path.join(path, "clientConfig.zip"),
                _out=tee,
                _err=tee)
        current = os.getcwd()
        os.chdir(path)
        sh.unzip("clientConfig.zip")
        for root, _, file_names in os.walk("."):
            for filename in fnmatch.filter(file_names, "*.xml"):
                src = os.path.join(root, filename)
                dst = os.path.join(self.impala_home, "fe", "src", "test",
                                   "resources")
                logger.debug("Copying {0} to {1}".format(src, dst))
                shutil.copy(src, dst)
        os.chdir(current)

    # TODO: this may be available in tests/comparison/cluster.py
    def set_hive_warehouse_dir(self, cluster, service):
        logger.info("Setting the Hive Warehouse Dir")
        for service in self.api.get_all_clusters()[0].get_all_services():
            logger.info(service)
            if service.type == "HIVE":
                hive_config = {"hive_warehouse_directory": HIVE_WAREHOUSE_DIR}
                service.update_config(hive_config)

    # TODO: This functionality should be more generally available to other infrastructure
    # code, rather than being quarantined in this script. See IMPALA-4367.
    @timing
    def get_service_client_configurations(self):
        """Download the client configurations necessary to upload data to the remote
        cluster. Unfortunately, the CM API does not allow downloading it so we have to
        iterate over the services and download the config for all of them.

        In addition, returns an options dictionary with settings required for data loading
        like the HS2 server, Impala hosts, Name node etc.

        Returns:
            A client-configuration dictionary, e.g.:

            {
                'hive_warehouse_directory': '/test-warehouse',
                'hs2': 'impala-test-cluster-1.gce.cloudera.com:10000',
                'impalad': ['impala-test-cluster-4.gce.cloudera.com:21000',
                            'impala-test-cluster-2.gce.cloudera.com:21000',
                            'impala-test-cluster-3.gce.cloudera.com:21000'],
                'metastore': 'impala-test-cluster-1.gce.cloudera.com:9083',
                'namenode': 'impala-test-cluster-1.gce.cloudera.com',
                'namenode_http': 'impala-test-cluster-1.gce.cloudera.com:20101',
                'kudu_master': 'impala-test-cluster-1.gce.cloudera.com'
            }
        """
        # Iterate over the services and find the information we need
        result = {}
        for service_type, service in self.services.iteritems():
            if service_type == "IMPALA":
                roles = service.get_roles_by_type("IMPALAD")
                impalads = []
                for r in roles:
                    rc_config = r.get_config("full")
                    hostname = self.get_hostname_for_ref(r.hostRef)
                    hs2_port = self.get_or_default(rc_config["beeswax_port"])
                    impalads.append("{0}:{1}".format(hostname, hs2_port))
                    result["impalad"] = impalads
            elif service_type == "HBASE":
                self.download_client_config(self.cluster, service)
            elif service_type == "HDFS":
                self.download_client_config(self.cluster, service)
                role = service.get_roles_by_type("NAMENODE")
                config = role[0].get_config("full")
                namenode = self.get_hostname_for_ref(role[0].hostRef)
                result["namenode"] = namenode
                result["namenode_http"] = "{0}:{1}".format(
                    namenode, self.get_or_default(config["dfs_http_port"]))
            elif service_type == "HIVE":
                self.set_hive_warehouse_dir(self.cluster, service)
                self.download_client_config(self.cluster, service)
                hs2 = service.get_roles_by_type("HIVESERVER2")[0]
                rc_config = hs2.get_config("full")
                result["hive_warehouse_directory"] = self.get_or_default(
                    service.get_config("full")[0]["hive_warehouse_directory"])
                hostname = self.get_hostname_for_ref(hs2.hostRef)
                result["hs2"] = "{0}:{1}".format(
                    hostname,
                    self.get_or_default(rc_config["hs2_thrift_address_port"]))

                # Get Metastore information
                ms = service.get_roles_by_type("HIVEMETASTORE")[0]
                rc_config = ms.get_config("full")
                result["metastore"] = "{0}:{1}".format(
                    self.get_hostname_for_ref(ms.hostRef),
                    self.get_or_default(rc_config["hive_metastore_port"]))
            elif service_type == "KUDU":
                # Service KUDU does not require a client configuration
                result["kudu_master"] = self.cm_host

        return result

    # TODO: This functionality should be more generally available to other infrastructure
    # code, rather than being quarantined in this script. See IMPALA-4367.
    @staticmethod
    def find_snapshot_file(snapshot_dir):
        """Given snapshot_directory, walks the directory tree until it finds a file
        matching the test-warehouse archive pattern."""
        for root, _, file_names in os.walk(snapshot_dir):
            for filename in fnmatch.filter(file_names,
                                           "test-warehouse-*-SNAPSHOT.tar.gz"):
                logger.info("Found Snapshot file {0}".format(filename))
                return os.path.join(root, filename)

    @timing
    def prepare(self):
        """Populate the environment of the process with the necessary values.

        In addition, it creates helper objects to run shell and SSH processes.
        """
        # Populate environment with required variables
        os.environ["HS2_HOST_PORT"] = self.config["hs2"]
        os.environ["HDFS_NN"] = self.config["namenode"]
        os.environ["IMPALAD"] = self.config["impalad"][0]
        os.environ["REMOTE_LOAD"] = "1"
        os.environ["HADOOP_USER_NAME"] = "hdfs"
        os.environ["TEST_WAREHOUSE_DIR"] = self.config[
            "hive_warehouse_directory"]
        os.environ["KUDU_MASTER"] = self.config["kudu_master"]

        if self.options.snapshot_file is None:
            if "SNAPSHOT_DIR" in os.environ:
                snapshot_dir = os.environ["SNAPSHOT_DIR"]
            else:
                snapshot_dir = "{0}/testdata/test-warehouse-SNAPSHOT".format(
                    os.getenv("WORKSPACE"))
            if not os.path.isdir(snapshot_dir):
                err_msg = 'Snapshot directory "{0}" is not a valid directory'
                logger.error(err_msg.format(snapshot_dir))
                raise OSError("Could not find test-warehouse snapshot file.")

            logger.info("Snapshot directory: {0}".format(snapshot_dir))
            self.snapshot_file = self.find_snapshot_file(snapshot_dir)
        else:
            self.snapshot_file = self.options.snapshot_file

        # Prepare shortcuts for connecting to remote services
        self.gtw_ssh = ssh.bake("{0}@{1}".format(self.options.ssh_user,
                                                 self.gateway),
                                "-oStrictHostKeyChecking=no",
                                "-oUserKnownHostsFile=/dev/null",
                                t=True,
                                _out=tee,
                                _err=tee)

        self.beeline = sh.beeline.bake(silent=False,
                                       outputformat="csv2",
                                       n="impala",
                                       u="jdbc:hive2://{0}/default".format(
                                           self.config["hs2"]))

        self.load_test_warehouse = sh.Command(
            "{0}/testdata/bin/load-test-warehouse-snapshot.sh".format(
                self.impala_home)).bake(_out=tee, _err=tee)

        self.create_load_data = sh.Command(
            "{0}/testdata/bin/create-load-data.sh".format(self.impala_home))

        self.main_impalad = self.config["impalad"][0]
        self.impala_shell = sh.Command("impala-shell.sh").bake(
            i=self.main_impalad, _out=tee, _err=tee)

        self.python = sh.Command("impala-python").bake(u=True)
        self.compute_stats = sh.Command(
            "{0}/testdata/bin/compute-table-stats.sh".format(
                self.impala_home)).bake(_out=tee, _err=tee)

    @timing
    def load(self):
        """This method performs the actual data load. First it removes any known artifacts
        from the remote location. Next it drops potentially existing database from the
        Hive Metastore. Now, it invokes the load-test-warehouse-snapshot.sh and
        create-load-data.sh scripts with the appropriate parameters. The most important
        paramters are implicitly passed to the scripts as environment variables pointing
        to the remote HDFS, Hive and Impala.
        """
        exploration_strategy = self.options.exploration_strategy

        logger.info("Removing other databases")
        dblist = self.beeline(e="show databases;", _err=tee).stdout
        database_list = dblist.split()[
            1:]  # The first element is the header string
        for db in database_list:
            if db.strip() != "default":
                logger.debug("Dropping database %s", db)
                self.impala_shell(
                    q="drop database if exists {0} cascade;".format(db))

        logger.info("Invalidating metadata in Impala")
        self.impala_shell(q="invalidate metadata;")

        logger.info("Removing previous remote {0}".format(
            self.config["hive_warehouse_directory"]))
        r = sh.hdfs.dfs("-rm", "-r", "-f",
                        "{0}".format(self.config["hive_warehouse_directory"]))

        logger.info("Expunging HDFS trash")
        r = sh.hdfs.dfs("-expunge")

        logger.info("Uploading test warehouse snapshot")
        self.load_test_warehouse(self.snapshot_file)

        # TODO: We need to confirm that if we change any permissions, that we don't
        # affect any running tests. See IMPALA-4375.
        logger.info("Changing warehouse ownership")
        r = sh.hdfs.dfs("-chown", "-R", "impala:hdfs",
                        "{0}".format(self.config["hive_warehouse_directory"]))
        sh.hdfs.dfs("-chmod", "-R", "g+rwx",
                    "{0}".format(self.config["hive_warehouse_directory"]))
        sh.hdfs.dfs("-chmod", "1777",
                    "{0}".format(self.config["hive_warehouse_directory"]))

        logger.info("Calling create_load_data.sh")
        # The $USER variable is used in the create-load-data.sh script for beeline
        # impersonation.
        new_env = os.environ.copy()
        new_env["LOGNAME"] = "impala"
        new_env["USER"] = "******"
        new_env["USERNAME"] = "******"

        # Regardless of whether we are in fact skipping the snapshot load or not,
        # we nonetheless always pass -skip_snapshot_load to create-load-data.sh.
        # This is because we have already loaded the snapshot earlier in this
        # script, so we don't want create-load-data.sh to invoke
        # load-test-warehouse-snapshot.sh again.
        #
        # It would actually be nice to be able to skip the snapshot load, but
        # because of the existing messiness of create-load-data.sh, we can't.
        # This invocation...
        #
        #    $ create-load-data.sh -skip_snapshot_load -exploration_strategy core
        #
        # ...results in this error:
        #
        #    Creating /test-warehouse HDFS directory \
        #    (logging to create-test-warehouse-dir.log)... FAILED
        #    'hadoop fs -mkdir /test-warehouse' failed. Tail of log:
        #    Log for command 'hadoop fs -mkdir /test-warehouse'
        #    mkdir: `/test-warehouse': File exists
        #
        # Similarly, even though we might pass in "core" as the exploration strategy,
        # because we aren't loading a metadata snapshot (i.e., -skip_metadata_load is
        # false), an exhaustive dataload will always be done. This again is the result
        # of logic in create-load-data.sh, which itself ignores the value passed in
        # for -exploration_strategy.
        #
        # See IMPALA-4399: "create-load-data.sh has bitrotted to some extent, and needs
        #                   to be cleaned up"
        create_load_data_args = [
            "-skip_snapshot_load", "-cm_host", self.cm_host, "-snapshot_file",
            self.snapshot_file, "-exploration_strategy", exploration_strategy
        ]

        self.create_load_data(*create_load_data_args,
                              _env=new_env,
                              _out=tee,
                              _err=tee)

        sh.hdfs.dfs("-chown", "-R", "impala:hdfs",
                    "{0}".format(self.config["hive_warehouse_directory"]))

        logger.info("Re-load HBase data")
        # Manually load the HBase data last.
        self.python("{0}/bin/load-data.py".format(self.impala_home),
                    "--hive_warehouse_dir={0}".format(
                        self.config["hive_warehouse_directory"]),
                    "--table_formats=hbase/none",
                    "--hive_hs2_hostport={0}".format(self.config["hs2"]),
                    "--hdfs_namenode={0}".format(self.config["namenode"]),
                    "--exploration_strategy={0}".format(exploration_strategy),
                    workloads="functional-query",
                    force=True,
                    impalad=self.main_impalad,
                    _env=new_env,
                    _out=tee,
                    _err=tee)

        self.compute_stats()
        logger.info("Load data finished")

    # TODO: Should this be refactored out of this script? It has nothing to do with
    # data loading per se. If tests rely on the environment on the client being set
    # a certain way -- as in the prepare() method -- we may need to find another way
    # to deal with that. See IMPALA-4376.
    @timing
    def test(self):
        """Execute Impala's end-to-end tests against a remote cluster. All configuration
        paramters are picked from the cluster configuration that was fetched via the
        CM API."""

        # TODO: Running tests via run-tests.py is currently not working against a remote
        # cluster (although running directly via py.test seems to work). This method
        # may be refactored out of this file under IMPALA-4376, so for the time being,
        # raise a NotImplementedError.
        raise NotImplementedError

        # Overwrite the username to match the service user on the remote system and deal
        # with the assumption that in the local development environment the current user
        # is HDFS superuser as well.
        new_env = os.environ.copy()
        new_env["LOGNAME"] = "impala"
        new_env["USER"] = "******"
        new_env["USERNAME"] = "******"

        strategy = self.options.exploration_strategy
        logger.info(
            "Running tests with exploration strategy {0}".format(strategy))
        run_tests = sh.Command("{0}/tests/run-tests.py".format(
            self.impala_home))
        run_tests(
            "--skip_local_tests",
            "--exploration_strategy={0}".format(strategy),
            "--workload_exploration_strategy=functional-query:{0}".format(
                strategy),
            "--namenode_http_address={0}".format(self.config["namenode_http"]),
            "--hive_server2={0}".format(self.config["hs2"]),
            "--metastore_server={0}".format(self.config["metastore"]),
            "query_test",
            maxfail=10,
            impalad=",".join(self.config["impalad"]),
            _env=new_env,
            _out=tee,
            _err=tee)
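
For context, a minimal sketch of how the RemoteDataLoad class above could be driven. The argparse wiring is an assumption; only the option attribute names (cm_user, cm_pass, gateway, ssh_user, snapshot_file, exploration_strategy) come from what the class itself reads.

# A hedged driver sketch for RemoteDataLoad; the argparse wrapper is assumed.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Load a test-warehouse snapshot on a remote CM cluster")
    parser.add_argument("--cm_host", required=True)
    parser.add_argument("--cm_user", required=True)
    parser.add_argument("--cm_pass", required=True)
    parser.add_argument("--ssh_user", required=True)
    parser.add_argument("--gateway", default=None)
    parser.add_argument("--snapshot_file", default=None)
    parser.add_argument("--exploration_strategy", default="core")
    return parser.parse_args()

if __name__ == '__main__':
    options = parse_args()
    # RemoteDataLoad also expects IMPALA_HOME to be set in the environment.
    RemoteDataLoad(options.cm_host, options).load()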
Ejemplo n.º 11
0
    addHost = cluster.add_hosts(newHostList)
    # Wait for 5 minutes so that the parcels get downloaded, distributed and activated.
    print "++Wait Time++ 300 seconds"
    time.sleep(300)
     
 
if __name__ == '__main__':
 
    api = ApiResource(clouderaManagerHost, clouderaManagerPort, clouderaManagerUserName, clouderaManagerPassword, use_tls=clouderaManagerHTTPS)
    cluster = api.get_cluster(clusterDisplayName)
    hostlist=[]
 
 
    for host_info in api.get_all_hosts():
        if host_info.hostname in newHosts:
            host = api.get_host(host_info.hostId)
            hostlist.append(host.hostId)
    addHost = addHostToCluster(api, cluster, hostlist)
    start_time=time.time()
    parcel=cluster.get_parcel('CDH',parcelVersion)
     
    # Check for parcel deployment errors.
    print "++ Checking Parcel Deployment"
    while True:
        if parcel.stage == 'ACTIVATED':
            print "CDH Parcels Activated"
            break
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
        print parcel.stage
        print "progress: %s / %s" % (parcel.state.progress, parcel.state.totalProgress)
        # Re-fetch the parcel and wait before polling again; otherwise this loops
        # forever on the same stale object.
        time.sleep(30)
        parcel = cluster.get_parcel('CDH', parcelVersion)
Ejemplo n.º 12
0
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster','create_snapshot_policy']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '215964392',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create mangement roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg='Specified version {0} does not appear in {1} or appears twice'.format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster,
      # we can encounter the 'ACTIVATING' stage then
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init the zookeeper and kafka ones,
    # which need IDs specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      # - there is one that already exists
      # - there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # Hue requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # also, this spawns 3 journal node and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # two roles matching this name mean HA for YARN is already enabled; with only
      # one resourcemanager we still need to enable it
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly because
        # after enabling HA on YARN roles will have random strings in their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this only can start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this only can restart
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload = loads(value_a)
      # check if the policy already exists; an exception is expected when configuring it for the first time
      try:
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
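
The deploy_parcel action above resolves versions like "5.3-latest" by regex-matching against the parcels the cluster reports. A standalone sketch of that resolution step, assuming only the calls already used above (cluster.get_all_parcels() and the parcel .product/.version fields); the helper name is an assumption.

# A hedged sketch of the "<prefix>-latest" parcel version resolution used above.
import re

def resolve_parcel_version(cluster, product, requested):
    # Plain, fully specified versions pass through untouched.
    if "latest" not in requested:
        return requested
    available = [p.version for p in cluster.get_all_parcels() if p.product == product]
    if "-latest" in requested:
        prefix = re.match('(.+?)-latest', requested).group(1)
    else:
        # A bare "latest" matches anything.
        prefix = ".*"
    matches = [v for v in available if re.match(prefix, v) is not None]
    if len(matches) != 1:
        raise ValueError('Version {0} matches {1} of {2}'.format(requested, len(matches), available))
    return matches[0]

# e.g. resolve_parcel_version(cluster, 'CDH', '5.3-latest') returns the single available
# 5.3.x parcel version, and raises if none or several are available.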
Ejemplo n.º 13
0
def api_data_collection(request):
    """
        Application information collection restful api. Query completed application information on specific conditions and accumulate it.
        @method: GET
        @param from_time: Application finish time after this time. format : "%d/%m/%Y %H:%M". time zone=UTC+8
        @param end_time: Application finish time before this time. format : "%d/%m/%Y %H:%M". time zone=UTC+8
        @param queue_name: Query completed application information on specific queue name.
        @param app_type: Query completed application information on specific application type.
        @param app_state: Query completed application information on specific application states. specified as a comma-separated list. ex: FINISHED,FAILED,KILLED
        @return: json data
                    { "success":False, "message":"error message" }
                    { "success":True, "message": { "queue_view":{...}, "group_view":{...} } }
        """
    if request.method == "GET":
        response = {'success':False, 'message':''}
        filter_dict = {}
        if "queue_name" in request.GET:
            filter_dict['queue_name'] = request.GET.get('queue_name')
        if "app_type" in request.GET:
            filter_dict['app_type'] = request.GET.get('app_type')
        if "app_state" in request.GET:
            filter_dict['app_state'] = request.GET.get('app_state').split(',')
        #
        # time zone = Asia/Taipei = UTC+8
        from_time = datetime.strptime(request.GET.get('from_time'), "%d/%m/%Y %H:%M") - timedelta(hours=8)
        to_time = datetime.strptime(request.GET.get('end_time'), "%d/%m/%Y %H:%M") - timedelta(hours=8)
        #
        # get config
        config = ConfigParser.ConfigParser()
        config.read( os.path.join(settings.BASE_DIR, "cluster.ini") )
        cm_host = config.get("CM", "cm.host")
        cm_port = config.get("CM", "cm.port")
        cm_version = config.get("CM", "cm.version")
        cm_username = config.get("CM", "cm.username")
        cm_password = config.get("CM", "cm.password")
        #
        cluster_name = config.get("Cluster", "cluster.name")
        yarn_name = config.get("Cluster", "cluster.yarn.name")
        #
        ldap_host = config.get("Ldap", "ldap.host")
        ldap_username = config.get("Ldap", "ldap.username")
        ldap_password = config.get("Ldap", "ldap.password")
        #
        # get active resource manager info
        try:
            cm_api = ApiResource( cm_host, int(cm_port), username=cm_username, password=cm_password, version=int(cm_version) )
            cm_cluster_obj = cm_api.get_cluster(name=cluster_name)
            cm_yarn_obj = cm_cluster_obj.get_service(name=yarn_name)
            #
            find_active_rm = False
            for rm in cm_yarn_obj.get_roles_by_type(role_type="RESOURCEMANAGER"):
                if rm.haStatus == "ACTIVE":
                    host = cm_api.get_host(rm.hostRef.hostId)
                    active_rm_ip = host.ipAddress
                    active_rm_port = 8088
                    find_active_rm = True
            #
            if not find_active_rm:
                message = "can not find active rm"
                print( "[ERROR] " + message )
                response['success'] = False
                response['message'] = message
                return HttpResponse( json.dumps(response) )
        except Exception as e:
            message = "can not get cm yarn object"
            print( "[ERROR] " + message + str(e) )
            response['success'] = False
            response['message'] = message
            return HttpResponse( json.dumps(response) )
        #
        # all application statistics
        statistics_response = applications_statistics(active_rm_ip, active_rm_port, from_time, to_time, filter_dict)
        if statistics_response['success']:
            #
            # create ldap connection. access ldap to get group of account
            if create_ldap_connection(ldap_host, ldap_username, ldap_password):
                ldap_connection = create_ldap_connection(ldap_host, ldap_username, ldap_password)
            else:
                message = "can not connect to ldap://" + ldap_host
                response['success'] = False
                response['message'] = message
                return HttpResponse( json.dumps(response) )
            #
            # init queue view result & group view result
            queue_view_final_result = statistics_response['message']
            group_view_final_result = {}
            #
            #
            # add group information to queue view result and accumulate the result by group
            for queue, queue_info in queue_view_final_result.items():
                #
                queue_view_final_result[queue]['group'] = ''
                # queue naming : root.SYSTEM.<account> , root.PERSONAL.<account>
                m = re.match(r"(?P<root>\w+)\.(?P<second>\w+)\.(?P<third>\w+)", queue)
                if m and m.group('root') == 'root' and ( m.group('second') == 'SYSTEM' or m.group('second') == 'PERSONAL' ):
                    queue_view_final_result[queue]['account'] = m.group('third')
                    group_query_result = query_group_of_user(ldap_connection, queue_view_final_result[queue]['account'])
                    group = group_query_result['group']
                    project_name = group_query_result['name']
                    queue_view_final_result[queue]['group'] = group
                    if not group_view_final_result.has_key(group):
                        group_view_final_result[group] = { 'apps':{}, 'queues':[], 'name':project_name }
                    group_view_final_result[group]['queues'].append(queue)
                    #
                    for app_type, app_info in queue_info['apps'].items():
                        for app_state, data in app_info['final_status'].items():
                            if not group_view_final_result[group]['apps'].has_key(app_state):
                                group_view_final_result[group]['apps'][app_state] = {}
                            for key in data:
                                if not group_view_final_result[group]['apps'][app_state].has_key(key):
                                    group_view_final_result[group]['apps'][app_state][key] = data[key]
                                else:
                                    group_view_final_result[group]['apps'][app_state][key] += data[key]
            #
            # after finish to accumulate all result, unbind ldap connection
            ldap_connection.unbind()
        else:
            response['success'] = False
            response['message'] = statistics_response['message']
            return HttpResponse( json.dumps(response) )
        #
        # transform duration type from datetime.timedelta to string
        queue_view_final_result = transform_queue_view_response(queue_view_final_result)
        group_view_final_result = transform_project_view_response(group_view_final_result)
        #
        response['success'] = True
        response['message'] = {}
        response['message']['queue_view'] = queue_view_final_result
        response['message']['group_view'] = group_view_final_result
        print json.dumps("[DEBUG] response = " + json.dumps(response))
        return HttpResponse( json.dumps(response) )
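
# Added usage sketch (not part of the original example): the active-ResourceManager
# lookup above, isolated into a small helper. The cm_api calls mirror the ones used
# in the view; the connection details and the default API version are placeholder
# assumptions supplied by the caller.
from cm_api.api_client import ApiResource

def find_active_rm_ip(cm_host, cm_port, username, password, cluster_name, yarn_name, version=10):
    api = ApiResource(cm_host, int(cm_port), username=username, password=password, version=int(version))
    yarn = api.get_cluster(cluster_name).get_service(yarn_name)
    for rm in yarn.get_roles_by_type(role_type="RESOURCEMANAGER"):
        if rm.haStatus == "ACTIVE":
            # Resolve the role's hostRef to a host record to read its IP address.
            return api.get_host(rm.hostRef.hostId).ipAddress
    return None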
Ejemplo n.º 14
0
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handles all the steps required to create a cluster. All the functions are built to be
    idempotent, so you should be able to resume from any failed step by re-running
    __class__.setup().
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'], username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license: either trial mode is started or the provided
        license text is registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(self.module, 'License should be provided or trial should be specified')

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner, _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(cluster_config['name']))
            self.cluster = self.api.create_cluster(cluster_config['name'],
                                                   cluster_config['version'],
                                                   cluster_config['fullVersion'])

        cluster_hosts = [self.api.get_host(host.hostId).hostname
                         for host in self.cluster.list_hosts()]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS", msg="Host inspection completed: {}".format(cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT", msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'], role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(self.module, "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster, service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER", msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on other services
        # and is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
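
# Added sketch (hypothetical values): a minimal `config` shape matching what
# ClouderaManager reads above. Hostnames, versions and the parcel repo URL are
# placeholders; `module`, BASE_SERVICES and ADDITIONAL_SERVICES are assumed to
# be provided by the surrounding (not shown) module.
example_config = {
    'cm': {'host': 'cm.example.com', 'username': 'admin', 'password': 'admin'},
    'cluster': {'name': 'cluster1', 'version': 'CDH5', 'fullVersion': '5.14.0',
                'hosts': ['node1.example.com', 'node2.example.com']},
    'parcels': [{'product': 'CDH', 'version': '5.14.0-1.cdh5.14.0.p0.24',
                 'repo': 'https://archive.cloudera.com/cdh5/parcels/5.14/'}],
    'services': {'MGMT': {'roles': [{'group': 'SERVICEMONITOR',
                                     'hosts': ['node1.example.com'],
                                     'config': {}}]}},
}
# manager = ClouderaManager(module, example_config, trial=True)
# manager.setup()   # idempotent: safe to re-run after fixing a failed step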
Ejemplo n.º 15
0
class CmCluster(Cluster):

  def __init__(self, host_name, port=None, user="******", password="******",
               cluster_name=None, ssh_user=None, ssh_port=None, ssh_key_file=None,
               use_tls=False):
    # Initialize strptime() to workaround https://bugs.python.org/issue7980. Apparently
    # something in the CM API uses strptime().
    strptime("2015", "%Y")

    Cluster.__init__(self)
    # IMPALA-5455: If the caller doesn't specify port, default it based on use_tls
    if port is None:
      if use_tls:
        port = CM_TLS_PORT
      else:
        port = CM_CLEAR_PORT
    self.cm = CmApiResource(host_name, server_port=port, username=user, password=password,
                            use_tls=use_tls)
    clusters = self.cm.get_all_clusters()
    if not clusters:
      raise Exception("No clusters found in CM at %s" % host_name)
    if cluster_name:
      clusters_by_name = dict((c.name, c) for c in clusters)
      if cluster_name not in clusters_by_name:
        raise Exception(("No clusters named %s found in CM at %s."
            "Available clusters are %s.")
            % (cluster_name, host_name, ", ".join(sorted(clusters_by_name.keys()))))
      self.cm_cluster = clusters_by_name[cluster_name]
    else:
      if len(clusters) > 1:
        raise Exception(("Too many clusters found in CM at %s;"
            " a cluster name must be provided")
            % host_name)
      self.cm_cluster = clusters[-1]

    self.ssh_user = ssh_user
    self.ssh_port = ssh_port
    self.ssh_key_file = ssh_key_file
    self._ssh_client_lock = Lock()
    self._ssh_clients_by_host_name = defaultdict(list)

  def shell(self, cmd, host_name, timeout_secs=DEFAULT_TIMEOUT):
    with self._ssh_client(host_name) as client:
      return client.shell(cmd, timeout_secs=timeout_secs)

  @contextmanager
  def _ssh_client(self, host_name):
    """Returns an SSH client for use in a 'with' block. When the 'with' context exits,
       the client will be kept for reuse.
    """
    with self._ssh_client_lock:
      clients = self._ssh_clients_by_host_name[host_name]
      if clients:
        client = clients.pop()
      else:
        # IMPALA-7460: Insulate this import away from the global context so as to avoid
        # requiring Paramiko unless it's absolutely needed.
        from tests.util.ssh_util import SshClient
        LOG.debug("Creating new SSH client for %s", host_name)
        client = SshClient()
        client.connect(host_name, username=self.ssh_user, key_filename=self.ssh_key_file)
    error_occurred = False
    try:
      yield client
    except Exception:
      error_occurred = True
      raise
    finally:
      if not error_occurred:
        with self._ssh_client_lock:
          self._ssh_clients_by_host_name[host_name].append(client)

  def _init_local_hadoop_conf_dir(self):
    self._local_hadoop_conf_dir = mkdtemp()
    data = StringIO(self.cm.get("/clusters/%s/services/%s/clientConfig"
      % (self.cm_cluster.name, self._find_service("HIVE").name)))
    zip_file = ZipFile(data)
    for name in zip_file.namelist():
      if name.endswith("/"):
        continue
      extract_path = os.path.join(self._local_hadoop_conf_dir, os.path.basename(name))
      with open(extract_path, "w") as conf_file:
        conf_file.write(zip_file.open(name).read())

  def _find_service(self, service_type):
    """Find a service by its CM API service type. An exception will be raised if no
       service is found or multiple services are found. See the CM API documentation for
       more details about the service type.
    """
    services = [s for s in self.cm_cluster.get_all_services() if s.type == service_type]
    if not services:
      raise Exception("No service of type %s found in cluster %s"
          % (service_type, self.cm_cluster.name))
    if len(services) > 1:
      raise Exception("Found %s services in cluster %s; only one is expected."
        % len(services, self.cm_cluster.name))
    return services[0]

  def _find_role(self, role_type, service_type):
    """Find a role by its CM API role and service type. An exception will be raised if
       no roles are found. See the CM API documentation for more details about the
       service and role types.
    """
    service = self._find_service(service_type)
    roles = service.get_roles_by_type(role_type)
    if not roles:
      raise Exception("No roles of type %s found in service %s"
          % (role_type, service.name))
    return roles[0]

  def _init_hdfs(self):
    self._hdfs = Hdfs(self, "hdfs")

  def _init_hive(self):
    hs2 = self._find_role("HIVESERVER2", "HIVE")
    host = self.cm.get_host(hs2.hostRef.hostId)
    config = hs2.get_config(view="full")["hs2_thrift_address_port"]
    self._hive = Hive(self, str(host.hostname), int(config.value or config.default))

  def _init_impala(self):
    self._impala = CmImpala(self, self._find_service("IMPALA"))
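
# Added usage sketch (hypothetical): connecting with the CmCluster class above and
# running a command on the HiveServer2 host. The CM host, credentials, port and SSH
# settings are placeholders.
def print_hs2_uptime():
  cluster = CmCluster("cm.example.com", port=7180, user="admin", password="admin",
                      cluster_name="cluster1", ssh_user="root",
                      ssh_key_file="/root/.ssh/id_rsa")
  hs2 = cluster._find_role("HIVESERVER2", "HIVE")
  hs2_host = cluster.cm.get_host(hs2.hostRef.hostId).hostname
  # shell() reuses pooled SSH connections managed by _ssh_client().
  print(cluster.shell("uptime", hs2_host))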
Ejemplo n.º 16
0
cm_host = "localhost"
api = ApiResource(cm_host, username="******", password="******")

print "*** CLUSTERS ***"

clusters = None
# List clusters
for c in api.get_all_clusters():
    print "Cluster \"%s\" is version %s" % (c.name, c.version)
    clusters = c

print "*** HOSTS ***"

for host_ref in clusters.list_hosts():
    host = api.get_host(host_ref.hostId)
    print host.hostname

print "*** SERVICES ***"

hdfs = None
# List services & health info
for s in clusters.get_all_services():
  print "Service \"%s\" -- state \"%s\" -- health \"%s\"" %(s.name, s.serviceState, s.healthSummary)
  # Get HDFS service
  if 'hdfs' in s.type.lower():
    hdfs = s

print "*** HDFS Service checks (" + hdfs.serviceUrl + ") ***"

print "*** ROLES FOR HDFS ***"
Ejemplo n.º 17
0
class CmCluster(Cluster):
    def __init__(self,
                 host_name,
                 port=None,
                 user="******",
                 password="******",
                 cluster_name=None,
                 ssh_user=None,
                 ssh_port=None,
                 ssh_key_file=None,
                 use_tls=False):
        # Initialize strptime() to workaround https://bugs.python.org/issue7980. Apparently
        # something in the CM API uses strptime().
        strptime("2015", "%Y")

        Cluster.__init__(self)
        # IMPALA-5455: If the caller doesn't specify port, default it based on use_tls
        if port is None:
            if use_tls:
                port = CM_TLS_PORT
            else:
                port = CM_CLEAR_PORT
        self.cm = CmApiResource(host_name,
                                server_port=port,
                                username=user,
                                password=password,
                                use_tls=use_tls)
        clusters = self.cm.get_all_clusters()
        if not clusters:
            raise Exception("No clusters found in CM at %s" % host_name)
        if cluster_name:
            clusters_by_name = dict((c.name, c) for c in clusters)
            if cluster_name not in clusters_by_name:
                raise Exception(("No clusters named %s found in CM at %s."
                                 "Available clusters are %s.") %
                                (cluster_name, host_name, ", ".join(
                                    sorted(clusters_by_name.keys()))))
            self.cm_cluster = clusters_by_name[cluster_name]
        else:
            if len(clusters) > 1:
                raise Exception(
                    ("Too many clusters found in CM at %s;"
                     " a cluster name must be provided") % host_name)
            self.cm_cluster = clusters[-1]

        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_key_file = ssh_key_file
        self._ssh_client_lock = Lock()
        self._ssh_clients_by_host_name = defaultdict(list)

    def shell(self, cmd, host_name, timeout_secs=DEFAULT_TIMEOUT):
        with self._ssh_client(host_name) as client:
            return client.shell(cmd, timeout_secs=timeout_secs)

    @contextmanager
    def _ssh_client(self, host_name):
        """Returns an SSH client for use in a 'with' block. When the 'with' context exits,
       the client will be kept for reuse.
    """
        with self._ssh_client_lock:
            clients = self._ssh_clients_by_host_name[host_name]
            if clients:
                client = clients.pop()
            else:
                LOG.debug("Creating new SSH client for %s", host_name)
                client = SshClient()
                client.connect(host_name,
                               username=self.ssh_user,
                               key_filename=self.ssh_key_file)
        error_occurred = False
        try:
            yield client
        except Exception:
            error_occurred = True
            raise
        finally:
            if not error_occurred:
                with self._ssh_client_lock:
                    self._ssh_clients_by_host_name[host_name].append(client)

    def _init_local_hadoop_conf_dir(self):
        self._local_hadoop_conf_dir = mkdtemp()
        data = StringIO(
            self.cm.get(
                "/clusters/%s/services/%s/clientConfig" %
                (self.cm_cluster.name, self._find_service("HIVE").name)))
        zip_file = ZipFile(data)
        for name in zip_file.namelist():
            if name.endswith("/"):
                continue
            extract_path = os.path.join(self._local_hadoop_conf_dir,
                                        os.path.basename(name))
            with open(extract_path, "w") as conf_file:
                conf_file.write(zip_file.open(name).read())

    def _find_service(self, service_type):
        """Find a service by its CM API service type. An exception will be raised if no
       service is found or multiple services are found. See the CM API documentation for
       more details about the service type.
    """
        services = [
            s for s in self.cm_cluster.get_all_services()
            if s.type == service_type
        ]
        if not services:
            raise Exception("No service of type %s found in cluster %s" %
                            (service_type, self.cm_cluster.name))
        if len(services) > 1:
            raise Exception(
                "Found %s services in cluster %s; only one is expected." %
                (len(services), self.cm_cluster.name))
        return services[0]

    def _find_role(self, role_type, service_type):
        """Find a role by its CM API role and service type. An exception will be raised if
       no roles are found. See the CM API documentation for more details about the
       service and role types.
    """
        service = self._find_service(service_type)
        roles = service.get_roles_by_type(role_type)
        if not roles:
            raise Exception("No roles of type %s found in service %s" %
                            (role_type, service.name))
        return roles[0]

    def _init_hdfs(self):
        self._hdfs = Hdfs(self, "hdfs")

    def _init_hive(self):
        hs2 = self._find_role("HIVESERVER2", "HIVE")
        host = self.cm.get_host(hs2.hostRef.hostId)
        config = hs2.get_config(view="full")["hs2_thrift_address_port"]
        self._hive = Hive(self, str(host.hostname),
                          int(config.value or config.default))

    def _init_impala(self):
        self._impala = CmImpala(self, self._find_service("IMPALA"))
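
# Added sketch: the pooled-client pattern used by _ssh_client above, reduced to a
# standalone generic context manager. `factory` is any zero-argument callable that
# builds a new client; all names here are illustrative only.
from contextlib import contextmanager
from threading import Lock
from collections import defaultdict

_pool_lock = Lock()
_pools = defaultdict(list)

@contextmanager
def pooled_client(key, factory):
    with _pool_lock:
        clients = _pools[key]
        client = clients.pop() if clients else factory()
    error_occurred = False
    try:
        yield client
    except Exception:
        error_occurred = True
        raise
    finally:
        # Only clients that were used without error are returned for reuse.
        if not error_occurred:
            with _pool_lock:
                _pools[key].append(client)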
Ejemplo n.º 18
0
class NiagaraCMApi(object):
    def __init__(self,
                 cm_host,
                 user,
                 password,
                 cluster='cluster',
                 port='7180',
                 version=17):
        self.cm_host = cm_host
        self.user = user
        self.password = password
        self.cluster = cluster
        self.port = port
        self.version = version

        self.api = ApiResource(server_host=self.cm_host,
                               server_port=self.port,
                               username=self.user,
                               password=self.password,
                               version=self.version)

    def get_hosts_by_role(self, service_name, role, haStatus=None):
        """
        Method gets all hosts that runs specific service and role.

        Args:
            service_name(str): Name of service that runs under cloudera manager.
            role(str): Role name (f.e. KAFKA_BROKER)

        Returns:
            Sorted list of hostnames, that runs specific service and type.

        """
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        result = []
        for server in service_nodes:
            if haStatus == 'ACTIVE' and server.haStatus != 'ACTIVE':
                continue
            host_reference = server.hostRef.hostId
            result.append(self.api.get_host(host_reference).hostname)
        return result

    def get_kafka_broker_id_by_hostname(self,
                                        nodename,
                                        role='KAFKA_BROKER',
                                        service_name='kafka'):
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        for node in service_nodes:
            hostname = self.api.get_host(node.hostRef.hostId).hostname
            if hostname == nodename:
                broker_id = node.get_config()['broker.id']
                return broker_id

    def get_service_ports(self, service_name, role_config_group):
        """
        Method gets ports of specific type of service.

        Args:
            service_name(str): Name of service that runs under cloudera manager.
            role_config_group(str): Role config group name (f.e. kafka-KAFKA_BROKER-BASE)

        Returns:
            ports(dict): Dictionary hostname:port.
        """

        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        config = service.get_role_config_group(role_config_group)
        if service_name == 'kafka':
            try:
                kafka_port = config.config['port'].value
            except AttributeError:
                kafka_port = config.config['port']
            return kafka_port
        elif service_name == 'zookeeper':
            try:
                zk_port = config.config['clientPort'].value
            except AttributeError:
                zk_port = config.config['clientPort']
            return zk_port
        else:
            raise ValueError("Unknown service {0}".format(service_name))

    def get_all_role_config_groups(self, service_name):
        """
        Method gets all service's role config groups names, that could be
        used in get_service_ports.

        Args:
            service_name(str): Name of service that runs under cloduera manager.

        Returns:
            result(dict): Dictionary with all available role config groups names.
        """

        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        all_role_groups = service.get_all_role_config_groups()
        result = []
        for role_group in all_role_groups:
            result.append(role_group.name)
        return result

    def get_log_dirs_for_kafka_broker(self,
                                      nodename,
                                      service_name='kafka',
                                      role='KAFKA_BROKER'):
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        for node in service_nodes:
            hostname = self.api.get_host(node.hostRef.hostId).hostname
            if hostname == nodename:
                config = node.get_config()['log.dirs'].split(',')
                return config

    def get_broker_status(self,
                          nodename,
                          service_name='kafka',
                          role='KAFKA_BROKER'):
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        for node in service_nodes:
            hostname = self.api.get_host(node.hostRef.hostId).hostname
            if hostname == nodename:
                return node.roleState, node.maintenanceMode

    def kafka_broker_action(self,
                            nodename,
                            action,
                            service_name='kafka',
                            role='KAFKA_BROKER'):
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        for node in service_nodes:
            hostname = self.api.get_host(node.hostRef.hostId).hostname
            if hostname == nodename:
                _, maintenance = self.get_broker_status(nodename=nodename)
                if not maintenance:
                    if action == 'start':
                        cmd = service.start_roles(node.name)
                    elif action == 'stop':
                        cmd = service.stop_roles(node.name)
                    elif action == 'restart':
                        cmd = service.restart_roles(node.name)
                    else:
                        return 'Unknown action {0}'.format(action)

                    cmd[0].wait()
                    state, _ = self.get_broker_status(nodename=nodename)
                    return state
                else:
                    return maintenance

    def edit_log_dir_from_kafka_broker(self,
                                       nodename,
                                       log_dir,
                                       action,
                                       service_name='kafka',
                                       role='KAFKA_BROKER'):
        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        service_nodes = service.get_roles_by_type(role)
        for node in service_nodes:
            hostname = self.api.get_host(node.hostRef.hostId).hostname
            if hostname == nodename:
                config = node.get_config()
                try:
                    log_dirs = config['log.dirs']
                except KeyError:
                    # No log dirs are configured yet; start from an empty string.
                    log_dirs = ''
                if action == 'remove':
                    if log_dir in log_dirs:
                        new_log_dirs = log_dirs.replace(log_dir, '').replace(
                            ',,', ',').strip(',')
                    else:
                        return 0, 'Log dir {0} is not in a config.'.format(
                            log_dir)
                elif action == 'add':
                    if log_dir not in log_dirs:
                        new_log_dirs = (log_dirs + ',' + log_dir).replace(
                            ',,', ',').strip(',')
                    else:
                        return 0, 'Log dir {0} is already in a config.'.format(
                            log_dir)
                else:
                    return 2, 'Error: unknown action {0}'.format(action)
                new_config = config
                new_config['log.dirs'] = new_log_dirs
                try:
                    node.update_config(new_config)
                except ApiException as e:
                    return 1, 'Error: {0}'.format(e)
                else:
                    return 0, 'Broker config updated.'

    def get_role_types(self, service_name):
        """
        Methos gets all service's role names, that could be
        used in get_host_by_role method.

        Args:
            service_name(str): Name of service that runs under cloduera manager.

        Returns:
            all_roles(dict): Dictionary with all available role names.
        :param service_name:
        :return:
        """

        cluster = self.api.get_cluster(self.cluster)
        service = cluster.get_service(service_name)
        all_roles = service.get_role_types()
        return all_roles
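
# Added usage sketch (hypothetical values): querying broker placement and the broker
# port with the wrapper above. The CM host, credentials and names are placeholders.
def show_kafka_topology():
    cm = NiagaraCMApi('cm.example.com', 'admin', 'admin', cluster='cluster')
    brokers = cm.get_hosts_by_role('kafka', 'KAFKA_BROKER')
    port = cm.get_service_ports('kafka', 'kafka-KAFKA_BROKER-BASE')
    for host in brokers:
        print('%s:%s (broker.id=%s)'
              % (host, port, cm.get_kafka_broker_id_by_hostname(host)))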