Example #1
def init_cluster():
    # wait for all cloudera agent processes to come up
    BDVLIB_ServiceWait(
        [["services", "cloudera_scm_agent", NODE_GROUP_ID, "kts"]])
    # make sure cloudera manager has received registration
    # for all new agents
    all_cloudera_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username="******", password="******")
    while True:
        current_all_hosts = map(lambda x: x.hostname, api.get_all_hosts())
        setup_logger.info("Currently registered hosts with CM " +
                          str(current_all_hosts))
        if all(x in current_all_hosts for x in all_cloudera_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    KTS_HOSTS = ConfigMeta.getWithTokens(
        ['nodegroups', NODE_GROUP_ID, 'roles', 'kts', 'fqdns'])
    cluster.add_hosts(KTS_HOSTS)

    return (cluster, manager)
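
The registration wait above can be factored into a reusable helper with a timeout; a minimal sketch, where `expected_hosts`, `timeout_secs`, and `poll_secs` are illustrative parameters rather than part of the original script:

import time

def wait_for_agent_registration(api, expected_hosts, timeout_secs=600, poll_secs=10):
    # Poll Cloudera Manager until every expected hostname has registered,
    # or give up once timeout_secs have elapsed.
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        registered = [h.hostname for h in api.get_all_hosts()]
        if all(x in registered for x in expected_hosts):
            return True
        time.sleep(poll_secs)
    return False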
Example #2
def adjust_yarn_memory_limits(region, stack_name, restart=True):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    with cm_tunnel_ctx(manager_instance) as local_port:
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cluster = list(cm_api.get_all_clusters())[0]
        host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
        yarn = filter(lambda x: x.type == 'YARN',
                      list(cluster.get_all_services()))[0]
        rm_cg = filter(lambda x: x.roleType == 'RESOURCEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        nm_cg = filter(lambda x: x.roleType == 'NODEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        rm_cg.update_config({
            'yarn_scheduler_maximum_allocation_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_scheduler_maximum_allocation_vcores': host.numCores})
        nm_cg.update_config({
            'yarn_nodemanager_resource_memory_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_nodemanager_resource_cpu_vcores': host.numCores})
        cluster.deploy_client_config().wait()
        if restart:
            cluster.restart().wait()
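
To confirm that the limits above actually landed, the role config groups can be read back; a small sketch using the same `yarn` service handle (the helper name is illustrative):

def show_yarn_limits(yarn):
    # Print the non-default configs on the ResourceManager and NodeManager groups.
    for cg in yarn.get_all_role_config_groups():
        if cg.roleType in ('RESOURCEMANAGER', 'NODEMANAGER'):
            for name, value in cg.get_config().items():
                print('%s %s = %s' % (cg.roleType, name, value))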
Example #3
class ImpalaCluster(object):
  def __init__(self, cm_host, cm_cluster_name, username, password):
    self.cm_api = ApiResource(cm_host, username=username, password=password)
    self.hosts = dict()
    self.services = list()
    self.cluster = self.cm_api.get_cluster(cm_cluster_name)
    if self.cluster is None:
      raise RuntimeError('Cluster name "%s" not found' % cm_cluster_name)

    self.__load_hosts()
    self.__impala_service = ImpalaService(self)

  def _get_all_services(self):
    return self.cluster.get_all_services()

  def get_impala_service(self):
    return self.__impala_service

  def __load_hosts(self):
    self.hosts = dict()
    # Search for all hosts that are in the target cluster.
    # There is no API that provides the list of host in a given cluster, so to find them
    # we must loop through all the hosts and check the cluster name matches.
    for host_info in self.cm_api.get_all_hosts():
      # host_info doesn't include a link to the roleRef so need to do another lookup
      # based on the hostId.
      host = self.cm_api.get_host(host_info.hostId)
      for roleRef in host.roleRefs:
        if roleRef.get('clusterName') == self.cluster.name:
          self.hosts[host_info.hostId] = Host(host)
          break
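
A usage sketch for the class above, assuming the module's `ImpalaService` class is available and with placeholder connection details (not taken from the original):

if __name__ == '__main__':
    cluster = ImpalaCluster('cm-host.example.com', 'Cluster 1',
                            username='admin', password='admin')
    print('Loaded %d hosts for cluster "%s"' % (len(cluster.hosts),
                                                cluster.cluster.name))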
Example #4
def init_cluster():
    # wait for all cloudera agent processes to come up
    setup_logger.info("Creating Clutser.")
    BDVLIB_ServiceWait([["services", "cloudera_scm_agent", NODE_GROUP_ID]])
    # make sure cloudera manager has received registration
    # for all new agents
    all_cloudera_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username=ADMIN_USER, password=ADMIN_PASS)
    while True:
        current_all_hosts = map(lambda x: x.hostname, api.get_all_hosts())
        setup_logger.info("Currently registered hosts with CM " +
                          str(current_all_hosts))
        if all(x in current_all_hosts for x in all_cloudera_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    cluster.add_hosts(ALL_HOSTS)

    # turn off host swap alerting
    hosts_swap_alert_off(api)

    setup_logger.info("Setting Up SPARK2 Repo....")
    add_spark2_repo(api)
    ##Set java home
    setup_logger.info("Setting Up Java Path....")
    hosts_set_javahome(api)

    return (cluster, manager)
Example #5
def get_hosts(self):
    hosts = {}
    from cm_api.api_client import ApiResource
    api = ApiResource(self.host, self.port, self.username, self.password)
    for h in api.get_all_hosts():
        hosts[h.hostId] = h.ipAddress
    return hosts
Example #6
def list_hosts(host, username, password, cafile):
  context = ssl.create_default_context(cafile=cafile)

  api = ApiResource(host, username=username, password=password, use_tls=True,
                    ssl_context=context)

  for h in api.get_all_hosts():
    print h.hostname
Example #7
def get_cluster_specs():
    cm_api = ApiResource(os.environ['MANAGER_HOST'], username='******',
                         password='******', server_port=7180, version=9)
    host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
    cluster = list(cm_api.get_all_clusters())[0]
    yarn = filter(lambda x: x.type == 'YARN',
                  list(cluster.get_all_services()))[0]
    return {'num_worker_nodes': len(yarn.get_roles_by_type('NODEMANAGER')),
            'num_cores': host.numCores, 'node_memory': host.totalPhysMemBytes}
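
The returned dict feeds sizing math directly; for example, a rough per-executor memory figure (the executor count and 80% usable fraction are illustrative assumptions):

def suggest_executor_memory_mb(specs, executors_per_node=4, usable_fraction=0.8):
    # Convert bytes to MB and leave headroom for the OS and Hadoop daemons.
    node_memory_mb = specs['node_memory'] / 1024 / 1024
    return int(node_memory_mb * usable_fraction / executors_per_node)

# e.g. suggest_executor_memory_mb(get_cluster_specs())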
Example #8
def main(cm_host, user, password):
    api = ApiResource(cm_host, username=user, password=password)
    cluster = api.get_all_clusters()[0]
    try:
        cluster.get_service(service_name)
        print "Service %s already configured. Skipping" % service_name
    except ApiException:
        print "creating new service %s" % service_name
        add_kudu_service(cluster, service_name)
        create_kudu_roles(cluster, api.get_all_hosts())
        update_kudu_role_group_configs(cluster)
        start_service(cluster, service_name)
        update_impala_service(cluster, service_name)
        print "Waiting for cluster to restart stale services"
        cluster.restart(restart_only_stale_services=True,
                        redeploy_client_configuration=True).wait()
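
The helper functions referenced above are not shown; as a hedged sketch, `add_kudu_service` could be as small as a single `create_service` call (the `KUDU` service type string is an assumption about the installed CSD):

def add_kudu_service(cluster, service_name):
    # Register the service with the cluster; roles and configs are added separately.
    return cluster.create_service(service_name, 'KUDU')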
Example #9
def get_cluster_info(manager_host, server_port=7180, username='******',
                     password='******'):
    cm_api = ApiResource(manager_host, username=username, password=password,
                         server_port=server_port, version=9)
    host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
    cluster = list(cm_api.get_all_clusters())[0]
    yarn = filter(lambda x: x.type == 'YARN',
                  list(cluster.get_all_services()))[0]
    hive = filter(lambda x: x.type == 'HIVE',
                  list(cluster.get_all_services()))[0]
    impala = filter(lambda x: x.type == 'IMPALA',
                    list(cluster.get_all_services()))[0]
    hive_hs2 = hive.get_roles_by_type('HIVESERVER2')[0]
    hive_host = cm_api.get_host(hive_hs2.hostRef.hostId).hostname
    hive_port = int(
        hive_hs2.get_config('full')['hs2_thrift_address_port'].default)
    impala_hs2 = impala.get_roles_by_type('IMPALAD')[0]
    impala_host = cm_api.get_host(impala_hs2.hostRef.hostId).hostname
    impala_port = int(impala_hs2.get_config('full')['hs2_port'].default)
    return {'num_worker_nodes': len(yarn.get_roles_by_type('NODEMANAGER')),
            'node_cores': host.numCores, 'node_memory': host.totalPhysMemBytes,
            'hive_host': hive_host, 'hive_port': hive_port,
            'impala_host': impala_host, 'impala_port': impala_port}
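
The discovered endpoints can be handed straight to a SQL client; a sketch using the impyla package (the package choice is an assumption, any HiveServer2-compatible client would do):

from impala.dbapi import connect

def run_impala_query(info, sql):
    # info is the dict returned by get_cluster_info()
    conn = connect(host=info['impala_host'], port=info['impala_port'])
    cursor = conn.cursor()
    cursor.execute(sql)
    return cursor.fetchall()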
Example #10
def adjust_yarn_memory_limits(region, stack_name):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cm_api = ApiResource("localhost", username="******", password="******", server_port=64999, version=9)
    with http_tunnel_ctx(manager_instance, 7180, 64999):
        cluster = list(cm_api.get_all_clusters())[0]
        host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
        yarn = filter(lambda x: x.type == "YARN", list(cluster.get_all_services()))[0]
        rm_cg = filter(lambda x: x.roleType == "RESOURCEMANAGER", list(yarn.get_all_role_config_groups()))[0]
        nm_cg = filter(lambda x: x.roleType == "NODEMANAGER", list(yarn.get_all_role_config_groups()))[0]
        rm_cg.update_config(
            {
                "yarn_scheduler_maximum_allocation_mb": (int(host.totalPhysMemBytes / 1024.0 / 1024.0)),
                "yarn_scheduler_maximum_allocation_vcores": host.numCores,
            }
        )
        nm_cg.update_config(
            {
                "yarn_nodemanager_resource_memory_mb": (int(host.totalPhysMemBytes / 1024.0 / 1024.0)),
                "yarn_nodemanager_resource_cpu_vcores": host.numCores,
            }
        )
        cluster.deploy_client_config().wait()
        cluster.restart().wait()
Example #11
class handler_cm_api:
    def __init__(self):
        self._user_executing = grp.getgrnam(getpass.getuser())[0]

    def __getitem__(self):
        return self

    def setup(self,
              p_cm_host,
              p_cm_user,
              p_cm_pass,
              p_cm_version,
              p_cluster,
              p_cm_port=None,
              p_use_tls=False):
        self.cm_api = ApiResource(p_cm_host,
                                  server_port=p_cm_port,
                                  version=p_cm_version,
                                  username=p_cm_user,
                                  password=p_cm_pass,
                                  use_tls=p_use_tls)
        handler_cm_api.cluster_hosts = self.cm_api.get_all_hosts()
        if p_cluster:
            v_clusters = filter(lambda x: x.displayName == p_cluster,
                                self.cm_api.get_all_clusters())
            if not v_clusters:
                print("Error: That cluster is not valid.")
                return
            else:
                self.cluster = v_clusters[0]
                self.services = self.cluster.get_all_services()
                self.name = self.cluster.displayName

        tmp_topology = self.cluster.list_hosts()
        self.topology = {}

        for i in range(len(tmp_topology)):
            tmp_host = filter(lambda x: x.hostId == tmp_topology[i].hostId,
                              handler_cm_api.cluster_hosts)[0]
            self.topology[tmp_topology[i].hostId] = tmp_host.hostname

    def get_current_group(self):
        return self._user_executing

###############################
# For internal validations

    def __validate_service(self, p_service):
        v_service = filter(lambda x: x.type == p_service, self.services)

        if not v_service:
            print("Error: Service not found")
            raise SystemExit

        return v_service.pop()

    def __validate_hostname(self, p_hostname):
        v_node = filter(lambda x: x.hostname == p_hostname,
                        handler_cm_api.cluster_hosts)
        if not v_node:
            print("Error: Hostname not found")
            raise SystemExit

        return v_node.pop()

    def __validate_role(self, p_service, p_role, p_hostname):
        v_service = self.__validate_service(p_service)
        v_node = self.__validate_hostname(p_hostname)
        v_roles = filter(lambda x: x.type == p_role, v_service.get_all_roles())
        v_role = filter(lambda x: x.hostRef.hostId == v_node.hostId, v_roles)

        if not v_role:
            print("Error: Role not found in that host")
            raise SystemExit

        return v_role.pop()

######################################################################
# START/STOP/RESTART
######################################################################

    def stop_cluster(self):
        v_cmd = self.cluster.stop()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def start_cluster(self):
        v_cmd = self.cluster.start()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def restart_cluster(self):
        v_cmd = self.cluster.restart()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def rolling_restart_cluster(self):
        v_cmd = self.cluster.rolling_restart()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

######################################################################
#SERVICES
######################################################################
################
# Status
################
# ------ State

    def check_state_services(self):
        for v_srv in self.services:
            print(coloring(v_srv.serviceState, v_srv.type))

    def check_state_service(self, p_service):
        v_service = self.__validate_service(p_service)
        print(coloring(v_service.serviceState, v_service.type))

    def check_health_services(self):
        for v_srv in self.services:
            print(coloring(v_srv.healthSummary, v_srv.type))

# ----- Health

    def check_health_service(self, p_service):
        v_service = self.__validate_service(p_service)
        print(coloring(v_service.healthSummary, v_service.type))

#####################################
# stop/start/restart/Rolling Restart
#####################################

    def stop_service(self, p_service):
        v_service = self.__validate_service(p_service)
        print("* Stopping " + v_service.type)
        v_cmd = v_service.stop()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def start_service(self, p_service):
        v_service = self.__validate_service(p_service)
        print("* Starting " + v_service.type)
        v_cmd = v_service.start()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def restart_service(self, p_service):
        v_service = self.__validate_service(p_service)
        print("* Restarting " + v_service.type)
        v_cmd = v_service.restart()
        v_msg = f_waiting_task(v_cmd)
        print(coloring(*v_msg))

    def rolling_restart_service(self, p_service):
        v_service = self.__validate_service(p_service)
        try:
            print(" * Rolling Restarting " + v_service.type)
            v_cmd = v_service.rolling_restart()
            v_msg = f_waiting_task(v_cmd)
            print(coloring(*v_msg))
        except:
            if re.match("Command not valid for", str(sys.exc_info()[1])):
                print "It's not possible to use Rolling Restart in this service."
            else:
                raise

###################################################################
# ROLES
###################################################################
#################
# Status
#################

# ---- State

    def check_state_roles(self, p_service):
        v_service = self.__validate_service(p_service)
        print("*" + v_service.type + ":")
        for v_role in v_service.get_all_roles():
            print(
                coloring(
                    v_role.roleState,
                    filter(lambda x: x.hostId == v_role.hostRef.hostId,
                           handler_cm_api.cluster_hosts)[0].hostname) + ":\t" +
                v_role.type)

    def check_state_role(self, p_service, p_role):
        v_service = self.__validate_service(p_service)
        print("*" + v_service.type + ":")
        v_roles = filter(lambda x: x.type == p_role, v_service.get_all_roles())
        for v_role in v_roles:
            print(
                coloring(
                    v_role.roleState,
                    filter(lambda x: x.hostId == v_role.hostRef.hostId,
                           handler_cm_api.cluster_hosts)[0].hostname) + ":\t" +
                v_role.type)

    def check_state_all_roles(self):
        for v_service in self.services:
            self.check_state_roles(v_service.type)
            print('---------------------')

# ---- Health

    def check_health_roles(self, p_service):
        v_service = self.__validate_service(p_service)
        print("*" + v_service.type + ":")
        for v_role in v_service.get_all_roles():
            print(
                coloring(
                    v_role.healthSummary,
                    filter(lambda x: x.hostId == v_role.hostRef.hostId,
                           handler_cm_api.cluster_hosts)[0].hostname) + ":\t" +
                v_role.type)

    def check_health_role(self, p_service, p_role):
        v_service = self.__validate_service(p_service)
        print("*" + v_service.type + ":")
        v_roles = filter(lambda x: x.type == p_role, v_service.get_all_roles())
        for v_role in v_roles:
            print(
                coloring(
                    v_role.healthSummary,
                    filter(lambda x: x.hostId == v_role.hostRef.hostId,
                           handler_cm_api.cluster_hosts)[0].hostname) + ":\t" +
                v_role.type)

    def check_health_all_roles(self):
        for v_service in self.services:
            self.check_health_roles(v_service.type)
            print('---------------------')

#####################
# Stop/Start/Restart

    def stop_role(self, p_service, p_role, p_hostname):
        v_service = self.__validate_service(p_service)
        v_node = self.__validate_hostname(p_hostname)
        v_role = self.__validate_role(p_service, p_role, p_hostname)

        print("* Stopping " + v_role.type)
        v_cmd = v_service.stop_roles(v_role.name)
        v_msg = f_waiting_task(v_cmd[0])
        print(coloring(*v_msg))

    def start_role(self, p_service, p_role, p_hostname):
        v_service = self.__validate_service(p_service)
        v_node = self.__validate_hostname(p_hostname)
        v_role = self.__validate_role(p_service, p_role, p_hostname)

        print("* Starting " + v_role.type)
        v_cmd = v_service.start_roles(v_role.name)
        v_msg = f_waiting_task(v_cmd[0])
        print(coloring(*v_msg))

    def restart_role(self, p_service, p_role, p_hostname):
        v_service = self.__validate_service(p_service)
        v_node = self.__validate_hostname(p_hostname)
        v_role = self.__validate_role(p_service, p_role, p_hostname)

        print("* restarting " + v_role.type)
        v_cmd = v_service.restart_roles(v_role.name)
        v_msg = f_waiting_task(v_cmd[0])
        print(coloring(*v_msg))

###########################################################
#IMPALA QUERIES
###########################################################
# FILTERS
############################

    def setup_filters_impala_queries(self):
        v_start_time = raw_input(
            'Introduce the start time with following format: DD/MM/YYYY_hh:mm:ss. Example: 01/01/2018_00:00:00: '
        )
        if not re.match("^\d{2}/\d{2}/20\d{2}_\d{2}:\d{2}:\d{2}$",
                        v_start_time):
            print("Error: Invalid Format for start time")
            return

        v_end_time = raw_input(
            'Introduce the end time with the following format: DD/MM/YYYY_hh:mm:ss. Example 31/01/2018_00:00:00: '
        )
        if not re.match("^\d{2}/\d{2}/20\d{2}_\d{2}:\d{2}:\d{2}$", v_end_time):
            print("Error: Invalid format for end time")
            return

        v_filter_type = raw_input(
            'Choose the kind of filter: user|duration|state: ')
        if not v_filter_type in ('user', 'duration', 'state'):
            print("Error: Invalid kind of filter")
            return

        if v_filter_type == 'user':
            v_filter_value = raw_input(
                'Introduce the user name you want to filter by: ')
            if not v_filter_value:
                print("Error: Invalid user name")
                return

        elif v_filter_type == 'duration':
            v_filter_value = raw_input(
                'Introduce the query duration you want to filter by: +Xs|-Xs|=Xs. Example: +0s: '
            )
            if not re.match("^[+-=]\d+.\d*[hms]$", v_filter_value):
                print("Error: Invalid duration filter.")
                return

        elif v_filter_type == 'state':
            v_filter_value = raw_input(
                'Introduce the query state you want to filter by: CREATED|INITIALIZED|COMPILED|RUNNING|FINISHED|EXCEPTION|UNKNOWN: '
            )
            if not v_filter_value in ('CREATED', 'INITIALIZED', 'COMPILED',
                                      'RUNNING', 'FINISHED', 'EXCEPTION',
                                      'UNKNOWN'):
                print("Error: Invalid state filter.")
                return

        v_limit = raw_input(
            "Introduce the max num of queries you want to check: ")
        if not re.match("^\d+$", v_limit):
            print("Error: Invalid limit. It has to be an integer")
            return

        return v_start_time, v_end_time, v_filter_type, v_filter_value, int(
            v_limit)

######################################
# Getting queries
######################################

    def get_impala_queries(self,
                           p_start_time=None,
                           p_end_time=None,
                           p_filter_type=None,
                           p_filter_value=None,
                           p_limit=None):
        if not (p_start_time and p_end_time and p_filter_type
                and p_filter_value and p_limit):
            p_start_time, p_end_time, p_filter_type, p_filter_value, p_limit = self.setup_filters_impala_queries(
            )

        v_impala_matches = filter(lambda x: x.type == 'IMPALA', self.services)
        if not v_impala_matches:
            print("Error: The Impala service doesn't exist in this cluster.")
            return
        v_impala = v_impala_matches[0]

        if re.match("^\d{2}/\d{2}/20\d{2}_\d{2}:\d{2}:\d{2}$", p_start_time):
            v_start_time = datetime.strptime(p_start_time, '%d/%m/%Y_%H:%M:%S')
        else:
            print("Error. startTime format is not valid.")
            return

        if re.match("^\d{2}/\d{2}/20\d{2}_\d{2}:\d{2}:\d{2}$", p_end_time):
            v_end_time = datetime.strptime(p_end_time, '%d/%m/%Y_%H:%M:%S')
        else:
            print("Error. endTime format is not valid.")
            return

        if p_filter_type == "user" and type(p_filter_value) == str:
            v_filter_str = 'user = '******'+':
                v_filter_value = p_filter_value.replace('+', '>')
            if p_filter_value[0] == '-':
                v_filter_value = p_filter_value.replace('-', '<')
            v_filter_str = 'queryDuration ' + v_filter_value

        elif p_filter_type == "state" and p_filter_value in (
                'CREATED', 'INITIALIZED', 'COMPILED', 'RUNNING', 'FINISHED',
                'EXCEPTION', 'UNKNOWN'):
            v_filter_str = 'queryState = ' + p_filter_value

        else:
            print("Error: Filter is not valid.")
            return

        if type(p_limit) == int and 0 < p_limit <= 200:
            v_limit = p_limit
        else:
            print("Error: Limit is not valid. It must be > 0 and <= 200")
            return

        v_queries = v_impala.get_impala_queries(v_start_time, v_end_time,
                                                v_filter_str, v_limit).queries

        v_output = ''
        for vq in v_queries:
            v_coordinator = filter(lambda x: x.hostId == vq.coordinator.hostId,
                                   self.cluster_hosts)[0].hostname

            v_output += COLORS.BLUE + "##################################################################################" + COLORS.RESET + "\n"
            v_output += vq.queryId + " -- " + vq.queryState + ":\n"
            v_output += COLORS.RED + vq.statement + COLORS.RESET + "\n"
            v_output += COLORS.GREEN + "--- Attributes ---" + COLORS.RESET + "\n"
            v_output += "Query Type: " + vq.queryType + "\n"
            if 'query_status' in vq.attributes.keys():
                v_output += "Query Status: " + vq.attributes[
                    'query_status'] + "\n"

            v_output += "User: "******"\n"
            v_output += "Database: " + vq.database + "\n"
            if 'pool' in vq.attributes.keys():
                v_output += "Pool: " + vq.attributes['pool'] + "\n"

            v_output += "Starts at: " + vq.startTime.strftime(
                "%d/%m/%Y_%H:%M:%S") + "\n"
            v_output += "Ends at: " + vq.endTime.strftime(
                "%d/%m/%Y_%H:%M:%S") + "\n"
            v_output += "Coordinator: " + v_coordinator + "\n"
            v_output += "Rows Produced: " + str(vq.rowsProduced) + "\n"

            if vq.attributes['file_formats']:
                v_output += "File Format: " + vq.attributes[
                    'file_formats'] + "\n"
            if 'hdfs_bytes_read' in vq.attributes.keys():
                v_output += "HDFS bytes read: " + vq.attributes[
                    'hdfs_bytes_read'] + "\n"
            if 'memory_aggregate_peak' in vq.attributes.keys():
                v_output += "Memory Aggregate Peak: " + vq.attributes[
                    'memory_aggregate_peak'] + "\n"
            if 'thread_cpu_time' in vq.attributes.keys():
                v_output += "Threads Cpu Time: " + vq.attributes[
                    'thread_cpu_time'] + "\n"

        print(v_output)
        print("Do you want to save the output? (Y/N)")
        v_save = raw_input("Your choice: ").upper()
        if v_save == 'Y':
            v_output_nc = re.sub("\\x1b\[\d+m", "", v_output)
            v_file = "/tmp/impala_queries_" + datetime.now().strftime(
                "%Y%m%d_%H%M%S") + ".log"
            with open(v_file, 'a') as file_output:
                file_output.write(v_output_nc)
            print("The output was written in: " + v_file)

######################
# Getting details
######################

    def get_details_impala_query(self, p_query_id=None):
        if not p_query_id:
            v_query_id = raw_input(
                'Introduce the query id you want to check the details: ')
        else:
            v_query_id = p_query_id

        v_impala = filter(lambda x: x.type == 'IMPALA', self.services)[0]
        v_queries = v_impala.get_impala_queries(
            datetime.now() - timedelta(days=30), datetime.now(),
            'queryDuration > 0s', 1000).queries

        v_query = filter(lambda x: x.queryId == v_query_id, v_queries)
        if not v_query:
            print(
                "Error: The query_id is not valid, was executed more than 30 days ago or is not between the last 1000 queries. 1000 is the limit."
            )
            return
        elif not v_query[0].detailsAvailable:
            print("Error: This Query does not have details available.")
            return
        else:
            v_output = "/tmp/impala_query_details_" + v_query[
                0].queryId + "_" + datetime.now().strftime(
                    "%Y%m%d_%H%M%S") + ".log"
            with open(v_output, 'a') as file_output:
                file_output.write(
                    str(v_impala.get_query_details(v_query[0].queryId)))
            print("The output was written in: " + v_output)

#######################

    def get_same_configuration(self):
        v_configs = []
        v_command = 'hadoop org.apache.hadoop.conf.Configuration'

        for v_node in self.topology.values():
            v_ssh = subprocess.Popen(
                ["ssh", v_node, "-o", "StrictHostKeyChecking=no", v_command],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            v_configs += [v_ssh.stdout.readlines()]

        if len(self.topology) != len(v_configs):
            print(
                "Error: The num configs is different to the num of nodes in this cluster"
            )
            return

        if v_configs[1:] == v_configs[:-1]:
            print(coloring('GOOD', "The configs are the same in all nodes."))
            print("The nodes which were checked are: " +
                  ', '.join(self.topology.values()))

        else:
            print(coloring('BAD', "The configs are not the same."))
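
A usage sketch for the handler above; the host, credentials, API version, and cluster display name are placeholders, not values from the original:

if __name__ == '__main__':
    v_handler = handler_cm_api()
    v_handler.setup('cm-host.example.com', 'admin', 'admin-password',
                    p_cm_version=13, p_cluster='Cluster 1', p_cm_port=7180)
    v_handler.check_state_services()
    v_handler.check_health_services()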
Example #12
class CMInventory(object):

    def _empty_inventory(self):
        return {"_meta" : {"hostvars" : {}}}

    def __init__(self):
        ''' Main execution path '''

        self.config = ConfigParser.SafeConfigParser()
        if os.environ.get('CM_INI', ''):
            config_files = [os.environ['CM_INI']]
        else:
            config_files =  CM_CONFIG_FILES
        for config_file in config_files:
            if os.path.exists(config_file):
                self.config.read(config_file)
                break

        # Load up connections info based on config and then environment variables
        username = (self.config.get('auth', 'username') or
                    os.environ.get('CM_USERNAME', None))
        password = (self.config.get('auth', 'password') or
                   os.environ.get('CM_PASSWORD', None))
        host     = (self.config.get('auth', 'host') or
                    os.environ.get('CM_HOST', None))
        if self.config.has_option('auth', 'port'):
            port = self.config.get('auth', 'port')
        else:
            port = os.environ.get('CM_PORT', None)
        if self.config.has_option('auth', 'use_tls'):
            use_tls = self.config.get('auth', 'use_tls')
        else:
            use_tls = os.environ.get('CM_USETLS', False)
        if self.config.has_option('auth', 'version'):
            version = self.config.get('auth', 'version')
        else:
            version = os.environ.get('CM_VERSION', None)

        # Limit the clusters being scanned
        self.filter_clusters = os.environ.get('CM_CLUSTERS')
        if not self.filter_clusters and self.config.has_option('defaults', 'clusters'):
            self.filter_clusters = self.config.get('defaults', 'clusters')
        if self.filter_clusters:
            self.filter_clusters = [x.strip() for x in self.filter_clusters.split(',') if x.strip()]

        self.inv_lock = Lock()
        self.cm = ApiResource(host, port, username, password, use_tls)


    def _put_cache(self, name, value):
        '''
        Saves the value to cache with the name given.
        '''
        if self.config.has_option('defaults', 'cache_dir'):
            cache_dir = os.path.expanduser(self.config.get('defaults', 'cache_dir'))
            if not os.path.exists(cache_dir):
                os.makedirs(cache_dir)
            cache_file = os.path.join(cache_dir, name)
            with open(cache_file, 'w') as cache:
                json.dump(value, cache)

    def _get_cache(self, name, default=None):
        '''
        Retrieves the value from cache for the given name.
        '''
        if self.config.has_option('defaults', 'cache_dir'):
            cache_dir = self.config.get('defaults', 'cache_dir')
            cache_file = os.path.expanduser(os.path.join(cache_dir, name))
            if os.path.exists(cache_file):
                if self.config.has_option('defaults', 'cache_max_age'):
                    cache_max_age = self.config.getint('defaults', 'cache_max_age')
                else:
                    cache_max_age = 0
                cache_stat = os.stat(cache_file)
                if (cache_stat.st_mtime + cache_max_age) >= time.time():
                    with open(cache_file) as cache:
                        return json.load(cache)
        return default

    def get_host(self, hostname):
        inv = self._get_cache(hostname, None)
        if inv is not None:
            return inv

        if inv is None:
            try:
                inv = hosts.get_host(self.cm, hostname)
            except ObjectNotFoundError:
                pass

        if inv is not None:
            self._put_cache(hostname, inv)
        return inv or {}

    def _add_host(self, inv, parent_group, host_name):
        '''
        Add the host to the parent group in the given inventory.
        '''
        with self.inv_lock:
            p_group = inv.setdefault(parent_group, [])

        if isinstance(p_group, dict):
            group_hosts = p_group.setdefault('hosts', [])
        else:
            group_hosts = p_group
        if host_name not in group_hosts:
            group_hosts.append(host_name)

    def _add_child(self, inv, parent_group, child_group):
        '''
        Add a child group to a parent group in the given inventory.
        '''
        if parent_group != 'all':
            with self.inv_lock:
                p_group = inv.setdefault(parent_group, {})

            if not isinstance(p_group, dict):
                with self.inv_lock:
                    inv[parent_group] = {'hosts': p_group}
                    p_group = inv[parent_group]
            group_children = p_group.setdefault('children', [])
            if child_group not in group_children:
                group_children.append(child_group)
        with self.inv_lock:
            inv.setdefault(child_group, [])

    def get_inventory(self, meta_hostvars=True, n_threads=5):
        '''
        Reads the inventory from cache or from the Cloudera Manager API.
        '''
        # Use different cache names for guests only vs. all hosts.
        cache_name = '__inventory_all__'

        inv = self._get_cache(cache_name, None)
        if inv is not None:
            return inv


        def _build_host_inventory(hostRef,inv,meta_hostvars):
            host = hosts.get_host(self.cm, hostRef.hostId)
            #print host.hostname

            self._add_host(inv, 'all', host.hostname)
            if meta_hostvars:
                inv['_meta']['hostvars'][host.hostname] = host.to_json_dict(preserve_ro=True)
            self._put_cache(host.hostname, host.to_json_dict(preserve_ro=True))

            # Group by cluster
            if host.clusterRef:
                cluster = clusters.get_cluster(self.cm, host.clusterRef.clusterName)
                self._add_child(inv, 'all', cluster.displayName)
                self._add_host(inv, cluster.displayName, host.hostname)

                if host.roleRefs:
                    for roleRef in host.roleRefs:
                        role = roles.get_role(self.cm, roleRef.serviceName, roleRef.roleName, roleRef.clusterName)

                        # Group by service
                        service = services.get_service(self.cm, roleRef.serviceName, roleRef.clusterName)

                        # There is no way to ensure that a service display name is unique across clusters.
                        # The only simple and unique representation of the service that can be used
                        # is the concatenation of the service name and the cluster's name.
                        service_group = cluster.displayName + '-' + service.displayName
                        self._add_child(inv, 'all', service.type)
                        self._add_child(inv, service.type, service_group)
                        self._add_child(inv, cluster.displayName, service_group)
                        self._add_host(inv, service_group, host.hostname)

                        # Group by role. Roles depend on services and clusters, so the only unique and
                        # simple representation of a group is the concatenation of the role type, service
                        # name and the cluster name.
                        role_group = cluster.displayName + '-' + service.displayName + '-' + role.type
                        self._add_child(inv, 'all', role.type)
                        #self._add_child(inv, role.type, service_group)
                        #self._add_child(inv, service_group, role_group)
                        self._add_child(inv, role.type, role_group)
                        self._add_host(inv, role_group, host.hostname)

                        # Group by role Group
                        role_group = role.roleConfigGroupRef.roleConfigGroupName
                        self._add_child(inv, role.type, role_group)
                        self._add_host(inv, role_group, host.hostname)

                        # Group by role template
                        for template in host_templates.get_all_host_templates(self.cm, host.clusterRef.clusterName):
                            self._add_child(inv, 'all', template.name)
                            for group in template.roleConfigGroupRefs:
                                if role_group == group.roleConfigGroupName:
                                    self._add_child(inv, template.name, role_group)
                else:
                    self._add_child(inv, 'all', 'no_role')
                    self._add_host(inv, 'no_role', host.hostname)

                # Group by Rack
                self._add_child(inv, 'all', host.rackId)
                self._add_host(inv, host.rackId, host.hostname)
            else:
                cluster_group = "no_cluster"
                self._add_child(inv, 'all', cluster_group)
                self._add_host(inv, cluster_group, host.hostname)


        inv = {'all': {'hosts': []}}
        if meta_hostvars:
            inv['_meta'] = {'hostvars': {}}

        if self.filter_clusters:
            # Loop through clusters and find hosts:
            hosts_list = []
            for host in self.cm.get_all_hosts():
                if host.clusterRef:
                    if clusters.get_cluster(self.cm, host.clusterRef.clusterName).displayName  in self.filter_clusters:
                        hosts_list.append(host)
        else:
            # Get list of all hosts
            hosts_list =  self.cm.get_all_hosts()


        if n_threads == 1:
            for hostRef in hosts_list:
                _build_host_inventory(hostRef, inv, meta_hostvars)
        else:
            _partial_build_host_inventory = partial(_build_host_inventory, inv=inv,meta_hostvars=meta_hostvars)
            pool = ThreadPool(n_threads)
            if sys.version_info <= (2, 6):
                pool.map(_partial_build_host_inventory, hosts_list)
            else:
                pool.map_async(_partial_build_host_inventory, hosts_list).get(1 << 31)


        self._put_cache(cache_name, inv)
        return inv
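
To use the class as an Ansible dynamic inventory, a small driver that prints JSON is needed; a sketch following the usual `--list`/`--host` convention (the flag handling is an assumption, not shown in the original):

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Cloudera Manager dynamic inventory')
    parser.add_argument('--list', action='store_true', help='emit the full inventory')
    parser.add_argument('--host', help='emit hostvars for a single host')
    args = parser.parse_args()

    inventory = CMInventory()
    if args.host:
        print(json.dumps(inventory.get_host(args.host)))
    else:
        print(json.dumps(inventory.get_inventory()))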
Example #13
def create_cluster(config_dict):
    config.read(['./conf/hadrian.ini','./conf/cluster_specs.ini', './conf/cloudera-manager/cm.ini'])
    
    
    cm_cluster_name = config_grabber("Globals")['cm.cluster.name']
    cm_username = config_grabber("Globals")['cm.username']
    cm_password = config_grabber("Globals")['cm.password']
    cm_port = config_grabber("Globals")['cm.port']
    version = config_grabber('Globals')['cdh.cluster.version']
    cm_server = config_grabber(cm_cluster_name + '-en')['cm.server']
    
    #Grab all configuration files in the directory with the CM Cluster Name.
    
    for i in os.listdir('./conf/' + cm_cluster_name):
        config.read('./conf/' + cm_cluster_name + '/' + i)
    
    all_nodes = list()

    while (get_cm_status(cm_server + ':' + cm_port) != 200):
        print 'Waiting for CM Server to start... '
        time.sleep(15)
    
    api = ApiResource(cm_server, cm_port, cm_username, cm_password)
    # create cluster
    cluster = api.create_cluster(cm_cluster_name, version.upper())
    
    #Config CM
    print 'Applying any configuration changes to Cloudera Manager'
    cmanager = api.get_cloudera_manager()
    cmanager.update_config(config_grabber('cloudera-manager-updates'))
        
    planned_nodes = config_grabber(cm_cluster_name + '-en')['full.list'].split(',')
    for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
        for j in v.split(','):
            planned_nodes.append(j)
    
    # TODO make this smarter.  show which agents haven't checked in.  Add the option to continue without them.
    if len(api.get_all_hosts()) != len(planned_nodes):
        print 'Waiting for all agents to check into the CM Server before continuing.'
        
        while len(planned_nodes) > len(api.get_all_hosts()):
            print 'Waiting for the final set of CM Agent nodes to check in.' 
            time.sleep(5)
        
    print 'Updating Rack configuration for data nodes.'
    all_hosts = list()
    for host in api.get_all_hosts():
        all_hosts.append(host.hostId)
        for k,v in config_grabber(cm_cluster_name + '-dn').iteritems():
            if host.hostname in v:
                print 'Setting host: ' + host.hostname + ' to rack /default/' + k
                host.set_rack_id('/default/' + k)
    
    print 'Adding all hosts to cluster.'
    cluster.add_hosts(all_hosts)

    # download CDH Parcels
    # TODO add some logic here to make the parcel list something that's read from the hadrian.ini
    # This will allow support for other CDH packages, Search, etc.
    if config_grabber('Globals')['cdh.distribution.method'] == 'parcels':
        distribute_parcel(cluster, 'CDH', config_grabber("Globals")['cdh.parcel.version'])
    
    if config_dict.get('hdfs_ha') == True:
        create_zookeeper_service(config_dict, cluster)
    create_hdfs_service(config_dict, cluster)    

    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configurations'
    else:
        print 'Client configuration deployment complete.'

    create_mapred_service(config_dict, cluster, cm_server)
    if config_dict.get('hbase') == True:
        if config_dict.get('hdfs_ha') == False:
            create_zookeeper_service(config_dict, cluster)
        create_hbase_service(config_dict, cluster)
    if config_dict.get('hive') == True:
        create_hive_service(config_dict, cluster)
    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
    else:
        print 'Client configuration deployment complete.  The cluster is all yours.  Happy Hadooping.'
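
`get_cm_status` is referenced above but not shown; a minimal sketch of such a helper using urllib2 (the timeout and error handling are assumptions):

import urllib2

def get_cm_status(server_and_port):
    # Return the HTTP status code of the CM web UI, or 0 if it is unreachable.
    try:
        return urllib2.urlopen('http://' + server_and_port, timeout=10).getcode()
    except (urllib2.URLError, IOError):
        return 0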
Example #14
# Get a handle to the API client
from cm_api.api_client import ApiResource
import time
import sys
cm_host = raw_input("Enter IP address of CM: ")
cm_username = raw_input("Enter username: ")
cm_password = raw_input("Enter password: ")

api = ApiResource(cm_host, username=cm_username, password=cm_password)
hosts = api.get_all_hosts()

# Print all clusters and find the CDH5 cluster
print "Clusters:"
cdh5 = None
for c in api.get_all_clusters():
    print c.name
    if c.version == "CDH5":
        cdh5 = c

#Print all hosts
print "Hosts:"
for i in hosts:
        print i

#cdh5.rolling_restart(stale_configs_only=1) works only in Enterprise version

#Get list of all services
print "Services:"
for s in cdh5.get_all_services():
	print s
	if s.type == "HDFS":
Example #15
from cm_api.api_client import ApiResource

cm_host = "insilicodb.ulb.ac.be"

api = ApiResource(cm_host, username="******", password="******")
#print(api.get_all_clusters())

all_hosts = api.get_all_hosts(view='full')
#print(all_hosts)

all_hostnames = set([h.hostname for h in all_hosts])
#print(all_hostnames)

h = all_hosts[0]
#print(h)

#print(h.roleRefs)

role = api.get_cluster('cluster')
#t = role.get_service('hbase').get_role('hbase-MASTER-4e61083dbd483f97174ec27ec055c1d3').get_config(view='full')
t = role.get_service(
    'hdfs'
)  #.get_role('hbase-MASTER-4e61083dbd483f97174ec27ec055c1d3').get_config(view='full')

t = t.get_role('hdfs-NAMENODE-4e61083dbd483f97174ec27ec055c1d3').get_config(
    view='full')
for key, value in t.iteritems():
    print(key)
Example #16
    print line
    print "++Adding HOST to the Cluster"
    addHost=cluster.add_hosts(newHostList)
    # Waiting for 5 minutes so that the parcels get downloaded & distributed & activated
    print "++Wait Time++ 300 seconds"
    time.sleep(300)
     
 
if __name__ == '__main__':
 
    api = ApiResource(clouderaManagerHost, clouderaManagerPort, clouderaManagerUserName, clouderaManagerPassword, use_tls=clouderaManagerHTTPS)
    cluster = api.get_cluster(clusterDisplayName)
    hostlist=[]
 
 
    for hostName in api.get_all_hosts():
        if hostName.hostname in newHosts:
                host = api.get_host(hostName.hostId)
                hostlist.append(host.hostId)
    addHost=addHostToCluster(api,cluster,hostlist)
    start_time=time.time()
    parcel=cluster.get_parcel('CDH',parcelVersion)
     
    # Check for parcel deployment errors.
    print "++ Checking Parcel Deployment"
    while True:
        if parcel.stage == 'ACTIVATED':
            print "CDH Parcels Activated"
            break
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
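
Instead of the fixed five-minute sleep above, the parcel lifecycle can be polled explicitly; a sketch using the standard parcel calls (the helper name and stage arguments are illustrative):

import time

def wait_for_parcel_stage(cluster, product, version, target_stage, poll_secs=15):
    # Re-fetch the parcel each pass; its stage only advances on the server side.
    while True:
        parcel = cluster.get_parcel(product, version)
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
        if parcel.stage == target_stage:
            return parcel
        print "Parcel %s-%s is %s, waiting for %s" % (product, version,
                                                      parcel.stage, target_stage)
        time.sleep(poll_secs)

# Typical sequence: parcel.start_download() then wait for 'DOWNLOADED';
# parcel.start_distribution() then wait for 'DISTRIBUTED'; parcel.activate() then wait for 'ACTIVATED'.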
Example #17
def main():
    global ec2con
    global cwcon

    ec2con = boto.ec2.connect_to_region('us-east-1')
    cwcon = boto.ec2.cloudwatch.CloudWatchConnection()

    api = ApiResource(CM_HOST, username="******", password="******")

    displayName = None
    for c in api.get_all_clusters():
        displayName = c.displayName
        print "Cluster: %s (%s)" % (displayName, c.name)
    
    inst_cache = {}

    insts = api.get_all_hosts('full')
    print "Found %s in the cluster" % [inst.hostId for inst in insts.objects]
    for inst in insts.objects:
        clusterName =  inst.roleRefs[0].clusterName
        if clusterName != c.name:
            print 'Clusters do not correspond: %s vs %s' % (clusterName, c.name)
            continue

        cores = inst.numCores
        inst_id = inst.hostId
        inst_cache[inst_id] = my_cache =  {}
        # For later - we'll send in one data point for every TS query
        # that has AWS data
        my_cache['aws_info_recorded'] = False
        # my_cache['healthSummary'] = inst.healthSummary

        ress = ec2con.get_all_reservations(filters={'instance-id' : inst_id})
        if len(ress) > 0:
            print "Found %s reservations for %s: %s" % (len(ress), inst_id, ress)
        res = ress[0]

        instances = res.instances
        if len(instances) > 1:
            print "Found %s instances for %s %s" % (len(instances), inst_id, instances)
        inst = instances[0]
        if inst.id != inst_id:
            raise Exception("%s != %s" % (inst.id, inst_id))

        platform = inst.platform
        vpc_id = inst.vpc_id

        if platform == 'windows':
            product = 'Windows'
        elif not platform:
            product = 'Linux_UNIX'
        else:
            product = 'UNKNOWN'
        if vpc_id:
            product += "_Amazon_VPC"

        ami = inst.image_id

        my_cache['product'] = product
        my_cache['region'] = inst.region.name
        my_cache['zone'] = inst.placement
        inst_type = inst.instance_type.replace('.','_')

        my_cache['inst_type'] = inst_type
        
        time_f =  arrow.utcnow().replace(minutes=common.DEFAULT_LOOKBACK_MINUTES)
        time_t = arrow.utcnow()
        # TODO
        # http://arr.gr/blog/2013/08/monitoring-ec2-instance-memory-usage-with-cloudwatch/
        # http://blog.sciencelogic.com/netflix-steals-time-in-the-cloud-and-from-users/03/2011
        # https://www.stackdriver.com/cpu-steal-why-aws-cloudwatch-metrics-are-different-than-agent-metrics/
        stat = cwcon.get_metric_statistics(300,
                                           time_f,
                                           time_t,
                                           'CPUUtilization',
                                           'AWS/EC2',
                                           ['Average','Minimum','Maximum'],
                                           { 'InstanceId' : inst_id })     
            # [{u'Timestamp': datetime.datetime(2014, 4, 13, 6, 5), u'Average': 0.35250000000000004, u'Minimum': 0.33, u'Maximum': 0.42, u'Unit': u'Percent'}]
        print 'Fetching stats for %s: %s' % (inst_id, stat)
        if stat:
            for s in stat:
                ts = common.ts_from_aws(s)
                my_cache['avg_cpu'] = float(s['Average'])
                my_cache['ts'] = ts
        else:
            print "No stats found for %s" % inst_id
    print "Querying CDH."
    series = api.query_timeseries('SELECT * WHERE clusterName = %s'  % c.name)
    for entry in series.objects[0].timeSeries:
        # print entry.metadata.__dict__
        metric = entry.metadata.metricName
        # internal host
        hostname = ""
        if 'hostname' in entry.metadata.attributes:
            hostname = entry.metadata.attributes['hostname']
            
        inst_id = ""
        my_cache = {}

        if 'hostId' in entry.metadata.attributes:
            inst_id = entry.metadata.attributes['hostId']
            if inst_id not in inst_cache:
                print "Cannot find %s in %s" % (inst_id, inst_cache)
            else:
                my_cache = inst_cache[inst_id]
        service_name = ""
        if 'serviceName' in entry.metadata.attributes:
            service_name = entry.metadata.attributes['serviceName']
        service_type = ""
        if 'serviceType' in entry.metadata.attributes:
            service_type= entry.metadata.attributes['serviceType']
        role_type = ""
        if 'roleType' in entry.metadata.attributes:
            role_type = entry.metadata.attributes['roleType']

        
        num = entry.metadata.unitNumerators
        denom = entry.metadata.unitDenominators
        if len(num) > 1:
            print "Num: " + str(num)
        if len(denom) > 1:
            print "Denom: " + str(denom)
        unit = num[0]
           
        if len(denom) > 0:
            unit += denom[0]
        tags = {
            'cdh_service_name_service_type_role_type' : "%s.%s.%s" % (
                service_name,
                service_type,
                role_type),
            'unit' : unit
            }
        
        combined_tags = deepcopy(tags)
        if my_cache:
            # combined_tags['healthSummary']= my_cache['healthSummary']
            combined_tags['inst_type'] = my_cache['inst_type']
            combined_tags['cloud'] = 'aws'
            combined_tags['region'] = my_cache['region']
            combined_tags['zone'] = my_cache['zone']
            combined_tags['product'] = my_cache['product']
            
        if not entry.data:
            continue
        
        for sample in entry.data:
            ts = arrow.Arrow.fromdatetime(sample.timestamp).timestamp
            val = sample.value
            if len(combined_tags) > 8:
                print "ERROR: Too many tags: %s" % combined_tags
                sys.exit(0)
            common.otsdb_send(metric, val, combined_tags, ts, False)
            # Do the AWS once only
            if my_cache and not my_cache['aws_info_recorded']:
                combined_tags['unit'] = 'percent'
                if 'avg_cpu' in my_cache:
                    common.otsdb_send('aws_average_cpu_utilization',
                                      my_cache['avg_cpu'],
                                      combined_tags,
                                      my_cache['ts'],
                                      False)
                # Flip the flag so the AWS data point is only sent once per instance.
                my_cache['aws_info_recorded'] = True
Example #18
# Get Cloudera Manager, config, and ODP Cluster
logging.info('Retrieving Cloudera Manager service and cluster instance')
api = ApiResource(cloudera_manager_server_api,
                  7180,
                  management_console_username,
                  management_console_password,
                  version=api_version)
cloudera_manager = ClouderaManager(api)
cloudera_manager_config = api.get_cloudera_manager().get_config(view='full')
cluster_name = 'Open Data Platform'
cluster = api.get_cluster(cluster_name)

# Retrieve all ApiHost objects, locate the management server and add others to clients
logging.info('Retrieving all hosts from cluster')
hosts = api.get_all_hosts()
clients = []
for host in hosts:
    # Suppress Clock Offset warning that incorrectly states chrony is not working
    host.update_config({'host_health_suppression_host_clock_offset': 'true'})

    # Separate Cloudera Manager Server from agents
    if host.hostname == cloudera_management_server_fqdn:
        cloudera_management_server = host
    else:
        clients.append(host)

num_data_nodes = len(
    clients) + 1  # Every node is a datanode, so sum # clients with mgmt server

# Create Zookeeper Service
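
The snippet ends where the ZooKeeper service would be created; a hedged sketch of that step with the generic cm_api calls (the service name, role names, and three-node quorum are assumptions, not the author's code):

logging.info('Creating ZooKeeper service')
zookeeper = cluster.create_service('zookeeper', 'ZOOKEEPER')
# One SERVER role on the management node plus two agents gives a three-node quorum.
zk_hosts = [cloudera_management_server] + clients[:2]
for index, zk_host in enumerate(zk_hosts):
    zookeeper.create_role('zookeeper-SERVER-%d' % (index + 1), 'SERVER', zk_host.hostId)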
Example #19
def main():
    config.read([
        "./conf/hadrian.ini", "./conf/cluster_specs.ini",
        "./conf/cloudera-manager/cm.ini"
    ])

    cm_cluster_name = config_grabber("Globals")["cm.cluster.name"]
    cm_username = config_grabber("Globals")["cm.username"]
    cm_password = config_grabber("Globals")["cm.password"]
    cm_port = config_grabber("Globals")["cm.port"]
    version = config_grabber("Globals")["cdh.cluster.version"]
    cm_server = config_grabber(cm_cluster_name + "-hn")["cm.server"]

    #Grab all configuration files in the directory with the CM Cluster Name.

    for i in os.listdir("./conf/" + cm_cluster_name):
        config.read("./conf/" + cm_cluster_name + "/" + i)

    while (get_cm_status(cm_server + ":" + cm_port) != 200):
        logging.info("Waiting for CM Server to start... ")
        time.sleep(15)

    api = ApiResource(cm_server, cm_port, cm_username, cm_password, version=12)
    # create cluster or get existing cluster
    cluster_exists = False
    for i in api.get_all_clusters():
        if i.name == cm_cluster_name:
            cluster_exists = True

    if cluster_exists == False:
        cluster = api.create_cluster(cm_cluster_name, version.upper())
        planned_nodes = config_grabber(cm_cluster_name +
                                       "-hn")["full.list"].split(",")
        for k, v in config_grabber(cm_cluster_name + "-dn").iteritems():
            for j in v.split(","):
                planned_nodes.append(j)

        # TODO make this smarter.  show which agents haven't checked in.  Add the option to continue without them.
        if len(api.get_all_hosts()) != len(planned_nodes):
            logging.info(
                "Waiting for all agents to check into the CM Server before continuing."
            )

            while len(planned_nodes) > len(api.get_all_hosts()):
                logging.info(
                    "Waiting for the final set of CM Agent nodes to check in.")
                time.sleep(5)

        logging.info("Updating Rack configuration for data nodes.")
        all_hosts = list()
        for host in api.get_all_hosts():
            all_hosts.append(host.hostId)
            for k, v in config_grabber(cm_cluster_name + "-dn").iteritems():
                if host.hostname in v:
                    logging.info("Setting host: " + host.hostname +
                                 " to rack /" + k)
                    host.set_rack_id("/" + k)

        logging.info("Adding all hosts to cluster.")
        cluster.add_hosts(all_hosts)

    else:
        cluster = api.get_cluster(cm_cluster_name)

    #Config CM
    logging.info("Applying any configuration changes to Cloudera Manager")
    cmanager = api.get_cloudera_manager()
    cmanager.update_config(config_grabber("cloudera-manager-updates"))
    if os.path.exists("/root/hadrian/cm_license.txt"):
        with open("/root/hadrian/cm_license.txt", "r") as license:
            logging.info("Applying Enterprise License to Cloudera Manager")
            cmanager.update_license(license.read())

    if config_grabber('Globals')['cdh.distribution.method'] == 'parcels':
        # increase the parcel refresh frequency to one minute to find parcel repos in a more timely manner
        cmanager.update_config({"PARCEL_UPDATE_FREQ": 1})
        distribute_parcel(cluster, 'CDH',
                          config_grabber('Globals')['cdh.parcel.version'])
        distribute_parcel(cluster, 'KAFKA',
                          config_grabber('Globals')['kafka.parcel.version'])
        # restore parcel refresh time period to original 60 minutes
        cmanager.update_config({"PARCEL_UPDATE_FREQ": 60})

    # grab current services, so that we can skip services already defined to make this script reentrant
    current_services = []
    for i in cluster.get_all_services():
        current_services.append(i.type)

    if "ZOOKEEPER" not in current_services:
        create_zookeeper_service(cluster)

    if "HDFS" not in current_services:
        create_hdfs_service(cluster, api)

    if "YARN" not in current_services:
        create_yarn_service(cluster)

    if "HIVE" not in current_services:
        create_hive_service(cluster)

    if "IMPALA" not in current_services:
        create_impala_service(cluster)

    if "KAFKA" not in current_services:
        create_kafka_service(cluster)

    if config_grabber("Globals")["kerberos.enabled"].lower() == "true":
        enable_kerberos(cluster, cmanager)
    else:
        logging.info("Starting remaining services.")
        cmd = cluster.start()

        if not cmd.wait(CMD_TIMEOUT).success:
            logging.error(
                "Error starting cluster services. Please review Cloudera Manager for details."
            )
        else:
            logging.info("Remaining cluster services started.")

    logging.info(
        "Starting final client configuration deployment for all services.")
    cmd = cluster.deploy_client_config()

    if not cmd.wait(CMD_TIMEOUT).success:
        logging.info("Failed to deploy client configuration.")
    else:
        logging.info(
            "Client configuration deployment complete.  The cluster is all yours.  Happy Hadooping."
        )
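
The loop near the top of this example polls get_cm_status until the Cloudera Manager web server answers with HTTP 200. That helper is defined elsewhere in the original script; a minimal sketch of what it might look like, assuming a plain HTTP endpoint at cm_server:cm_port, is:

import urllib2


def get_cm_status(url):
    # Hypothetical sketch of the polling helper used above; the real one may differ.
    # Returns the HTTP status code of the CM server, or 0 if it is unreachable.
    try:
        return urllib2.urlopen("http://" + url, timeout=10).getcode()
    except urllib2.HTTPError, e:
        return e.code
    except urllib2.URLError:
        return 0
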
#!/usr/bin/env python
# author: Steven
# Auto-fill the rack ID when it is set to '/null'
import simplejson as json
import urllib2, base64
import re
from cm_api.api_client import ApiResource


def get_rackID(host):
    url="https://cartographer.siri.apple.com/api/v2/hosts?host.hostname="+str(host)
    request = urllib2.Request(url)
    result = urllib2.urlopen(request)
    jsoncont=result.read()
    for i in json.loads(jsoncont):
        # print i
        f = i['asset']['location_in_building'].split(".")
        cm_rack_id = "/" + f[0] + "." + f[1] + "." + f[2] + "." + f[3] + "." + f[4] + "." + f[5] + "/" + f[6]
        return cm_rack_id

#print get_rackID("flume001.sp07.siri.apple.com")

api = ApiResource('cm001.sp07.siri.apple.com',version=6,username='******',password='******')
for h in api.get_all_hosts():
    if h.rackId=="/null":
        # if h.hostname=="batch001.sp07.siri.apple.com":
        #     h.set_rack_id("/US.RMR.02.01.0903.06/010")
        print get_rackID(h.hostname),h.hostname
        h.set_rack_id(get_rackID(h.hostname))
        #print h.hostname
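
get_rackID builds the rack string by concatenating the first seven components of location_in_building by hand. An equivalent, more compact construction (a sketch that assumes the same cartographer payload shape) is:

def build_rack_id(location_in_building):
    # Same result as the manual concatenation above: /a.b.c.d.e.f/g
    parts = location_in_building.split(".")
    return "/" + ".".join(parts[:6]) + "/" + parts[6]

Inside get_rackID this would be called as build_rack_id(i['asset']['location_in_building']).
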
Ejemplo n.º 21
0
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=9)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster', 'create_snapshot_policy', 'deploy_configuration']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create the management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '268435456',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create management roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg="Specified version {0} doesn't appear in {1} or appears twice".format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the 'ACTIVATING' stage here
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init the zookeeper and kafka ones,
    # which need IDs specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and a secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # Hue requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # it also spawns 3 journal node roles and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_dir_a = module.params.get('jn_dir', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': jn_dir_a, 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there is only one RESOURCEMANAGER role, YARN HA is not enabled yet; two matching roles mean it already is
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly because
        # after enabling HA on YARN the roles get random strings appended to their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # deploy configuration - it always returns changed
    elif action_a == 'deploy_configuration':
      service_a = module.params.get('service', None)
      service_name = SERVICE_MAP[service_a]
      service = cluster.get_service(service_name)

      # deploying client configuration
      command = service.deploy_client_config()
      if command.wait().success == False:
        module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      module.exit_json(changed=True, msg='Configuration deployed')
        
    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this only can start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this only can restart
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload=loads(value_a)
      # check whether the policy already exists; an ApiException is expected when configuring it for the first time
      try: 
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
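
The module above re-checks host health after parcel downloads and distributions by scanning api.get_all_hosts(view='Full'). A small helper, shown here only as a sketch and not part of the original module, keeps that check in one place:

def all_hosts_healthy(api):
    # True when every host registered in CM reports a GOOD health summary.
    return all(h.healthSummary == 'GOOD' for h in api.get_all_hosts(view='Full'))

The inner sleep loops could then break on all_hosts_healthy(api) instead of recomputing the sum each time.
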
Ejemplo n.º 22
0
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster','create_snapshot_policy']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create the management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '215964392',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create management roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg="Specified version {0} doesn't appear in {1} or appears twice".format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the 'ACTIVATING' stage here
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init the zookeeper and kafka ones,
    # which need IDs specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and a secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # Hue requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # it also spawns 3 journal node roles and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there are two roles matching to this name, this means HA for YARN is enabled
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly because
        # after enabling HA on YARN the roles get random strings appended to their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this only can start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this only can restart
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload=loads(value_a)
      # check whether the policy already exists; an ApiException is expected when configuring it for the first time
      try: 
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
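
The deploy_parcel branch in both modules repeats the same poll: re-fetch the parcel, raise if CM reports errors, sleep, and loop until a target stage is reached. A sketch of that pattern as a reusable helper (illustrative only; the names are not part of the original modules):

from time import sleep


def wait_for_parcel_stage(cluster, product, version, target_stage, poll_seconds=10):
    # Poll a parcel until it reaches target_stage, raising if CM reports errors.
    parcel = cluster.get_parcel(product, version)
    while parcel.stage != target_stage:
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
        sleep(poll_seconds)
        parcel = cluster.get_parcel(product, version)
    return parcel
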
Ejemplo n.º 23
0
logger('info',"License type: "+str(LICENSE_TYPE));
logger('info',"CDH_VERSION: " + str(CDH_VERSION) );
logger('info',"CDH_PARCEL_VERSION: "+str(CDH_PARCEL_VERSION));
logger('info',"CLUSTER_NM: "+str(CLUSTER_NM));


# Creating CM Instance
try:
        resource = ApiResource(CM_HOST,CM_PORT,username=ADMIN_USER, password=ADMIN_PASS,version=CM_API_VERSION);
        logger('info',"Creating CM Instance:"+CM_HOST);
except Exception,err:
        logger('warn',"Unable to connect Cloudera Manager:"+CM_HOST);
        logger('error',err);


logger('info',"CM Hosts :"+str(resource.get_all_hosts()));

# Fetching hosts from Ansible host file

HOST_DICT = mutil.ansible_host_dict(logger,HOST_FILE)
HOST_DATA_IP = (mutil.opts_hosts_parse(options.data_nodes) or HOST_DICT['Data-Nodes'])
print HOST_DATA_IP;
HOST_DATA = map(lambda a:mutil.return_host(a,cluster_type),HOST_DATA_IP)


mgmt_nodes = []
if options.mgmt_nodes: mgmt_nodes = list(mutil.opts_hosts_parse(options.mgmt_nodes))

# fall back to the Ansible host file when fewer management nodes were passed on the command line
MGMT_1 = mutil.return_host(mgmt_nodes[0] if len(mgmt_nodes) > 0 else HOST_DICT['Mgmt-01'][0], cluster_type)
MGMT_2 = mutil.return_host(mgmt_nodes[1] if len(mgmt_nodes) > 1 else HOST_DICT['Mgmt-02'][0], cluster_type)
MGMT_3 = mutil.return_host(mgmt_nodes[2] if len(mgmt_nodes) > 2 else HOST_DICT['Mgmt-03'][0], cluster_type)
Ejemplo n.º 24
0
        exit(1)

    api = ApiResource(args.cm_host, args.port, args.user, args.password)
    # Get CM object to decommission the host
    cm = api.get_cloudera_manager()
    if args.cluster is None:
        clusters = []
        for c in api.get_all_clusters():
            clusters.append(c)
        if len(clusters) > 1:
            cluster = pick_cluster(clusters)
        else:
            cluster = clusters[0]
    else:
        cluster = api.get_cluster(args.cluster)
    host_list = api.get_all_hosts()

    # Find host object given the hostname as input arg
    for h in host_list:
        if h.hostname == args.impala_host:
            host = h

    impala = get_impala_service(cluster)

    for r in impala.get_all_roles():
        if r.type == "IMPALAD":
            if r.hostRef.hostId == host.hostId:
                impala_host = r

    # Check for running Impala queries, then shut down the role, then proceed with the host decommission
    check_for_inflight_queries(host.hostname, impala_host.name, impala, args.wait_time)
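
The two loops above first match the hostname argument to a Host object and then locate the IMPALAD role running on that host. The same lookup can be wrapped in a small helper (a sketch, not part of the original script):

def find_role_on_host(service, role_type, host_id):
    # Return the first role of the given type running on the given host, or None.
    for r in service.get_all_roles():
        if r.type == role_type and r.hostRef.hostId == host_id:
            return r
    return None
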
Ejemplo n.º 25
0
hosts = options.hosts.split(',')

print 'NodeManagers to start/stop:', hosts

#print(options.url.replace('http://','').replace('https://',''))
ssl_cert_path = options.ssl_cert_path
context = ssl.create_default_context(cafile=ssl_cert_path)
api = ApiResource(options.url.replace('http://', '').replace('https://', ''),
                  options.port,
                  options.user,
                  options.password,
                  use_tls=True,
                  ssl_context=context)
host_ids_action = [
    h.hostId for h in api.get_all_hosts() if h.hostname in hosts
]

cm_client.configuration.username = options.user
cm_client.configuration.password = options.password
cm_client.configuration.verify_ssl = True
cm_client.configuration.ssl_ca_cert = ssl_cert_path

# Create an instance of the API class
api_host = options.url
port = options.port
api_version = 'v19'
api_url = api_host + ':' + port + '/api/' + api_version
print(api_url)
api_client = cm_client.ApiClient(api_url)
cluster_api_instance = cm_client.ClustersResourceApi(api_client)
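
With host_ids_action in hand, the selected hosts still have to be mapped to NodeManager roles before anything can be started or stopped. A sketch of that mapping using the cm_api objects created above (it assumes a single cluster and a YARN service of type 'YARN'; the original script's follow-up steps are not shown here):

cluster = list(api.get_all_clusters())[0]
yarn = [s for s in cluster.get_all_services() if s.type == 'YARN'][0]
nm_role_names = [r.name for r in yarn.get_all_roles()
                 if r.type == 'NODEMANAGER' and r.hostRef.hostId in host_ids_action]
# These names could then be handed to the service's stop/start role calls
# (e.g. yarn.stop_roles(*nm_role_names)); treat that as an assumption about the
# cm_api interface rather than something taken from the original script.
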
Ejemplo n.º 26
0
def create_cluster(config_dict):
    config.read([
        './conf/hadrian.ini', './conf/cluster_specs.ini',
        './conf/cloudera-manager/cm.ini'
    ])

    cm_cluster_name = config_grabber("Globals")['cm.cluster.name']
    cm_username = config_grabber("Globals")['cm.username']
    cm_password = config_grabber("Globals")['cm.password']
    cm_port = config_grabber("Globals")['cm.port']
    version = config_grabber('Globals')['cdh.cluster.version']
    cm_server = config_grabber(cm_cluster_name + '-en')['cm.server']

    #Grab all configuration files in the directory with the CM Cluster Name.

    for i in os.listdir('./conf/' + cm_cluster_name):
        config.read('./conf/' + cm_cluster_name + '/' + i)

    all_nodes = list()

    while (get_cm_status(cm_server + ':' + cm_port) != 200):
        print 'Waiting for CM Server to start... '
        time.sleep(15)

    api = ApiResource(cm_server, cm_port, cm_username, cm_password)
    # create cluster
    cluster = api.create_cluster(cm_cluster_name, version.upper())

    #Config CM
    print 'Applying any configuration changes to Cloudera Manager'
    cmanager = api.get_cloudera_manager()
    cmanager.update_config(config_grabber('cloudera-manager-updates'))

    planned_nodes = config_grabber(cm_cluster_name +
                                   '-en')['full.list'].split(',')
    for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
        for j in v.split(','):
            planned_nodes.append(j)

    # TODO make this smarter.  show which agents haven't checked in.  Add the option to continue without them.
    if len(api.get_all_hosts()) != len(planned_nodes):
        print 'Waiting for all agents to check into the CM Server before continuing.'

        while len(api.get_all_hosts()) < len(planned_nodes):
            print 'Waiting for the final set of CM Agent nodes to check in.'
            time.sleep(5)

    print 'Updating Rack configuration for data nodes.'
    all_hosts = list()
    for host in api.get_all_hosts():
        all_hosts.append(host.hostId)
        for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
            if host.hostname in v:
                print 'Setting host: ' + host.hostname + ' to rack /default/' + k
                host.set_rack_id('/default/' + k)

    print 'Adding all hosts to cluster.'
    cluster.add_hosts(all_hosts)

    # download CDH Parcels
    # TODO add some logic here to make the parcel list something that's read from the hadrian.ini
    # This will allow support for other CDH packages, Search, etc.
    if config_grabber('Globals')['cdh.distribution.method'] == 'parcels':
        distribute_parcel(cluster, 'CDH',
                          config_grabber("Globals")['cdh.parcel.version'])

    if config_dict.get('hdfs_ha') == True:
        create_zookeeper_service(config_dict, cluster)
    create_hdfs_service(config_dict, cluster)

    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configurations'
    else:
        print 'Client configuration deployment complete.'

    create_mapred_service(config_dict, cluster, cm_server)
    if config_dict.get('hbase') == True:
        if config_dict.get('hdfs_ha') == False:
            create_zookeeper_service(config_dict, cluster)
        create_hbase_service(config_dict, cluster)
    if config_dict.get('hive') == True:
        create_hive_service(config_dict, cluster)
    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
    else:
        print 'Client configuration deployment complete.  The cluster is all yours.  Happy Hadooping.'
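
distribute_parcel is called here and in the first example but is not shown in either snippet. A plausible sketch, built from the parcel-stage progression (download, distribute, activate) used in the Ansible modules above; the real helper may differ:

import time


def distribute_parcel(cluster, product, version):
    # Download, distribute and activate a parcel, polling its stage in CM.
    def wait_for(target_stage):
        p = cluster.get_parcel(product, version)
        while p.stage != target_stage:
            if p.state.errors:
                raise Exception(str(p.state.errors))
            time.sleep(15)
            p = cluster.get_parcel(product, version)
        return p

    parcel = cluster.get_parcel(product, version)
    if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()
        parcel = wait_for('DOWNLOADED')
    if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()
        parcel = wait_for('DISTRIBUTED')
    if parcel.stage == 'DISTRIBUTED':
        parcel.activate()
        wait_for('ACTIVATED')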