def deploy_cloudera_manager_services(self):
    """Create the Cloudera Manager Management Service and its standard roles.

    Creates the MGMT service through the CM API, applies the process
    user/group and config-alert settings, then adds one instance of each
    management role, all co-located on the Cloudera Manager host.

    :return: None
    """
    # Service-level configuration for the management service.
    mgmt_service_config = {
        'enable_config_alerts': True,
        'process_groupname': "cloudera-scm",
        'process_username': "******",
    }

    manager = self._cloudera_manager_oconnect.get_cloudera_manager()
    mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

    # update the cloudera service config
    mgmt.update_config(mgmt_service_config)

    # One role instance of each management daemon on the CM host.
    # (The original code also fetched the configured services into an
    # unused local here; that dead API read has been removed.)
    for role_type in ("ACTIVITYMONITOR", "ALERTPUBLISHER", "EVENTSERVER",
                      "HOSTMONITOR", "SERVICEMONITOR", "REPORTSMANAGER"):
        mgmt.create_role("%s-1" % role_type, role_type,
                         self._cloudera_manager_host)
def deploy_mgmt_services(self):
    """
    Configure, deploy and start all the Cloudera Management Services.
    """
    print_json(type="MGMT", msg="Deploying Management Services")

    # A missing management service surfaces as an ApiException; if it
    # exists and is already running there is nothing left to do.
    try:
        mgmt = self.manager.get_service()
        if mgmt.serviceState == 'STARTED':
            return
    except ApiException:
        print_json(type="MGMT",
                   msg="Management Services don't exist. Creating.")
        mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

    # First pass: make sure every configured role exists.
    for entry in config['services']['MGMT']['roles']:
        group_name = entry['group']
        if len(mgmt.get_roles_by_type(group_name)) > 0:
            continue
        print_json(type="MGMT",
                   msg="Creating role for {}".format(group_name))
        mgmt.create_role('{}-1'.format(group_name), group_name,
                         entry['hosts'][0])

    # Second pass: push per-role-group configuration.
    for entry in config['services']['MGMT']['roles']:
        cfg_group = mgmt.get_role_config_group(
            'mgmt-{}-BASE'.format(entry['group']))
        cfg_group.update_config(entry.get('config', {}))

    mgmt.start().wait()

    if self.manager.get_service().serviceState == 'STARTED':
        print_json(type="MGMT", msg="Management Services started")
    else:
        fail(
            self.module,
            "[MGMT] Cloudera Management services didn't start up properly")
def deploy_management(self):
    """Create, configure and start the Cloudera Management Service.

    Adds one instance of each management role on the Cloudera Manager
    host, applies per-role configuration (Activity Monitor database
    settings, Event Server heap size), starts the service and waits for
    the start command to complete.

    :return: the started management service object
    """
    # Per-role configuration, keyed by role type.  Only the Activity
    # Monitor and Event Server need explicit settings; the rest use
    # defaults.  (Two unused leftover config dicts for ZooKeeper were
    # removed from the original.)
    role_confs = {
        "ACTIVITYMONITOR": {
            'firehose_database_host': self.cm_host + ":7432",
            'firehose_database_user': '******',
            'firehose_database_password': self.db_pwd,
            'firehose_database_type': 'postgresql',
            'firehose_database_name': 'amon',
            'firehose_heapsize': '268435456',
        },
        "ALERTPUBLISHER": {},
        "EVENTSERVER": {'event_server_heapsize': '215964392'},
        "HOSTMONITOR": {},
        "SERVICEMONITOR": {},
    }

    mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

    # Create one role of each type on the CM host.  The original code
    # misspelled the ALERTPUBLISHER role *name* as "ALERTPUBLISER-1";
    # the role name now matches the role type.
    for role_type in ("ACTIVITYMONITOR", "ALERTPUBLISHER", "EVENTSERVER",
                      "HOSTMONITOR", "SERVICEMONITOR"):
        mgmt.create_role(role_type + "-1", role_type, self.cm_host)

    # now configure each role
    for group in mgmt.get_all_role_config_groups():
        if group.roleType in role_confs:
            group.update_config(role_confs[group.roleType])

    # now start the management service
    mgmt.start().wait()
    return mgmt
def create_cms(cloudera_manager, nodes):
    """Create the Cloudera Management Service, assign its roles and apply
    role configuration.

    Failures are logged with a full traceback and re-raised to the caller.
    """
    service = None
    try:
        service = cloudera_manager.create_mgmt_service(ApiServiceSetupInfo())
        cloudera_manager.auto_configure()
        assign_roles(service, _CFG.CMS_CFG['roles'], nodes)
        apply_role_config(service, _CFG.CMS_CFG['role_cfg'])
    except Exception:
        logging.error("Error while creating CMS", exc_info=True)
        raise
    return service
def deploy_management(manager, mgmt_servicename, mgmt_service_conf,
                      mgmt_role_conf, amon_role_name, amon_role_conf,
                      apub_role_name, apub_role_conf, eserv_role_name,
                      eserv_role_conf, hmon_role_name, hmon_role_conf,
                      smon_role_name, smon_role_conf, nav_role_name,
                      nav_role_conf, navms_role_name, navms_role_conf,
                      rman_role_name, rman_role_conf):
    """Create, configure and start the Cloudera Management Service on CM_HOST,
    including the Navigator and Reports Manager roles."""
    setup_logger.info(
        "Deploying Management Service on Host: {}".format(CM_HOST))
    mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

    # Create one role of each type.  Note that host id may be different
    # from host name (especially in CM 5); look it up in /api/v5/hosts.
    role_plan = (
        (amon_role_name, "ACTIVITYMONITOR"),
        (apub_role_name, "ALERTPUBLISHER"),
        (eserv_role_name, "EVENTSERVER"),
        (hmon_role_name, "HOSTMONITOR"),
        (smon_role_name, "SERVICEMONITOR"),
        (nav_role_name, "NAVIGATOR"),
        (navms_role_name, "NAVIGATORMETASERVER"),
        (rman_role_name, "REPORTSMANAGER"),
    )
    for base_name, role_type in role_plan:
        mgmt.create_role(base_name + "-1", role_type, CM_HOST)

    # Copy dtap and hadoop jars to cloudera manager shared libs
    shell_command = [
        'sudo cp -f /opt/bluedata/bluedata-dtap.jar /usr/share/cmf/lib'
    ]
    popen_util(shell_command, "copy dtap jar to cm shared libs path")

    # Now configure each role config group from the matching conf dict.
    conf_by_type = {
        "ACTIVITYMONITOR": amon_role_conf,
        "ALERTPUBLISHER": apub_role_conf,
        "EVENTSERVER": eserv_role_conf,
        "HOSTMONITOR": hmon_role_conf,
        "SERVICEMONITOR": smon_role_conf,
        "NAVIGATOR": nav_role_conf,
        "NAVIGATORMETASERVER": navms_role_conf,
        "REPORTSMANAGER": rman_role_conf,
    }
    for group in mgmt.get_all_role_config_groups():
        if group.roleType in conf_by_type:
            group.update_config(conf_by_type[group.roleType])

    # now start the management service
    mgmt.start().wait()
    return mgmt
def setup_cms(self):
    """Recreate the Cloudera Management Service with its monitoring roles.

    Any pre-existing management service is deleted first (best effort),
    then the service is created with AlertPublisher, EventServer,
    HostMonitor and ServiceMonitor roles on their configured hosts.
    """
    # Best-effort delete of a previous management service; an API error
    # here just means none existed yet.  (Was a bare `except:`, which
    # would also have swallowed KeyboardInterrupt/SystemExit.)
    try:
        self.cm.delete_mgmt_service()
    except Exception:
        pass

    # create the management service
    try:
        mgmt = self.cm.create_mgmt_service(ApiServiceSetupInfo())
        mgmt.create_role('AlertPublisher', "ALERTPUBLISHER", self.ap_host)
        mgmt.create_role('EventServer', "EVENTSERVER", self.es_host)
        mgmt.create_role('HostMonitor', "HOSTMONITOR", self.hm_host)
        mgmt.create_role('ServiceMonitor', "SERVICEMONITOR", self.sm_host)
        ok('Cloudera management service created successfully.')
    except ApiException:
        info('Cloudera management service had already been created.')
def add_or_update(conf, api, hosts, remove=False):
    """Ensure the Cloudera Management Service exists and matches *conf*.

    Creates the MGMT service if absent (then starts it), otherwise pushes
    the configured settings onto the existing service; missing roles are
    created and every role config group is refreshed.  A stale existing
    service is restarted.
    """
    cm = api.get_cloudera_manager()
    default_host = hosts[conf['cm']['host']]
    mgmt_cfg = conf['mgmt']['config'] if conf['mgmt']['config'] else {}

    # Check for an existing MGMT service; its absence raises ApiException.
    created = False
    try:
        mgmt_svc = cm.get_service()
        LOG.info(
            "Cloudera Management Service already exists, skipping creation")
    except ApiException:
        mgmt_svc = None

    if mgmt_svc:
        mgmt_svc.update_config(mgmt_cfg)
    else:
        created = True
        setup_info = ApiServiceSetupInfo(name="mgmt", type="MGMT",
                                         config=mgmt_cfg)
        mgmt_svc = cm.create_mgmt_service(setup_info)

    for svc in conf['mgmt']['services']:
        # Use CM host as default for MGMT hosts
        svc_host = hosts[svc['host']] if 'host' in svc else default_host
        rolename = "mgmt-%s" % svc['name']
        try:
            mgmt_svc.get_role(rolename)
        except ApiException:
            mgmt_svc.create_role(rolename, svc['name'], svc_host)
        role_group = mgmt_svc.get_role_config_group(
            "mgmt-%s-BASE" % svc['name'])
        role_group.update_config(svc['config'])

    stale, client_stale = get_staleness(cm)
    if created:
        start(cm)
    elif stale:
        restart(cm)
def deploy_management(manager, mgmt_servicename, mgmt_service_conf,
                      mgmt_role_conf, amon_role_name, amon_role_conf,
                      apub_role_name, apub_role_conf, eserv_role_name,
                      eserv_role_conf, hmon_role_name, hmon_role_conf,
                      smon_role_name, smon_role_conf, rman_role_name,
                      rman_role_conf):
    """Create, configure and start the Cloudera Management Service with the
    five core monitoring roles on cm_config.CM_HOST."""
    mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

    # Create roles.  Note that host id may be different from host name
    # (especially in CM 5); look it up in /api/v5/hosts.
    # NAVIGATOR, NAVIGATORMETADATASERVER and REPORTSMANAGER roles are
    # intentionally not created here.
    for base_name, role_type in (
            (amon_role_name, "ACTIVITYMONITOR"),
            (apub_role_name, "ALERTPUBLISHER"),
            (eserv_role_name, "EVENTSERVER"),
            (hmon_role_name, "HOSTMONITOR"),
            (smon_role_name, "SERVICEMONITOR")):
        mgmt.create_role(base_name + "-1", role_type, cm_config.CM_HOST)

    # Now configure each role config group from its matching conf dict.
    conf_by_type = {
        "ACTIVITYMONITOR": amon_role_conf,
        "ALERTPUBLISHER": apub_role_conf,
        "EVENTSERVER": eserv_role_conf,
        "HOSTMONITOR": hmon_role_conf,
        "SERVICEMONITOR": smon_role_conf,
    }
    for group in mgmt.get_all_role_config_groups():
        if group.roleType in conf_by_type:
            group.update_config(conf_by_type[group.roleType])

    # now start the management service
    mgmt.start().wait()
    return mgmt
keyfile.close() # Install Cloudera Manager Agent on other servers logging.info( 'Installing Cloudera Manager Agent on management and host servers') cloudera_manager.host_install(user_name=cloudera_user, host_names=hosts, private_key=id_rsa, cm_repo_url=cloudera_manager_repo, gpg_key_custom_url=cloudera_manager_repo_gpg, java_install_strategy="NONE").wait() # Create and start the Cloudera Management Service logging.info('Creating Cloudera Management Service') try: mgmt_setup = ApiServiceSetupInfo() mgmt = cloudera_manager.create_mgmt_service(mgmt_setup) except Exception: mgmt = cloudera_manager.get_service() logging.info('Creating EVENTSERVER role if it does not exist') try: mgmt.create_role("mgmt-es", "EVENTSERVER", cloudera_manager_server) except Exception: logging.info('Role mgmt-es already exists, continuing...') logging.info('Creating SERVICEMONITOR role if it does not exist') try: mgmt.create_role("mgmt-sm", "SERVICEMONITOR", cloudera_manager_server) except Exception: logging.info('Role mgmt-sm already exists, continuing...')
# NOTE(review): these first assignments look like the tail of a loop that
# parses credential lines (`s`, `value`) into the `creds` dict; the loop
# begins before this chunk, so the original indentation cannot be
# confirmed from here.
service = s[3].strip()
setting = s[5].strip()
value = value.strip()
if service not in creds:
    creds[service] = {}
creds[service][setting] = value

# Connect to Cloudera Manager on the host given as the first CLI argument.
api = ApiResource(sys.argv[1], username="******", password="******",
                  use_tls=False, version=4)
cm = api.get_cloudera_manager()

# One role of each type in ROLE_TYPES, all placed on the same (CM) host.
roles = [ApiRole(api, t.lower(), t, ApiHostRef(api, sys.argv[1]))
         for t in ROLE_TYPES]

# Reuse the existing MGMT service if there is one; a missing service
# raises ApiException, in which case we create it with the roles above.
try:
    service = cm.get_service()
except ApiException:
    mgmt = ApiServiceSetupInfo("management", "MGMT", roles=roles)
    service = cm.create_mgmt_service(mgmt)

# Apply per-role-type configuration overrides.
rcg = service.get_all_role_config_groups()
for rc in rcg:
    if rc.roleType in ROLE_TYPES:
        config = {}
        # Reduce amount of some logs to 1 day
        if rc.roleType == "ACTIVITYMONITOR":
            config["firehose_activity_purge_duration_hours"] = "24"
            config["firehose_attempt_purge_duration_hours"] = "24"
            config["timeseries_expiration_hours"] = "24"
        if rc.roleType == "SERVICEMONITOR":
            config["firehose_storage_dir"] = "/opt/log/cloudera-service-monitor"
        if rc.roleType == "HOSTMONITOR":
            config["firehose_storage_dir"] = "/opt/log/cloudera-host-monitor"
def main():
    """Ansible module entry point: dispatch one Cloudera Manager action.

    Reads the `action` parameter and performs the matching cluster
    operation via the CM API (create cluster, add host, create the
    management service, deploy parcels/services/roles, enable HDFS/YARN
    HA, set config, start/restart services, snapshot policies).  Every
    branch terminates the module via exit_json/fail_json.
    """
    module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'})
                                              for argument in MODULE_ARGUMENTS))
    api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS,
                      version=9)
    cluster_name = CLUSTER_NAME
    manager = api.get_cloudera_manager()
    action_a = module.params.get('action', None)

    if action_a == 'create_cluster':
        license_a = module.params.get('license', None)
        version_a = module.params.get('version', None)
        cluster_list = [x.name for x in api.get_all_clusters()]
        if cluster_name in cluster_list:
            module.exit_json(changed=False, msg='Cluster exists')
        else:
            cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
            # Without an explicit license, fall back to a trial license.
            if license_a == None:
                manager.begin_trial()
            else:
                manager.update_license(license_a.decode('base64'))
            module.exit_json(changed=True, msg='Cluster created')
    # NOTE(review): 'deploy_hdfs_dn' and 'run_command' are accepted here
    # but have no handler branch below — verify whether that is intended.
    elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel',
                      'deploy_hdfs_base', 'deploy_hdfs_httpfs',
                      'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha',
                      'set_config', 'service', 'deploy_service',
                      'deploy_service_worker_nodes', 'deploy_base_roles',
                      'run_command', 'cluster', 'create_snapshot_policy',
                      'deploy_configuration']:
        # more complicated actions that need a created cluster go here
        cluster = api.get_cluster(cluster_name)
        # hostname -> hostId lookup for every host already in the cluster
        host_map = dict((api.get_host(x.hostId).hostname, x.hostId)
                        for x in cluster.list_hosts())

        # adds a host to the cluster
        # host_name should be in the internal DNS format, ip-xx-xx-xx.copute.internal
        if action_a == 'add_host':
            host_a = module.params.get('host', None)
            host_list = host_map.keys()
            if host_a in host_list:
                module.exit_json(changed=False, msg='Host already in cluster')
            else:
                try:
                    cluster.add_hosts([host_a])
                except ApiException:
                    # if a host isn't there, it could be because the agent
                    # didn't manage to connect yet
                    # so let's wait a moment for it
                    sleep(120)
                    cluster.add_hosts([host_a])
                module.exit_json(changed=True, msg='Host added')

        # create management service and set it's basic configuration
        # this needs a separate function since management is handled
        # differently than the rest of services
        elif action_a == 'create_mgmt':
            host_a = module.params.get('host', None)
            # getting the management service is the only way to check if
            # mgmt exists; an exception means there isn't one
            try:
                mgmt = manager.get_service()
                module.exit_json(changed=False,
                                 msg='Mgmt service already exists')
            except ApiException:
                pass
            mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())
            # this is ugly... and I see no good way to unuglify it
            # (shells out to read the CM-generated DB passwords)
            firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
            reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
            # since there is no easy way of configuring the manager...
            # let's do it here :(
            role_conf = defaultdict(dict)
            role_conf['ACTIVITYMONITOR'] = {
                'firehose_database_host': '{0}:7432'.format(host_a),
                'firehose_database_user': '******',
                'firehose_database_password': firehose_passwd,
                'firehose_database_type': 'postgresql',
                'firehose_database_name': 'amon',
                'firehose_heapsize': '268435456',
            }
            role_conf['EVENTSERVER'] = {
                'event_server_heapsize': '215964392'
            }
            role_conf['REPORTSMANAGER'] = {
                'headlamp_database_host': '{0}:7432'.format(host_a),
                'headlamp_database_user': '******',
                'headlamp_database_password': reports_passwd,
                'headlamp_database_type': 'postgresql',
                'headlamp_database_name': 'rman',
                'headlamp_heapsize': '268435456',
            }
            roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER',
                     'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
            # create mangement roles
            for role in roles:
                mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])
            # update configuration of each
            for group in mgmt.get_all_role_config_groups():
                group.update_config(role_conf[group.roleType])
            mgmt.start().wait()
            # after starting this service needs time to spin up
            sleep(30)
            module.exit_json(changed=True, msg='Mgmt created and started')

        # deploy a given parcel on all hosts in the cluster
        # you can specify a substring of the version ending with latest,
        # for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
        elif action_a == 'deploy_parcel':
            name_a = module.params.get('name', None)
            version_a = module.params.get('version', None)
            if "latest" in version_a:
                available_versions = [x.version
                                      for x in cluster.get_all_parcels()
                                      if x.product == name_a]
                if "-latest" in version_a:
                    version_substr = match('(.+?)-latest', version_a).group(1)
                # if version is just "latest", try to check everything
                else:
                    version_substr = ".*"
                # exactly one available version must match the substring
                try:
                    [version_parcel] = [x for x in available_versions
                                        if re.match(version_substr, x) != None]
                except ValueError:
                    module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
            else:
                version_parcel = version_a
            # we now go through various stages of getting the parcel
            # as there is no built-in way of waiting for an operation to complete
            # we use loops with sleep to get it done
            parcel = cluster.get_parcel(name_a, version_parcel)
            if parcel.stage == 'AVAILABLE_REMOTELY':
                parcel.start_download()
                while parcel.stage != 'DOWNLOADED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    sleep(10)
            if parcel.stage == 'DOWNLOADED':
                parcel.start_distribution()
                while parcel.stage != 'DISTRIBUTED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    # sleep while hosts report problems after the download
                    for i in range(12):
                        sleep(10)
                        if sum([1 for x in api.get_all_hosts(view='Full')
                                if x.healthSummary != 'GOOD']) == 0:
                            break
            # since parcels are distributed automatically when a new host
            # is added to a cluster
            # we can encounter the ,,ACTIVATING'' stage then
            if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
                if parcel.stage == 'DISTRIBUTED':
                    parcel.activate()
                while parcel.stage != 'ACTIVATED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    # this sleep has to be large because although the
                    # operation is very fast
                    # it makes the management and cloudera hosts go
                    # bonkers, failing all of the health checks
                    sleep(10)
                # sleep while hosts report problems after the distribution
                for i in range(60):
                    sleep(10)
                    if sum([1 for x in api.get_all_hosts(view='Full')
                            if x.healthSummary != 'GOOD']) == 0:
                        break
                module.exit_json(changed=True, msg='Parcel activated')
            if parcel.stage == 'ACTIVATED':
                module.exit_json(changed=False, msg='Parcel already activated')
            # if we get down here, something is not right
            module.fail_json(msg='Invalid parcel state')

        # deploy nodes for workers, according to SERVICE_WORKER_MAP
        # also give them sane names and init zookeeper and kafka ones
        # which need id's specified
        elif action_a == 'deploy_service_worker_nodes':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)
            service_name = SERVICE_MAP[service_a]
            role_name = SERVICE_WORKER_MAP[service_a]['name']
            full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']
            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)
            nodes = [x for x in service.get_all_roles() if role_name in x.name]
            # if host already has the given group, we should skip it
            if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
                module.exit_json(changed=False,
                                 msg='Host already is a {0}'.format(role_name))
            # find out the highest id that currently exists
            else:
                node_names = [x.name for x in nodes]
                if len(node_names) == 0:
                    # if no nodes, start numbering from 1
                    node_i = 1
                else:
                    # take the max number and add 1 to it
                    node_i = max([int(x.split('-')[-1])
                                  for x in node_names]) + 1
                # NOTE(review): create_role here is passed the raw host
                # name (host_a) while the membership check above compares
                # host ids from host_map — verify which one the API expects.
                if service_name == 'ZOOKEEPER':
                    role = service.create_role(full_role_name.format(node_i),
                                               'SERVER', host_a)
                    # zookeeper needs a per-node ID in the configuration,
                    # so we set it now
                    role.update_config({'serverId': node_i})
                elif service_name == 'KAFKA':
                    role = service.create_role(full_role_name.format(node_i),
                                               role_name, host_a)
                    # kafka needs a per-node ID in the configuration,
                    # so we set it now
                    role.update_config({'broker.id': node_i})
                else:
                    service.create_role(full_role_name.format(node_i),
                                        role_name, host_a)
                module.exit_json(changed=True,
                                 msg='Added host to {0} role'.format(role_name))

        # deploy a service. just create it, don't do anything more
        # this is needed maily when we have to set service properties
        # before role deployment
        elif action_a == 'deploy_service':
            name_a = module.params.get('name', None)
            if not name_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(name_a))
            service_name = SERVICE_MAP[name_a]
            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
                module.exit_json(changed=True,
                                 msg='{0} service created'.format(service_name))
            else:
                module.exit_json(changed=False,
                                 msg='{0} service already exists'.format(service_name))

        # deploy the base hdfs roles (the namenode and secondary)
        # this doesn't create the service, as at least one datanode
        # should already be added!
        # the format also requires certain properties to be set before we run it
        elif action_a == 'deploy_hdfs_base':
            nn_host_a = module.params.get('nn_host', None)
            sn_host_a = module.params.get('sn_host', None)
            changed = False
            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]
            # don't create a secondary namenode when:
            #- there is one that already exists
            #- there is a second namenode, which means we have HA and
            #  don't need a secondary
            if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
                hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE',
                                 sn_host_a)
                changed = True
            # create a namenode and format it's FS
            # formating the namenode requires at least one datanode and
            # secondary namenode already in the cluster!
            if not 'HDFS-NAMENODE' in hdfs_roles:
                hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
                for command in hdfs.format_hdfs('HDFS-NAMENODE'):
                    if command.wait().success == False:
                        module.fail_json(msg='Failed formating HDFS namenode with error: {0}'.format(command.resultMessage))
                changed = True
            module.exit_json(changed=changed,
                             msg='Created HDFS service & NN roles')

        # enable HttpFS for HDFS
        # HUE require this for support HA in HDFS
        elif action_a == 'deploy_hdfs_httpfs':
            host_a = module.params.get('host', None)
            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]
            # don't install second instance of HttpFS
            if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
                module.exit_json(changed=False,
                                 msg='HDFS HttpFS service already exists')
            hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a])
            module.exit_json(changed=True, msg='HDFS HttpFS service created')

        # enable HA for HDFS
        # this deletes the secondary namenode and creates a second
        # namenode in it's place
        # also, this spawns 3 journal node and 2 failover controller roles
        elif action_a == 'deploy_hdfs_ha':
            sn_host_a = module.params.get('sn_host', None)
            jn_dir_a = module.params.get('jn_dir', None)
            jn_names_a = [module.params.get('jn1_host', None),
                          module.params.get('jn2_host', None),
                          module.params.get('jn3_host', None)]
            hdfs = cluster.get_service('HDFS')
            # if there's a second namenode, this means we already have HA enabled
            if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
                # this is bad and I should feel bad
                # jns is a list of dictionaries, each dict passes the
                # required journalnode parameters
                jns = [{'jnHostId': host_map[jn_name],
                        'jnEditsDir': jn_dir_a,
                        'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)}
                       for i, jn_name in enumerate(jn_names_a)]
                # this call is so long because we set some predictable
                # names for the sevices
                command = hdfs.enable_nn_ha('HDFS-NAMENODE',
                                            host_map[sn_host_a],
                                            'nameservice1', jns,
                                            zk_service_name='ZOOKEEPER',
                                            active_fc_name='HDFS-FAILOVERCONTROLLER-1',
                                            standby_fc_name='HDFS-FAILOVERCONTROLLER-2',
                                            standby_name='HDFS-NAMENODE-2')
                children = command.wait().children
                for command_children in children:
                    # The format command is expected to fail, since we
                    # already formated the namenode
                    if command_children.name != 'Format' and command.success == False:
                        module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for HDFS service')
            else:
                module.exit_json(changed=False, msg='HDFS HA already enabled')

        # enable HA for YARN
        elif action_a == 'deploy_rm_ha':
            sn_host_a = module.params.get('sn_host', None)
            yarn = cluster.get_service('YARN')
            # if there are two roles matching to this name, this means HA
            # for YARN is enabled
            if len([0 for x in yarn.get_all_roles()
                    if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
                command = yarn.enable_rm_ha(sn_host_a,
                                            zk_service_name='ZOOKEEPER')
                children = command.wait().children
                for command_children in children:
                    if command.success == False:
                        module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for YARN service')
            else:
                module.exit_json(changed=False, msg='YARN HA already enabled')

        # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
        # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
        elif action_a == 'deploy_base_roles':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)
            service_name = SERVICE_MAP[service_a]
            changed = False
            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)
            service_roles = [x.name for x in service.get_all_roles()]
            # create each service from the map
            for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
                # check if role already exists, script cant compare it directly
                # after enabling HA on YARN roles will have random strings in names
                if len([0 for x in service_roles
                        if match(role_name, x) != None]) == 0:
                    service.create_role(role_name, cloudera_name, host_a)
                    changed = True
                    # init commmands
                    if role_name in SERVICE_INIT_COMMANDS.keys():
                        for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
                            # different handling of commands specified by name and
                            # ones specified by an instance method
                            if ismethod(command_to_run):
                                command = command_to_run(service)
                            else:
                                command = service.service_command_by_name(command_to_run)
                            if command.wait().success == False:
                                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))
            if changed == True:
                module.exit_json(changed=True,
                                 msg='Created base roles for {0}'.format(service_name))
            else:
                module.exit_json(changed=False,
                                 msg='{0} base roles already exist'.format(service_name))

        # deploy configuration - it always return changed
        elif action_a == 'deploy_configuration':
            service_a = module.params.get('service', None)
            service_name = SERVICE_MAP[service_a]
            service = cluster.get_service(service_name)
            # deploying client configuration
            command = service.deploy_client_config()
            if command.wait().success == False:
                module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
            module.exit_json(changed=True, msg='Configuration deployed')

        # set config values for a given service/role
        elif action_a == 'set_config':
            entity_a = module.params.get('entity', None)
            service_a = module.params.get('service', None)
            role_a = module.params.get('role', None)
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)
            if not service_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(service_a))
            # since management is handled differently, it needs a different service
            if service_a == 'management':
                service = manager.get_service()
            elif service_a == 'cm':
                service = manager
            else:
                service = cluster.get_service(SERVICE_MAP[service_a])
            # role and service configs are handled differently
            if entity_a == 'service':
                prev_config = service.get_config()
                curr_config = service.update_config({name_a: value_a})
                # CM returns a single config dict here; wrap it so the
                # indexing below works the same for both cases
                if service_a == 'cm':
                    prev_config = [prev_config]
                    curr_config = [curr_config]
                module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))
            elif entity_a == 'role':
                if not role_a in ROLE_MAP:
                    module.fail_json(msg='Unknown role: {0}'.format(service))
                role = service.get_role_config_group(ROLE_MAP[role_a])
                prev_config = role.get_config()
                curr_config = role.update_config({name_a: value_a})
                module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))
            else:
                module.fail_json(msg='Invalid entity, must be one of service, role')

        # handle service state
        # currently this only can start/restart a service
        elif action_a == 'service':
            state_a = module.params.get('state', None)
            service_a = module.params.get('service', None)
            try:
                if service_a == 'cm':
                    service = manager.get_service()
                else:
                    service = cluster.get_service(SERVICE_MAP[service_a])
            except ApiException:
                module.fail_json(msg='Service does not exist')
            # when starting a service, we also deploy the client config for it
            if state_a == 'started':
                if service.serviceState == 'STARTED':
                    module.exit_json(changed=False,
                                     msg='Service already running')
                method = service.start
                verb = "start"
            elif state_a == 'restarted':
                method = service.restart
                verb = "restart"
            try:
                command = service.deploy_client_config()
                if command.wait().success == False:
                    module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
            # since there is no way to check if a service handles client
            # config deployments
            # we try our best and pass the exception if it doesn't
            # NOTE(review): in Python 2 `except ApiException, AttributeError`
            # catches ONLY ApiException and binds it to the name
            # AttributeError — it does not catch AttributeError.  The
            # intent was almost certainly `except (ApiException, AttributeError):`.
            except ApiException, AttributeError:
                pass
            method().wait()
            # we need to wait for cloudera checks to complete...
            # otherwise it will report as failing
            sleep(10)
            for i in range(24):
                sleep(10)
                service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
                if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                    break
            service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
            if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                module.exit_json(changed=True,
                                 msg='Service {0} successful'.format(verb))
            else:
                module.fail_json(msg='Service {0} failed'.format(verb))

        # handle cluster
        # currently this only can restart
        elif action_a == 'cluster':
            state_a = module.params.get('state', None)
            if state_a == 'restarted':
                command = cluster.restart(redeploy_client_configuration=True)
                if command.wait().success == False:
                    module.fail_json(msg='Cluster resart failed with {0}'.format(command.resultMessage))
                else:
                    module.exit_json(changed=True,
                                     msg='Cluster restart successful')

        # Snapshot policy
        # only create is supported
        elif action_a == 'create_snapshot_policy':
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)
            service_a = module.params.get('service', None)
            service = cluster.get_service(SERVICE_MAP[service_a])
            payload=loads(value_a)
            # checking if policy already exists. Exception is expected
            # when configure for the first time.
            try:
                test = service.get_snapshot_policy(name_a)
                module.exit_json(changed=False,
                                 msg='Defined policy already exists')
            except ApiException:
                pass
            try:
                command = service.create_snapshot_policy(payload)
                module.exit_json(changed=True,
                                 msg='Snapshot policy was created.')
            # NOTE(review): same Python 2 except-binding issue as above —
            # AttributeError is bound, not caught.
            except ApiException, AttributeError:
                module.fail_json(msg='ERROR in creating snapshot policy.')
def set_up_cluster():
    """Auto-deploy a CDH cluster on a running Cloudera Manager instance.

    Reads all settings from module-level globals (cm_host, cm_port,
    cm_username, cm_password, cm_service_name, host_username, host_list,
    host_password, cm_repo_url, cluster_name, cdh_version,
    cdh_version_number, service_types_and_names, reports_manager_*).
    Calls exit(0) on any failure, or when a trial license is already
    active (the cluster is then assumed to be set up already).
    """
    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=19)
    # get the CM instance
    cm = ClouderaManager(api)
    print "*************************************"
    print " Starting Auto Deployment of Cluster "
    print "*************************************"
    # {'owner': ROAttr(), 'uuid': ROAttr(), 'expiration': ROAttr(),}
    TRIAL = False
    try:
        trial_active = cm.get_license()
        print trial_active
        if trial_active.owner == "Trial License":
            print "Trial License is already set - will NOT continue now."
            print "Assuming Cluster is already setup"
            TRIAL = True
        else:
            print "Setting up `Trial License`."
            cm.begin_trial()
    # NOTE(review): bare except — any API error silently falls back to
    # starting a trial license; confirm this is intentional.
    except:
        cm.begin_trial()
    if TRIAL:
        exit(0)
    # create the management service unless one already exists
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    try:
        if not cm.get_service().name:
            cm.create_mgmt_service(service_setup)
        else:
            print "Service already exist."
    except:
        # get_service() raises when no MGMT service has been created yet
        cm.create_mgmt_service(service_setup)
    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password,
                          cm_repo_url=cm_repo_url, unlimited_jce=True)
    print "Installing hosts. This might take a while."
    # poll until the command completes (success stays None while running)
    while cmd.success == None:
        sleep(5)
        cmd = cmd.fetch()
    print cmd
    if cmd.success != True:
        print "cm_host_install failed: " + cmd.resultMessage
        exit(0)
    print "cm_host_install succeeded"
    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()
    # create a cluster on that instance
    cluster = create_cluster(api, cluster_name, cdh_version)
    # add all our hosts to the cluster
    cluster.add_hosts(host_list)
    cluster = api.get_cluster(cluster_name)
    parcels_list = []
    # get and list all available parcels, keeping only CDH parcels whose
    # version matches the requested prefix
    print "Available parcels:"
    for p in cluster.get_all_parcels():
        print '\t' + p.product + ' ' + p.version
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)
    if len(parcels_list) == 0:
        print "No " + cdh_version + " parcel found!"
        exit(0)
    # pick the highest matching parcel version (string comparison)
    cdh_parcel = parcels_list[0]
    for p in parcels_list:
        if p.version > cdh_parcel.version:
            cdh_parcel = p
    # download the parcel
    print "Starting parcel download. This might take a while."
    cmd = cdh_parcel.start_download()
    if cmd.success != True:
        print "Parcel download failed!"
        exit(0)
    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)
    print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded"
    # distribute the parcel
    print "Starting parcel distribution. This might take a while."
    cmd = cdh_parcel.start_distribution()
    if cmd.success != True:
        print "Parcel distribution failed!"
        exit(0)
    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)
    print cdh_parcel.product + ' ' + cdh_parcel.version + " distributed"
    # activate the parcel
    cmd = cdh_parcel.activate()
    if cmd.success != True:
        print "Parcel activation failed!"
        exit(0)
    # make sure the activation finishes
    # NOTE(review): unlike the download/distribution loops this poll has no
    # sleep(), so it hammers the API in a tight loop — consider sleep(5).
    while cdh_parcel.stage != "ACTIVATED":
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)
    print cdh_parcel.product + ' ' + cdh_parcel.version + " activated"
    # inspect hosts and print the result
    print "Inspecting hosts. This might take a few minutes."
    cmd = cm.inspect_hosts()
    while cmd.success == None:
        cmd = cmd.fetch()
    if cmd.success != True:
        print "Host inpsection failed!"
        exit(0)
    print "Hosts successfully inspected: \n" + cmd.resultMessage
    # create all the services we want to add; we will only create one instance
    # of each
    for s in service_types_and_names.keys():
        service = cluster.create_service(service_types_and_names[s], s)
    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()
    # # this will set up the Hive and the reports manager databases because we
    # # can't auto-configure those two things
    # hive = cluster.get_service(service_types_and_names["HIVE"])
    # hive_config = {"hive_metastore_database_host": hive_metastore_host, \
    #                "hive_metastore_database_name": hive_metastore_name, \
    #                "hive_metastore_database_password": hive_metastore_password, \
    #                "hive_metastore_database_port": hive_metastore_database_port, \
    #                "hive_metastore_database_type": hive_metastore_database_type}
    # hive.update_config(hive_config)
    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()
    # this will set the Reports Manager database password
    # first we find the correct role
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r
    if rm_role == None:
        print "No REPORTSMANAGER role found!"
        exit(0)
    # then we get the corresponding role config group -- even though there is
    # only once instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type, \
        rm_role_group.roleConfigGroupName, None)
    # update the appropriate fields in the config
    rm_rcg_config = {"headlamp_database_host": reports_manager_host, \
                     "headlamp_database_name": reports_manager_name, \
                     "headlamp_database_user": reports_manager_username, \
                     "headlamp_database_password": reports_manager_password, \
                     "headlamp_database_type": reports_manager_database_type}
    rm_rcg.update_config(rm_rcg_config)
    # restart the management service with new configs
    cm_service.restart().wait()
    # execute the first run command
    print "Excuting first run command. This might take a while."
    cmd = cluster.first_run()
    while cmd.success == None:
        cmd = cmd.fetch()
    if cmd.success != True:
        # NOTE(review): resultMessage is read as an attribute everywhere else
        # in this file; calling it here likely raises TypeError — confirm.
        print "The first run command failed: " + cmd.resultMessage()
        exit(0)
    print "First run successfully executed. Your cluster has been set up!"
def set_up_cluster(cm_host, host_list):
    """Set up a CDH cluster with explicit role placement, then install the
    Anaconda parcel.

    :param cm_host: Cloudera Manager host; also used as the master node for
        all master roles and as the Hive/Hue DB host.
    :param host_list: all cluster hosts; hosts whose name contains 'slave'
        get worker roles, hosts containing 'edge' get gateway/edge roles.
    Other settings come from module-level globals. Calls exit(<code>) on
    failure.
    """
    print "Setting up CDH cluster..."
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)
    cm = ClouderaManager(api)
    print "Creating mgmg service."
    try:
        service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
        cm.create_mgmt_service(service_setup)
    except ApiException as exc:
        # HTTP 400 means the MGMT service already exists; anything else is fatal
        if exc.code != 400:
            # NOTE(review): string + exception object raises TypeError in
            # Python 2 — probably meant str(exc); confirm.
            print "create MGMT service failed: " + exc
            exit(1)
    print "Installing hosts. This might take a while."
    cmd = cm.host_install(host_username, host_list,
                          password=host_password).wait()
    if cmd.success != True:
        print "cm_host_install failed: " + cmd.resultMessage
        exit(2)
    print "Auto-assign roles and auto-configure the CM service"
    if not is_cluster_installed(api):
        cm.auto_assign_roles()
        cm.auto_configure()
    print "Creating cluster."
    if not is_cluster_installed(api):
        cluster = create_cluster(api, cluster_name, cdh_version)
        cluster.add_hosts(host_list)
    cluster = api.get_cluster(cluster_name)
    cdh_parcel = get_cdh_parcel(cluster)
    # walk the parcel through its lifecycle; each step is skipped when the
    # parcel is already past that stage (idempotent re-runs)
    print "Downloading CDH parcel. This might take a while."
    if cdh_parcel.stage == "AVAILABLE_REMOTELY":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_download(), api,
                                     cdh_parcel, cluster_name, 'DOWNLOADED')
    print "Distributing CDH parcel. This might take a while."
    if cdh_parcel.stage == "DOWNLOADED":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_distribution(), api,
                                     cdh_parcel, cluster_name, 'DISTRIBUTED')
    print "Activating CDH parcel. This might take a while."
    if cdh_parcel.stage == "DISTRIBUTED":
        cdh_parcel = wait_for_parcel(cdh_parcel.activate(), api,
                                     cdh_parcel, cluster_name, 'ACTIVATED')
    # if cdh_parcel.stage != "ACTIVATED":
    #     print "CDH parcel activation failed. Parcel in stage: " + cdh_parcel.stage
    #     exit(14)
    print "Inspecting hosts. This might take a few minutes."
    cmd = cm.inspect_hosts()
    # poll until the inspection command completes
    while cmd.success == None:
        cmd = cmd.fetch()
    if cmd.success != True:
        print "Host inpsection failed!"
        exit(8)
    print "Hosts successfully inspected: \n" + cmd.resultMessage
    print "Creating specified services."
    for s in service_types_and_names.keys():
        try:
            # get_service() raising means the service doesn't exist yet
            cluster.get_service(service_types_and_names[s])
        except:
            print "Creating service: " + service_types_and_names[s]
            service = cluster.create_service(service_types_and_names[s], s)
    # hosts are classified by substring of their host name
    slaves = [host for host in host_list if 'slave' in host]
    edges = [host for host in host_list if 'edge' in host]
    #assign master roles to master node
    for service in cluster.get_all_services():
        if service.name == 'HDFS-1':
            service.create_role('NAMENODE-1', 'NAMENODE', cm_host)
            service.create_role('SECONDARYNAMENODE', 'SECONDARYNAMENODE', cm_host)
            service.create_role('BALANCER-1', 'BALANCER', cm_host)
            service.create_role('HTTPFS-1', 'HTTPFS', cm_host)
            service.create_role('HDFS-GW_MASTER1', 'GATEWAY', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HDFS-GW_EDGE%s' % i, 'GATEWAY', edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('DATANODE-%s' % i, 'DATANODE', slave)
        if service.name == 'ZOOKEEPER-1':
            service.create_role('ZOOKEEPERSERVER-1', 'SERVER', cm_host)
        if service.name == 'HBASE-1':
            service.create_role('MASTER-1', 'MASTER', cm_host)
            service.create_role('HBASETHRIFTSERVER-1', 'HBASETHRIFTSERVER', cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('HBASE-RS-%s' % i, 'REGIONSERVER', slave)
        if service.name == 'HUE-1':
            service.create_role('HUE-MASTER1', 'HUE_SERVER', cm_host)
            service.create_role('HUE-LB_MASTER1', 'HUE_LOAD_BALANCER', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HUE-EDGE%s' % i, 'HUE_SERVER', edge)
        if service.name == 'HIVE-1':
            service.create_role('HIVEMETASTORE-1', 'HIVEMETASTORE', cm_host)
            service.create_role('HIVESERVER-1', 'HIVESERVER2', cm_host)
            service.create_role('HIVE-GW_MASTER1', 'GATEWAY', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HIVE-GW_EDGE%s' % i, 'GATEWAY', edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('HIVE-GW_SLAVE%s' % i, 'GATEWAY', slave)
        if service.name == 'IMPALA-1':
            service.create_role('STATESTORE-1', 'STATESTORE', cm_host)
            service.create_role('CATALOGSERVER-1', 'CATALOGSERVER', cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('IMPALAD-%s' % i, 'IMPALAD', slave)
        if service.name == 'OOZIE-1':
            service.create_role('OOZIE_SERVER-1', 'OOZIE_SERVER', cm_host)
        if service.name == 'SPARK_ON_YARN-1':
            service.create_role('SPARK_YARN_HISTORY_SERVER-1', 'SPARK_YARN_HISTORY_SERVER', cm_host)
            # NOTE(review): `i` here is stale — it is left over from a
            # previous loop, so the master gateway name depends on how many
            # slaves/edges an earlier branch saw; confirm intended name.
            service.create_role('SPARK_ON_YARN-GW_MASTER%s' % i, 'GATEWAY', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('SPARK_ON_YARN-GW_EDGE%s' % i, 'GATEWAY', edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('SPARK_ON_YARN-GW_SLAVE%s' % i, 'GATEWAY', slave)
        if service.name == 'SQOOP-1':
            service.create_role('SQOOP_SERVER-1', 'SQOOP_SERVER', cm_host)
        if service.name == 'YARN-1':
            service.create_role('RESOURCEMANAGER-1', 'RESOURCEMANAGER', cm_host)
            service.create_role('JOBHISTORY-1', 'JOBHISTORY', cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('NODEMANAGER-%s' % i, 'NODEMANAGER', slave)
    #print "Auto assigning roles."
    #cluster.auto_assign_roles()
    cluster.auto_configure()
    print "Updating Hive config."
    hive_metastore_host = cm_host  # let's assume that
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {"hive_metastore_database_host": hive_metastore_host, \
                   "hive_metastore_database_name": hive_metastore_name, \
                   "hive_metastore_database_user": hive_metastore_user, \
                   "hive_metastore_database_password": hive_metastore_password, \
                   "hive_metastore_database_port": hive_metastore_database_port, \
                   "hive_metastore_database_type": hive_metastore_database_type}
    hive.update_config(hive_config)
    print "Updating Hue config."
    hue_db_host = cm_host  # let's assume that
    hue = cluster.get_service(service_types_and_names["HUE"])
    hue_config = {"database_host": hue_db_host, \
                  "database_name": hue_db_name, \
                  "database_user": hue_db_user, \
                  "database_password": hue_db_password, \
                  "database_port": hue_db_port, \
                  "database_type": hue_db_type}
    hue.update_config(hue_config)
    # Set Java version to OpenJDK
    cm.update_all_hosts_config({'java_home': '/usr/lib/jvm/java-openjdk'})
    print "Starting management service."
    cm_service = cm.get_service()
    cm_service.start().wait()
    print "Excuting first run command. This might take a while."
    cmd = cluster.first_run().wait()
    if cmd.success != True:
        print "The first run command failed: " + cmd.resultMessage
        exit(11)
    print "First run successfully executed. Your cluster has been set up!"
    # append the Anaconda repo to CM's remote parcel repository list
    config = cm.get_config(view='full')
    repolist = config['REMOTE_PARCEL_REPO_URLS']
    value = repolist.value or repolist.default
    value += ',' + anaconda_repo
    cm.update_config({'REMOTE_PARCEL_REPO_URLS': value})
    # give CM a moment to refresh its parcel catalogue
    sleep(10)
    cluster = api.get_cluster(cluster_name)
    parcel = cluster.get_parcel('Anaconda', anaconda_parcel_version)
    print "Downloading Anaconda parcel. This might take a while."
    if parcel.stage == "AVAILABLE_REMOTELY":
        parcel = wait_for_parcel(parcel.start_download(), api, parcel,
                                 cluster_name, 'DOWNLOADED')
    print "Distributing Anaconda parcel. This might take a while."
    if parcel.stage == "DOWNLOADED":
        parcel = wait_for_parcel(parcel.start_distribution(), api, parcel,
                                 cluster_name, 'DISTRIBUTED')
    print "Activating Anaconda parcel. This might take a while."
    if parcel.stage == "DISTRIBUTED":
        parcel = wait_for_parcel(parcel.activate(), api, parcel,
                                 cluster_name, 'ACTIVATED')
    print "Anaconda is now installed."
# time.sleep(15) # #print "Distributed %s" % (parcel['name']) # #p.activate() #if cmd.success != True: # print "Parcel activation failed!" # exit(0) # ## make sure the activation finishes #while cdh_parcel.stage != "ACTIVATED": # cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name) #print cdh_parcel.product + ' ' + cdh_parcel.version + " activated" mgmt = manager.create_mgmt_service( ApiServiceSetupInfo(name=management_service_name, type="MGMT")) mgmt.create_role(smon_role_name + "-1", "SERVICEMONITOR", cm_management_host_hostref) mgmt.create_role(hmon_role_name + "-1", "HOSTMONITOR", cm_management_host_hostref) mgmt.create_role(eserv_role_name + "-1", "EVENTSERVER", cm_management_host_hostref) mgmt.create_role(apub_role_name + "-1", "ALERTPUBLISHER", cm_management_host_hostref) mgmt.start().wait() print "Deployed and started Cloudera Management Services" print "Inspecting hosts. This might take a few minutes."
def deploy_management(manager, self):
    """Create, configure and start a new Cloudera Management Service.

    :param manager: ClouderaManager handle used to create the MGMT service.
    :param self: deployment context providing cm_host (CM/DB host) and
        db_pwd (shared PostgreSQL password).
    :return: the started management ApiService.
    """
    # service/role level configs (currently unused, kept for reference)
    MGMT_SERVICE_CONFIG = {
        'zookeeper_datadir_autocreate': 'true',
    }
    MGMT_ROLE_CONFIG = {
        'quorumPort': 2888,
    }
    AMON_ROLENAME = "ACTIVITYMONITOR"
    AMON_ROLE_CONFIG = {
        'firehose_database_host': self.cm_host + ":7432",
        'firehose_database_user': '******',
        'firehose_database_password': self.db_pwd,
        'firehose_database_type': 'postgresql',
        'firehose_database_name': 'amon',
        'firehose_heapsize': '215964392',
    }
    APUB_ROLENAME = "ALERTPUBLISHER"
    APUB_ROLE_CONFIG = {}
    ESERV_ROLENAME = "EVENTSERVER"
    ESERV_ROLE_CONFIG = {
        'event_server_heapsize': '215964392'
    }
    HMON_ROLENAME = "HOSTMONITOR"
    HMON_ROLE_CONFIG = {}
    SMON_ROLENAME = "SERVICEMONITOR"
    SMON_ROLE_CONFIG = {}
    NAV_ROLENAME = "NAVIGATOR"
    NAV_ROLE_CONFIG = {
        'navigator_database_host': self.cm_host + ":7432",
        'navigator_database_user': '******',
        'navigator_database_password': self.db_pwd,
        'navigator_database_type': 'postgresql',
        'navigator_database_name': 'nav',
        'navigator_heapsize': '215964392',
    }
    NAVMS_ROLENAME = "NAVIGATORMETADATASERVER"
    NAVMS_ROLE_CONFIG = {}
    # NOTE(review): CM's role type is "REPORTSMANAGER"; this value looks like
    # a typo but is only ever used as a role-name prefix here, so it is kept.
    RMAN_ROLENAME = "REPORTMANAGER"
    RMAN_ROLE_CONFIG = {
        'headlamp_database_host': self.cm_host + ":7432",
        'headlamp_database_user': '******',
        'headlamp_database_password': self.db_pwd,
        'headlamp_database_type': 'postgresql',
        'headlamp_database_name': 'rman',
        'headlamp_heapsize': '215964392',
    }
    mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())
    # Create roles on the CM host. Note that host id may be different from
    # host name (especially in CM 5); look it up in /api/v5/hosts.
    # BUG FIX: this section previously referenced undefined lowercase names
    # (amon_role_name, amon_role_conf, ..., CM_HOST) and raised NameError;
    # it now uses the constants defined above and the context's cm_host.
    mgmt.create_role(AMON_ROLENAME + "-1", "ACTIVITYMONITOR", self.cm_host)
    mgmt.create_role(APUB_ROLENAME + "-1", "ALERTPUBLISHER", self.cm_host)
    mgmt.create_role(ESERV_ROLENAME + "-1", "EVENTSERVER", self.cm_host)
    mgmt.create_role(HMON_ROLENAME + "-1", "HOSTMONITOR", self.cm_host)
    mgmt.create_role(SMON_ROLENAME + "-1", "SERVICEMONITOR", self.cm_host)
    # mgmt.create_role(NAV_ROLENAME + "-1", "NAVIGATOR", self.cm_host)
    # mgmt.create_role(NAVMS_ROLENAME + "-1", "NAVIGATORMETADATASERVER", self.cm_host)
    # mgmt.create_role(RMAN_ROLENAME + "-1", "REPORTSMANAGER", self.cm_host)
    # now configure each role config group from its matching config table
    role_configs = {
        "ACTIVITYMONITOR": AMON_ROLE_CONFIG,
        "ALERTPUBLISHER": APUB_ROLE_CONFIG,
        "EVENTSERVER": ESERV_ROLE_CONFIG,
        "HOSTMONITOR": HMON_ROLE_CONFIG,
        "SERVICEMONITOR": SMON_ROLE_CONFIG,
        # "NAVIGATOR": NAV_ROLE_CONFIG,
        # "NAVIGATORMETADATASERVER": NAVMS_ROLE_CONFIG,
        # "REPORTSMANAGER": RMAN_ROLE_CONFIG,
    }
    for group in mgmt.get_all_role_config_groups():
        if group.roleType in role_configs:
            group.update_config(role_configs[group.roleType])
    # now start the management service
    mgmt.start().wait()
    return mgmt
def create_default_cluster(self): """ Create a default cluster and Cloudera Manager Service on master host """ log.info("Creating a new Cloudera Cluster") # self.cm_host = socket.gethostname() log.debug("Cloudera adding host: {0}".format(self.cm_host)) self.host_list.append(self.cm_host) # create the management service # first check if mamagement service already exists service_setup = ApiServiceSetupInfo(name=self.cm_service_name, type="MGMT") self.cm_manager.create_mgmt_service(service_setup) # install hosts on this CM instance cmd = self.cm_manager.host_install(self.host_username, self.host_list, password=self.host_password, cm_repo_url=self.cm_repo_url) log.debug("Installing hosts. This might take a while...") while cmd.success is None: sleep(5) cmd = cmd.fetch() if cmd.success is not True: log.error("Adding hosts to Cloudera Manager failed: {0}".format(cmd.resultMessage)) log.info("Host added to Cloudera Manager") # first auto-assign roles and auto-configure the CM service self.cm_manager.auto_assign_roles() self.cm_manager.auto_configure() # create a cluster on that instance cluster = self.cm_api_resource.create_cluster(self.cluster_name, self.cdh_version) log.info("Cloudera cluster: {0} created".format(self.cluster_name)) # add all hosts on the cluster cluster.add_hosts(self.host_list) cluster = self.cm_api_resource.get_cluster(self.cluster_name) # get and list all available parcels parcels_list = [] log.debug("Installing parcels...") for p in cluster.get_all_parcels(): print '\t' + p.product + ' ' + p.version if p.version.startswith(self.cdh_version_number) and p.product == "CDH": parcels_list.append(p) if len(parcels_list) == 0: log.error("No {0} parcel found!".format(self.cdh_version)) cdh_parcel = parcels_list[0] for p in parcels_list: if p.version > cdh_parcel.version: cdh_parcel = p # download the parcel log.debug("Starting parcel downloading...") cmd = cdh_parcel.start_download() if cmd.success is not True: log.error("Parcel download failed!") # make 
sure the download finishes while cdh_parcel.stage != 'DOWNLOADED': sleep(5) cdh_parcel = get_parcel(self.cm_api_resource, cdh_parcel.product, cdh_parcel.version, self.cluster_name) log.info("Parcel: {0} {1} downloaded".format(cdh_parcel.product, cdh_parcel.version)) # distribute the parcel log.info("Distributing parcels...") cmd = cdh_parcel.start_distribution() if cmd.success is not True: log.error("Parcel distribution failed!") # make sure the distribution finishes while cdh_parcel.stage != "DISTRIBUTED": sleep(5) cdh_parcel = get_parcel(self.cm_api_resource, cdh_parcel.product, cdh_parcel.version, self.cluster_name) log.info("Parcel: {0} {1} distributed".format(cdh_parcel.product, cdh_parcel.version)) # activate the parcel log.info("Activating parcels...") cmd = cdh_parcel.activate() if cmd.success is not True: log.error("Parcel activation failed!") # make sure the activation finishes while cdh_parcel.stage != "ACTIVATED": cdh_parcel = get_parcel(self.cm_api_resource, cdh_parcel.product, cdh_parcel.version, self.cluster_name) log.info("Parcel: {0} {1} activated".format(cdh_parcel.product, cdh_parcel.version)) # inspect hosts and print the result log.info("Inspecting hosts. This might take a few minutes") cmd = self.cm_manager.inspect_hosts() while cmd.success is None: sleep(5) cmd = cmd.fetch() if cmd.success is not True: log.error("Host inpsection failed!") log.info("Hosts successfully inspected:\n".format(cmd.resultMessage)) log.info("Cluster {0} installed".format(self.cluster_name))
def deploy_management(manager, host_id_dic): try: mgmt = manager.get_service() except ApiException: print "deploy MGMT service" mgmt = None if mgmt is None: mgmt = manager.create_mgmt_service(ApiServiceSetupInfo(name="MGMT")) activitymonitor_host_id = host_id_dic.values()[random.randint( 0, len(host_id_dic.values()) - 1)] if ACTIVITYMONITOR_HOST != "": activitymonitor_host_id = host_id_dic[ACTIVITYMONITOR_HOST] alertpublisher_host_id = host_id_dic.values()[random.randint( 0, len(host_id_dic.values()) - 1)] if ALERTPUBLISHER_HOST != "": alertpublisher_host_id = host_id_dic[ALERTPUBLISHER_HOST] eventmonitor_host_id = host_id_dic.values()[random.randint( 0, len(host_id_dic.values()) - 1)] if EVENTMONITOR_HOST != "": eventmonitor_host_id = host_id_dic[EVENTMONITOR_HOST] hostmonitor_host_id = host_id_dic.values()[random.randint( 0, len(host_id_dic.values()) - 1)] if HOSTMONITOR_HOST != "": hostmonitor_host_id = host_id_dic[HOSTMONITOR_HOST] servicemonitor_host_id = host_id_dic.values()[random.randint( 0, len(host_id_dic.values()) - 1)] if SERVICEMONITOR_HOST != "": servicemonitor_host_id = host_id_dic[SERVICEMONITOR_HOST] mgmt.create_role("ActivityMonitor", "ACTIVITYMONITOR", activitymonitor_host_id) mgmt.create_role("AlertPublisher", "ALERTPUBLISHER", alertpublisher_host_id) mgmt.create_role("EventServer", "EVENTSERVER", eventmonitor_host_id) mgmt.create_role("HostMonitor", "HOSTMONITOR", hostmonitor_host_id) mgmt.create_role("ServiceMonitor", "SERVICEMONITOR", servicemonitor_host_id) # now configure each role for group in mgmt.get_all_role_config_groups(): if group.roleType == "ACTIVITYMONITOR": group.update_config(AMON_ROLE_CONFIG) elif group.roleType == "ALERTPUBLISHER": group.update_config(APUB_ROLE_CONFIG) elif group.roleType == "EVENTSERVER": group.update_config(ESERV_ROLE_CONFIG) elif group.roleType == "HOSTMONITOR": group.update_config(HMON_ROLE_CONFIG) elif group.roleType == "SERVICEMONITOR": group.update_config(SMON_ROLE_CONFIG) mgmt.start().wait() return mgmt
def deploy_management(manager, mgmt_servicename, amon_role_name, apub_role_name,
                      eserv_role_name, hmon_role_name, smon_role_name,
                      nav_role_name, navms_role_name, rman_role_name):
    """Create, configure and start the Cloudera Management Service.

    Five roles (activity/host/service monitor, alert publisher, event
    server) are placed on CM_HOST; Navigator, Navigator metadata server and
    Reports Manager are intentionally left disabled. Returns the started
    management ApiService.
    """
    # service/role level configs (defined for reference, not applied here)
    mgmt_service_conf = {
        'zookeeper_datadir_autocreate': 'true',
    }
    mgmt_role_conf = {
        'quorumPort': 2888,
    }
    amon_role_conf = {
        'firehose_database_host': CM_HOST + ":7432",
        'firehose_database_user': '******',
        'firehose_database_password': ACTIVITYMONITOR_DB_PASSWORD,
        'firehose_database_type': 'postgresql',
        'firehose_database_name': 'amon',
        'firehose_heapsize': '268435456',
    }
    apub_role_conf = {}
    eserv_role_conf = {'event_server_heapsize': '215964392'}
    hmon_role_conf = {}
    smon_role_conf = {}
    nav_role_conf = {
        'navigator_database_host': CM_HOST + ":7432",
        'navigator_database_user': '******',
        'navigator_database_password': NAVIGATOR_DB_PASSWORD,
        'navigator_database_type': 'postgresql',
        'navigator_database_name': 'nav',
        'navigator_heapsize': '215964392',
    }
    navms_role_conf = {}
    rman_role_conf = {
        'headlamp_database_host': CM_HOST + ":7432",
        'headlamp_database_user': '******',
        'headlamp_database_password': REPORTSMANAGER_DB_PASSWORD,
        'headlamp_database_type': 'postgresql',
        'headlamp_database_name': 'rman',
        'headlamp_heapsize': '215964392',
    }
    mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())
    # create roles on CM_HOST. Note that host id may be different from host
    # name (especially in CM 5) — look it up in /api/v5/hosts
    role_plan = [
        (amon_role_name, "ACTIVITYMONITOR"),
        (apub_role_name, "ALERTPUBLISHER"),
        (eserv_role_name, "EVENTSERVER"),
        (hmon_role_name, "HOSTMONITOR"),
        (smon_role_name, "SERVICEMONITOR"),
        # (nav_role_name, "NAVIGATOR"),
        # (navms_role_name, "NAVIGATORMETADATASERVER"),
        # (rman_role_name, "REPORTSMANAGER"),
    ]
    for base_name, role_type in role_plan:
        mgmt.create_role(base_name + "-1", role_type, CM_HOST)
    # now configure each role config group via a type -> config table
    conf_for_type = {
        "ACTIVITYMONITOR": amon_role_conf,
        "ALERTPUBLISHER": apub_role_conf,
        "EVENTSERVER": eserv_role_conf,
        "HOSTMONITOR": hmon_role_conf,
        "SERVICEMONITOR": smon_role_conf,
        # "NAVIGATOR": nav_role_conf,
        # "NAVIGATORMETADATASERVER": navms_role_conf,
        # "REPORTSMANAGER": rman_role_conf,
    }
    for group in mgmt.get_all_role_config_groups():
        if group.roleType in conf_for_type:
            group.update_config(conf_for_type[group.roleType])
    # now start the management service
    mgmt.start().wait()
    return mgmt
def create_cms(cloudera_manager, nodes): cms = None try: cms = cloudera_manager.create_mgmt_service(ApiServiceSetupInfo()) roles = [{ "name": "cms-ap", "type": "ALERTPUBLISHER", "target": "CM" }, { "name": "cms-es", "type": "EVENTSERVER", "target": "CM" }, { "name": "cms-hm", "type": "HOSTMONITOR", "target": "CM" }, { "name": "cms-sm", "type": "SERVICEMONITOR", "target": "CM" }] role_cfg = [{ "type": "ACTIVITYMONITOR", "config": { 'mgmt_log_dir': '/var/log/pnda/cdh/cloudera-scm-firehose' } }, { "type": "ALERTPUBLISHER", "config": { 'mgmt_log_dir': '/var/log/pnda/cdh/cloudera-scm-alertpublisher' } }, { "type": "EVENTSERVER", "config": { 'eventserver_index_dir': '/data0/var/lib/cloudera-scm-eventserver', 'mgmt_log_dir': '/var/log/pnda/cdh/cloudera-scm-eventserver' } }, { "type": "HOSTMONITOR", "config": { 'firehose_storage_dir': '/data0/var/lib/cloudera-host-monitor', 'mgmt_log_dir': '/var/log/pnda/cdh/cloudera-scm-firehose' } }, { "type": "SERVICEMONITOR", "config": { 'firehose_storage_dir': '/data0/var/lib/cloudera-service-monitor', 'mgmt_log_dir': '/var/log/pnda/cdh/cloudera-scm-firehose' } }] cloudera_manager.auto_configure() assign_roles(cms, roles, nodes) apply_role_config(cms, role_cfg) except Exception as exception: logging.error("Error while creating CMS", exc_info=True) return cms
# get the CM instancepython2.7 setuptools cm = ClouderaManager(api) # activate the CM trial license #cm.begin_trial() cmservice=None try: cmservice = cm.get_service() except Exception,e: print Exception,":",e if cmservice is None: # create the management service service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT") cm.create_mgmt_service(service_setup) cmservice = cm.get_service() # install hosts on this CM instance cmd = cm.host_install(user_name=host_username, host_names=host_list, ssh_port=22, password=host_password, private_key=None, passphrase=None, parallel_install_count=None, cm_repo_url=cm_repo_url,gpg_key_custom_url=None, java_install_strategy=None, unlimited_jce=None) print "Installing hosts. This might take a while." while cmd.success == None: sleep(5) cmd = cmd.fetch() if cmd.success != True: print "cm_host_install failed: " + cmd.resultMessage exit(0)
def deploy_management(manager, mgmt_servicename, mgmt_service_conf, mgmt_role_conf, amon_role_name, amon_role_conf, apub_role_name, apub_role_conf, eserv_role_name, eserv_role_conf, hmon_role_name, hmon_role_conf, smon_role_name, smon_role_conf, rman_role_name, rman_role_conf): #mgmt_service already exists # 2 scenarios: if there is no cloudera management service installed: mgmt_service = manager.create_mgmt_service(ApiServiceSetupInfo()) #mgmt_service = manager.get_service() print "mgmt_service:", mgmt_service print "mgmt_service config:", mgmt_service.get_config(view="Full") print "before autoconfgiure:" for group in mgmt_service.get_all_role_config_groups(): print "group:", group print "group_config:", group.get_config() # create roles. Note that host id may be different from host name (especially in CM 5). Look it it up in /api/v5/hosts mgmt_service.create_role(amon_role_name + "-1", "ACTIVITYMONITOR", CM_HOST) mgmt_service.create_role(apub_role_name + "-1", "ALERTPUBLISHER", CM_HOST) mgmt_service.create_role(eserv_role_name + "-1", "EVENTSERVER", CM_HOST) mgmt_service.create_role(hmon_role_name + "-1", "HOSTMONITOR", CM_HOST) mgmt_service.create_role(smon_role_name + "-1", "SERVICEMONITOR", CM_HOST) ##mgmt.create_role(nav_role_name + "-1", "NAVIGATOR", CM_HOST) ##mgmt.create_role(navms_role_name + "-1", "NAVIGATORMETADATASERVER", CM_HOST) mgmt_service.create_role(rman_role_name + "-1", "REPORTSMANAGER", CM_HOST) # now configure each role for group in mgmt_service.get_all_role_config_groups(): if group.roleType == "ACTIVITYMONITOR": group.update_config(amon_role_conf) elif group.roleType == "ALERTPUBLISHER": group.update_config(apub_role_conf) elif group.roleType == "EVENTSERVER": group.update_config(eserv_role_conf) elif group.roleType == "HOSTMONITOR": group.update_config(hmon_role_conf) elif group.roleType == "SERVICEMONITOR": group.update_config(smon_role_conf) # # elif group.roleType == "NAVIGATOR": # # group.update_config(nav_role_conf) # # elif 
group.roleType == "NAVIGATORMETADATASERVER": # # group.update_config(navms_role_conf) elif group.roleType == "REPORTSMANAGER": group.update_config(rman_role_conf) # now start the management service #mgmt_service.auto_assign_roles() #mgmt_service.auto_configure() print "after autoconfigure:" for group in mgmt_service.get_all_role_config_groups(): print "group:", group print "group_config:", group.get_config() #reports manager not started; how to start manually? mgmt_service.start().wait() return mgmt_service