def do_call(host, port, version, user, password, cluster_name, parcel_name, parcel_version, parcel_repo, init_pre_dir, init_post_dir): api = ApiResource(host, port, user, password, False, version) if not parcel_repo.endswith('/'): parcel_repo += '/' if re.match(REGEX_VERSION, parcel_version) is None or re.match(REGEX_VERSION, parcel_version).group() != parcel_version: raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] expected to match regular expression [' + REGEX_VERSION + ']') if not parcel_repo.endswith(parcel_version + '/'): raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] when compared with repository [' + parcel_repo + ']') cm_config = api.get_cloudera_manager().get_config(view='full') repo_config = cm_config['REMOTE_PARCEL_REPO_URLS'] repo_list = repo_config.value or repo_config.default if parcel_repo not in repo_list: repo_list += ',' + parcel_repo api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': repo_list}) time.sleep(POLL_SEC) # The parcel synchronize end-point is not exposed via the API, so sleep instead cluster_names = [] if cluster_name is None: for cluster in api.get_all_clusters(): cluster_names.append(cluster.name) else: cluster_names.append(cluster_name) for cluster_name_itr in cluster_names: print 'Cluster [DEPLOYMENT] starting ... ' cluster = api.get_cluster(cluster_name_itr) parcel = cluster.get_parcel(parcel_name, parcel_version) print 'Parcel [DEPLOYMENT] starting ... 
' do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD', 'AVAILABLE_REMOTELY', 'DOWNLOADED', 'start_download') do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE', 'DOWNLOADED', 'DISTRIBUTED', 'start_distribution') do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE', 'DISTRIBUTED', 'ACTIVATED', 'activate') parcel = cluster.get_parcel(parcel_name, parcel_version) if parcel.stage != 'ACTIVATED': raise Exception('Parcel is currently mid-stage [' + parcel.stage + '], please wait for this to complete') print 'Parcel [DEPLOYMENT] finished' if init_pre_dir is not None and os.path.isdir(init_pre_dir): print 'Cluster [PRE_INIT] starting ... ' for script in glob.glob(init_pre_dir + '/*.sh'): subprocess.call([script]) print 'Cluster [PRE_INIT] finihsed' print 'Cluster [CONFIG_DEPLOYMENT] starting ... ' cluster.deploy_client_config() cmd = cluster.deploy_client_config() if not cmd.wait(TIMEOUT_SEC).success: raise Exception('Failed to deploy client configs') print 'Cluster [CONFIG_DEPLOYMENT] finihsed' print 'Cluster [STOP] starting ... ' cluster.stop().wait() print 'Cluster [STOP] finihsed' print 'Cluster [START] starting ... ' cluster.start().wait() print 'Cluster [START] finihsed' if init_post_dir is not None and os.path.isdir(init_post_dir): print 'Cluster [POST_INIT] starting ... ' for script in glob.glob(init_post_dir + '/*.sh'): subprocess.call([script]) print 'Cluster [POST_INIT] finihsed' print 'Cluster [DEPLOYMENT] finished'
def main(): """ Kerberizes a cluster. @rtype: number @returns: A number representing the status of success. """ settings = retrieve_args() api = ApiResource(settings.host, settings.port, settings.username, settings.password, settings.use_tls, 8) cloudera_manager = api.get_cloudera_manager() cluster = api.get_cluster(settings.cluster) mgmt_service = cloudera_manager.get_service() if verify_cloudera_manager_has_kerberos_principal(cloudera_manager): wait_for_command('Stopping the cluster', cluster.stop()) wait_for_command('Stopping MGMT services', mgmt_service.stop()) configure_services(cluster) wait_for_generate_credentials(cloudera_manager) wait_for_command('Deploying client configs.', cluster.deploy_client_config()) wait_for_command('Deploying cluster client configs', cluster.deploy_cluster_client_config()) wait_for_command('Starting MGMT services', mgmt_service.start()) wait_for_command('Starting the cluster', cluster.start()) else: print "Cluster does not have Kerberos admin credentials. Exiting!" return 0
def main(): """ Add peer to the cluster. @rtype: number @returns: A number representing the status of success. """ settings = parse_args() if len(sys.argv) == 1 or len(sys.argv) > 17: print_usage_message() quit(1) api_target = ApiResource(settings.server, settings.port, settings.username, settings.password, settings.use_tls, 14) cloudera_manager = api_target.get_cloudera_manager() try: cloudera_manager.create_peer(settings.peer_name, settings.source_cm_url, settings.source_user, settings.source_password) print "Peer Successfully Added" except ApiException as error: if 'already exists' in str(error): print 'Peer Already exists' else: raise error return 0
def connect(cm_api, cm_username, cm_password, use_proxy=False):
    '''Wait for up to ten minutes (120 attempts, 5s apart) for CM to come up.'''
    remaining = 120
    while remaining > 0:
        remaining -= 1
        try:
            logging.info("Checking CM availability....")
            # change name of proxy if necessary
            proxy = urllib2.ProxyHandler({'http': 'proxy'})
            api = ApiResource(cm_api, username=cm_username,
                              password=cm_password, version=14)
            if use_proxy:
                # pylint: disable=W0212
                api._client._opener.add_handler(proxy)
            cloudera_manager = api.get_cloudera_manager()
            # A trivial authenticated call proves the API really answers.
            api.get_user(cm_username)
            return api, cloudera_manager
        except Exception:
            logging.warning("CM is not up")
            time.sleep(5)
    logging.error("CM did not come UP")
    sys.exit(-1)
def connect(cm_api, cm_username, cm_password, use_proxy=False):
    '''Wait for ten minutes for CM to come up.'''
    for _attempt in xrange(120):
        try:
            logging.info("Checking CM availability....")
            # change name of proxy if necessary
            proxy_handler = urllib2.ProxyHandler({'http': 'proxy'})
            api = ApiResource(cm_api, username=cm_username,
                              password=cm_password, version=14)
            if use_proxy:
                # pylint: disable=W0212
                api._client._opener.add_handler(proxy_handler)
            manager = api.get_cloudera_manager()
            api.get_user(cm_username)  # verifies credentials / availability
            return api, manager
        except Exception:
            logging.warning("CM is not up")
            time.sleep(5)
    # Exhausted all retries without a successful connection.
    logging.error("CM did not come UP")
    sys.exit(-1)
def init_cluster():
    """Create the CDH cluster once every Cloudera agent has registered with CM.

    Waits for the agent service, polls CM until all expected hosts appear,
    applies the CM config, creates the cluster, adds all hosts, disables swap
    alerting, registers the SPARK2 repo and sets java_home on every host.

    Returns a (cluster, manager) tuple.  Fixes the "Creating Clutser." log
    typo from the original.
    """
    # wait for all cloudera agent processes to come up
    setup_logger.info("Creating Cluster.")
    BDVLIB_ServiceWait([["services", "cloudera_scm_agent", NODE_GROUP_ID]])
    # make sure cloudera manager has received registration for all new agents
    all_cloudera_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username=ADMIN_USER, password=ADMIN_PASS)
    while True:
        current_all_hosts = [h.hostname for h in api.get_all_hosts()]
        setup_logger.info("Currently registered hosts with CM " +
                          str(current_all_hosts))
        if all(x in current_all_hosts for x in all_cloudera_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    cluster.add_hosts(ALL_HOSTS)
    # turn off host swap alerting
    hosts_swap_alert_off(api)
    setup_logger.info("Setting Up SPARK2 Repo....")
    add_spark2_repo(api)
    # set java home on every host
    setup_logger.info("Setting Up Java Path....")
    hosts_set_javahome(api)
    return (cluster, manager)
def init_cluster():
    """Create the KTS cluster once all Cloudera agents have registered with CM.

    Returns a (cluster, manager) tuple.
    """
    # wait for all cloudera agent processes to come up
    BDVLIB_ServiceWait(
        [["services", "cloudera_scm_agent", NODE_GROUP_ID, "kts"]])
    # make sure cloudera manager has received registration for all new agents
    expected_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username="******", password="******")
    while True:
        registered = [host.hostname for host in api.get_all_hosts()]
        setup_logger.info("Currently registered hosts with CM " +
                          str(registered))
        if all(name in registered for name in expected_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    # Only the Key Trustee Server (kts) role hosts join this cluster.
    kts_hosts = ConfigMeta.getWithTokens(
        ['nodegroups', NODE_GROUP_ID, 'roles', 'kts', 'fqdns'])
    cluster.add_hosts(kts_hosts)
    return (cluster, manager)
def main(): """ Kerberizes a cluster. @rtype: number @returns: A number representing the status of success. """ settings = retrieve_args() api = ApiResource(settings.host, settings.port, settings.username, settings.password, settings.use_tls, 8) cloudera_manager = api.get_cloudera_manager() cluster = api.get_cluster(settings.cluster) mgmt_service = cloudera_manager.get_service() if verify_cloudera_manager_has_kerberos_principal(cloudera_manager): wait_for_command('Stopping the cluster', cluster.stop()) wait_for_command('Stopping MGMT services', mgmt_service.stop()) configure_services(cluster) wait_for_generate_credentials(cloudera_manager) wait_for_command('Deploying client configs.', cluster.deploy_client_config()) wait_for_command('Deploying cluster client configs', cluster.deploy_cluster_client_config()) wait_for_command('Starting MGMT services', mgmt_service.start()) wait_for_command('Starting the cluster', cluster.start()) else: print "Cluster does not have Kerberos admin credentials. Exiting!" return 0
def main():
    """Bootstrap a CDH cluster through the CM API and run its first-run command.

    Connects to Cloudera Manager, reuses (or initializes) the cluster, deploys
    the CM management service, assigns roles, applies custom configuration and
    finally polls cluster.first_run() to completion.

    Fixes vs. original: `cmd.success == None` -> `is None`; bare `except:`
    narrowed to `except Exception:`; `cmd.resultMessage()` called an
    ApiCommand *attribute* as a method, which would raise TypeError on the
    failure path.
    """
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object; initialize it if it does not exist yet
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except Exception:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
        print("[INFO] Initialized cluster " + CLUSTER_NAME +
              " which uses CDH version " + CLUSTER_VERSION)

    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name, apub_role_name,
                      eserv_role_name, hmon_role_name, smon_role_name,
                      nav_role_name, navms_role_name, rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)

    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")

    # Custom role config groups cannot be automatically configured:
    # Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except Exception:
        pass
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")

    # Poll until the first-run command finishes; fetch() refreshes its status.
    cmd = cluster.first_run()
    while cmd.success is None:
        cmd = cmd.fetch()
    if not cmd.success:
        # resultMessage is a plain attribute on ApiCommand, not a method.
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print(
            "[INFO] First run successfully executed. Your cluster has been set up!"
        )
def main(cm_host, user, password):
    """Force java_home to the bundled JDK on every host, then restart CM and the cluster."""
    api = ApiResource(cm_host, username=user, password=password)
    manager = api.get_cloudera_manager()
    manager.update_all_hosts_config(
        {"java_home": "/usr/java/jdk1.8.0_121-cloudera"})
    print("restarting CM service - this will take a minute or so")
    manager.get_service().restart().wait()
    print("restarting cluster - this will take 2-5 minutes")
    # Rolling-restart only stale services and push fresh client configs.
    first_cluster = api.get_all_clusters()[0]
    first_cluster.restart(restart_only_stale_services=True,
                          redeploy_client_configuration=True).wait()
def start(host, user, passw):
    """Restart the Cloudera Management Service on the given CM host.

    :param host: Cloudera Manager hostname.
    :param user: API username.
    :param passw: API password.

    Bug fix: the original passed the undefined globals ``username`` and
    ``password`` to ApiResource instead of the ``user``/``passw``
    parameters, raising NameError on every call.
    """
    cm_host = str(host)
    api = ApiResource(cm_host, 7180, username=str(user),
                      password=str(passw), version=9)
    mgmt = api.get_cloudera_manager().get_service()
    mgmt.restart()
    print("Services successfully started")
def main(): parser = cm_args_parser() args = parser.parse_args() print "connecting to host:" + args.cm_host + "..." api = ApiResource(args.cm_host, username=args.cm_user, password=args.cm_password) print "host connected, getting cloudera manager " MANAGER = api.get_cloudera_manager() print "have cloudera manager object" deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG) print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
def connect_cm(cm_host, cm_username, cm_password):
    """
    Connects to Cloudera Manager API Resource instance to retrieve Endpoint details
    :param cm_host: Cloudera Manager host
    :param cm_username: Username for authentication
    :param cm_password: Password for authentication
    :return: (api, cm_manager) pair
    """
    api = ApiResource(cm_host, version=6, username=cm_username,
                      password=cm_password)
    return api, api.get_cloudera_manager()
def connect_cm(cm_host, cm_username, cm_password):
    """
    Connects to Cloudera Manager API Resource instance to retrieve Endpoint details
    :param cm_host: Cloudera Manager host
    :param cm_username: Username for authentication
    :param cm_password: Password for authentication
    :return: (api, cm_manager) pair
    """
    endpoint = ApiResource(cm_host,
                           version=6,
                           username=cm_username,
                           password=cm_password)
    manager = endpoint.get_cloudera_manager()
    return endpoint, manager
def main(): api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=api_num) cm = ClouderaManager(api) #cm.host_install(host_username, host_list, password=host_password, cm_repo_url=cm_repo_url) MANAGER = api.get_cloudera_manager() #MANAGER.update_config) print "Connected to CM host on " + cm_host + " and updated CM configuration" #CLUSTER = init_cluster(api, cluster_name , cdh_version, host_list ,host_list) deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG) print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + cm_host + "now service is stop!"
def connect(cm_api, cm_username, cm_password, use_proxy=False):
    """Open a CM API session, optionally routed through the 'proxy' HTTP proxy.

    Returns an (api, cloudera_manager) pair after verifying the credentials
    with a simple authenticated user lookup.
    """
    api = ApiResource(cm_api, username=cm_username, password=cm_password)
    if use_proxy:
        # change name of proxy if necessary
        proxy_handler = urllib2.ProxyHandler({'http': 'proxy'})
        # pylint: disable=W0212
        api._client._opener.add_handler(proxy_handler)
    cloudera_manager = api.get_cloudera_manager()
    api.get_user(cm_username)
    return api, cloudera_manager
def main(cm_host, user, password): api = ApiResource(cm_host, username=user, password=password) cm = api.get_cloudera_manager() config = cm.get_all_hosts_config(view='full') if config['java_home'].value == "/usr/java/jdk1.8.0_121-cloudera": print "Java home already set - skipping" else: print "Updating jdk location" cm.update_all_hosts_config( {"java_home": "/usr/java/jdk1.8.0_121-cloudera"}) print("restarting CM service - this will take a minute or so") cm.get_service().restart().wait() print("restarting cluster - this will take 2-5 minutes") api.get_all_clusters()[0].restart(restart_only_stale_services=True, redeploy_client_configuration=True).wait()
def main():
    """Bootstrap a CDH cluster via the CM API and execute its first-run command.

    Fixes vs. original: `cmd.success == None` compared a singleton with `==`
    (PEP 8: use `is None`); two bare `except:` clauses narrowed to
    `except Exception:`; `cmd.resultMessage()` called an ApiCommand
    *attribute* as a method, which would raise TypeError on the failure path.
    """
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)
    # create cluster object; fall back to initialization when it is missing
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except Exception:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
        print("[INFO] Initialized cluster " + CLUSTER_NAME +
              " which uses CDH version " + CLUSTER_VERSION)
    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name, apub_role_name,
                      eserv_role_name, hmon_role_name, smon_role_name,
                      nav_role_name, navms_role_name, rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)
    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")
    # Custom role config groups cannot be automatically configured:
    # Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except Exception:
        pass
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")
    # Busy-poll the first-run command until it reports success or failure.
    cmd = cluster.first_run()
    while cmd.success is None:
        cmd = cmd.fetch()
    if not cmd.success:
        # resultMessage is a plain attribute on ApiCommand, not a method.
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print("[INFO] First run successfully executed. Your cluster has been set up!")
def main(): resource = ApiResource("localhost", 7180, "cloudera", "cloudera", version=19) cluster = resource.get_cluster("Cloudera Quickstart") cm_manager = resource.get_cloudera_manager() cm_manager.update_config({'REMOTE_PARCEL_REPO_URLS': PARCEL_REPO}) cm_manager.update_all_hosts_config(JDK_CONFIG) time.sleep(5) for parcel in PARCELS: ParcelInstaller(parcel['name'], parcel['version']).install(cluster) print "Restarting cluster" cluster.stop().wait() cluster.start().wait() print "Done restarting cluster"
def main():
    """Raise the Activity Monitor firehose heap size on the CM management service."""
    amon_role_config = {
        'firehose_heapsize': '1173741824',
    }
    api = ApiResource("ec2-52-24-151-222.us-west-2.compute.amazonaws.com",
                      version=5, username="******", password="******")
    mgmt = api.get_cloudera_manager().get_service()
    # Apply the heap override to every ACTIVITYMONITOR role config group.
    for group in mgmt.get_all_role_config_groups():
        if group.roleType == "ACTIVITYMONITOR":
            group.update_config(amon_role_config)
def main(): parser = cm_args_parser() args = parser.parse_args() print "connecting to host:" + args.cm_host + "..." api = ApiResource(args.cm_host, username=args.cm_user, password=args.cm_password) print "host connected, getting cloudera manager " MANAGER = api.get_cloudera_manager() print "have cloudera manager object" deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG) print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype: number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, version=6)
    if not validate_cluster(api, settings.cluster):
        write_to_stdout(
            "Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!"
        )
        return 1
    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            # Bug fix: the original printed "Exiting!" but fell through and
            # enabled HA on an unhealthy cluster anyway.
            return 1
    write_to_stdout("Cluster health is GOOD!\n")
    cluster = api.get_cluster(settings.cluster)
    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)
    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())
    return 0
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype: number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, version=6)
    if not validate_cluster(api, settings.cluster):
        write_to_stdout("Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!")
        return 1
    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            # Bug fix: previously execution continued past this failed health
            # check and enabled HA regardless.
            return 1
    write_to_stdout("Cluster health is GOOD!\n")
    cluster = api.get_cluster(settings.cluster)
    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)
    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())
    return 0
# FIXME: could be removed in future version? f = file('/etc/cloudera-scm-server/db.mgmt.properties') for line in f: if not line.startswith("#"): (key, value) = line.split("=") s = key.split('.') service = s[3].strip() setting = s[5].strip() value = value.strip() if service not in creds: creds[service] = {} creds[service][setting] = value api = ApiResource(sys.argv[1], username="******", password="******", use_tls=False, version=4) cm = api.get_cloudera_manager() roles = [ApiRole(api, t.lower(), t, ApiHostRef(api, sys.argv[1])) for t in ROLE_TYPES] try: service = cm.get_service() except ApiException: mgmt = ApiServiceSetupInfo("management", "MGMT", roles=roles) service = cm.create_mgmt_service(mgmt) rcg = service.get_all_role_config_groups() for rc in rcg: if rc.roleType in ROLE_TYPES: config = {} # Reduce amount of some logs to 1 day if rc.roleType == "ACTIVITYMONITOR": config["firehose_activity_purge_duration_hours"] = "24"
# Loading Cluster config CLUSTER_NM=str(options.cluster_nm ) CDH_VERSION=str(options.cdh_version) CDH_PARCEL_VERSION=CDH_VERSION.split('-')[0] HOST_FILE = str(options.host_file ) HOST_USER = str(options.host_user) HOST_PASS = str(options.host_pass ) LICENSE_TYPE = int(options.license_type or "1") LOG_FILE = "/".join(os.path.realpath( __file__ ).split("/")[:-2])+'/log/scale_cluster_exechistory_'+str(os.getpid())+'.log'; OUTPUT_TEMPLATE = "/".join(os.path.realpath( __file__ ).split("/")[:-2])+"/output/final_template_1.json"; CLUSTER_HOSTS = [] # Adding cloudera parcel URL api = ApiResource(CM_HOST, CM_PORT , ADMIN_USER, ADMIN_PASS, version=CM_API_VERSION ) api.get_cloudera_manager().update_config({"REMOTE_PARCEL_REPO_URLS": BASE_URL + "/cdh5/parcels/" + CDH_PARCEL_VERSION + "/"}) def logger(logType,msg): ''' Logger v1 logType : [debug|info|warn|error|critical] msg : Log message :return: ''' logging.basicConfig(filename=LOG_FILE,level=logging.DEBUG,format='%(asctime)s.%(msecs)d %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S") if logType == 'debug': logging.debug(msg) elif logType == 'info': logging.info(msg) elif logType == 'warn': logging.warning(msg) elif logType == 'error': logging.error(msg)
def main(): API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS) MANAGER = API.get_cloudera_manager() MANAGER.update_config(CM_CONFIG) print "Connected to CM host on " + CM_HOST + " and updated CM configuration" CLUSTER = init_cluster(API, CLUSTER_NAME, CDH_VERSION, CLUSTER_HOSTS, CM_HOST) print "Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CDH_VERSION deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG) print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST deploy_parcels(CLUSTER, PARCELS) print "Downloaded and distributed parcels: " PRETTY_PRINT.pprint(PARCELS) zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME, ZOOKEEPER_HOSTS, ZOOKEEPER_SERVICE_CONFIG, ZOOKEEPER_ROLE_CONFIG) print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: " PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS) hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG, HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST, HDFS_NAMENODE_CONFIG, HDFS_SECONDARY_NAMENODE_HOST, HDFS_SECONDARY_NAMENODE_CONFIG, HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG, HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG) print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: " PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS) init_hdfs(hdfs_service, HDFS_SERVICE_NAME, CMD_TIMEOUT) print "Initialized HDFS service" # mapred and yarn are mutually exclusive; only deploy one of them #mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, 
MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG) print "Deployed MapReduce service " + MAPRED_SERVICE_NAME + " using JobTracker on " + MAPRED_JT_HOST + " and TaskTrackers running on " PRETTY_PRINT.pprint(MAPRED_TT_HOSTS) yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG, YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST, YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG, YARN_GW_HOSTS, YARN_GW_CONFIG) print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on " PRETTY_PRINT.pprint(YARN_NM_HOSTS) spark_service = deploy_spark(CLUSTER, SPARK_SERVICE_NAME, SPARK_SERVICE_CONFIG, SPARK_MASTER_HOST, SPARK_MASTER_CONFIG, SPARK_WORKER_HOSTS, SPARK_WORKER_CONFIG, SPARK_GW_HOSTS, SPARK_GW_CONFIG) print "Deployed SPARK service " + SPARK_SERVICE_NAME + " using SparkMaster on " + SPARK_MASTER_HOST + " and SparkWorkers on " PRETTY_PRINT.pprint(SPARK_WORKER_HOSTS) deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG) print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on " PRETTY_PRINT.pprint(HBASE_RS_HOSTS) hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG, HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST, HIVE_HS2_CONFIG, HIVE_WHC_HOST, HIVE_WHC_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG) print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST init_hive(hive_service) print "Initialized Hive service" impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME, IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST, IMPALA_SS_CONFIG, IMPALA_CS_HOST, IMPALA_CS_CONFIG, IMPALA_ID_HOSTS, IMPALA_ID_CONFIG) 
print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on " PRETTY_PRINT.pprint(IMPALA_ID_HOSTS) #Need to start the cluster now as subsequent services need the cluster to be runnign #TODO can we just start ZK, and maybe HDFS, instead of everything? It's just needed for the search service print "About to restart cluster" CLUSTER.stop().wait() CLUSTER.start().wait() print "Done restarting cluster" search_service = deploy_search(CLUSTER, SEARCH_SERVICE_NAME, SEARCH_SERVICE_CONFIG, SEARCH_SOLR_HOST, SEARCH_SOLR_CONFIG, SEARCH_GW_HOSTS, SEARCH_GW_CONFIG) print "Deployed Search service " + SEARCH_SERVICE_NAME + " using SOLRHost " + SEARCH_SOLR_HOST flume_service = deploy_flume(CLUSTER, FLUME_SERVICE_NAME, FLUME_SERVICE_CONFIG, FLUME_AGENT_HOSTS, FLUME_AGENT_CONFIG) print "Deployed Flume service " + FLUME_SERVICE_NAME + " using FlumeAgents on " PRETTY_PRINT.pprint(FLUME_AGENT_HOSTS) oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME, OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST, OOZIE_SERVER_CONFIG) print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST sqoop_service = deploy_sqoop(CLUSTER, SQOOP_SERVICE_NAME, SQOOP_SERVICE_CONFIG, SQOOP_SERVER_HOST, SQOOP_SERVER_CONFIG) print "Deployed Sqoop service " + SQOOP_SERVICE_NAME + " using SqoopServer on " + SQOOP_SERVER_HOST hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG, HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST, HUE_KTR_CONFIG) print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST #deploy_accumulo(CLUSTER, ACCUMULO_SERVICE_NAME, ACCUMULO_SERVICE_CONFIG, ACCUMULO_MASTER_HOSTS, ACCUMULO_MASTER_CONFIG, ACCUMULO_TRACER_HOSTS, ACCUMULO_TRACER_CONFIG, ACCUMULO_TSERVER_HOSTS, ACCUMULO_TSERVER_CONFIG, ACCUMULO_LOGGER_HOSTS, ACCUMULO_LOGGER_CONFIG, ACCUMULO_MONITOR_HOST, ACCUMULO_MONITOR_CONFIG, ACCUMULO_GC_HOST, 
ACCUMULO_GC_CONFIG, ACCUMULO_GATEWAY_HOSTS, ACCUMULO_GATEWAY_CONFIG) print "About to restart cluster." CLUSTER.stop().wait() CLUSTER.start().wait() print "Done restarting cluster." post_startup(CLUSTER, hdfs_service, oozie_service) print "Finished deploying Cloudera cluster. Go to http://" + CM_HOST + ":7180 to administer the cluster." print "If the Oozie service (and therefore the HUE service as well, which depends on it) did not start properly, go to the Oozie service, stop it, click on the Actions button and choose 'Create Database', then start it." print "If there are any other services not running, restart them now."
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all
    the hosts are configured and Cloudera Manager is installed with all the
    required databases setup.

    Handle all the steps required in creating a cluster. All the functions are
    built to function idempotently, so you should be able to resume from any
    failed step by re-running ``setup()``.
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        """
        :param module: Ansible module handle, used for failure reporting.
        :param config: Parsed deployment configuration (dict with 'cm',
            'cluster', 'parcels' and 'services' sections).
        :param trial: When True, enable the 60-day trial license.
        :param license_txt: Full license text to register (mutually exclusive
            with ``trial``).
        """
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license: either trial mode is activated or the
        provided full license text is registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            # No license registered yet -- install one.
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                # BUG FIX: previously referenced the bare name `license_txt`,
                # which is undefined in this method's scope (NameError);
                # the constructor stores it on self.
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(self.module, 'License should be provided or trial should be specified')
            # Re-fetch to confirm the license actually took effect.
            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner, _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only
        created if another one doesn't exist with the same name; hosts already
        in the cluster are not re-added.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(cluster_config['name']))
            self.cluster = self.api.create_cluster(cluster_config['name'],
                                                   cluster_config['version'],
                                                   cluster_config['fullVersion'])
        # Only add hosts that are not already members (idempotency).
        cluster_hosts = [self.api.get_host(host.hostId).hostname
                         for host in self.cluster.list_hosts()]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        """Download, distribute and activate every configured parcel."""
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all
        hosts; raising ApiException triggers the @retry decorator.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            # Still running -- raise so @retry polls again.
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        No-op if they are already running.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        # BUG FIX (both loops below): previously read the bare name `config`,
        # which is undefined in this method's scope; the configuration lives
        # on self.config.
        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT", msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'], role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(self.module, "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create and pre-configure the provided list of services, then start
        them and perform post-startup actions.

        :param services: List of service names to act on; each name must match
            a class in this module and (optionally) a 'services' config entry.
        """
        service_classes = []

        # Create and pre-configure provided services.
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                # Resolve the service class by name from this module.
                svc = getattr(sys.modules[__name__], service)(self.cluster, service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER", msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on
        # other services and it is essential that the client configs are in place.
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service.
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This
            # helps with re-running the script after fixing errors.
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        """Run the full, idempotent cluster-provisioning sequence."""
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
def create_cluster(config_dict): config.read([ './conf/hadrian.ini', './conf/cluster_specs.ini', './conf/cloudera-manager/cm.ini' ]) cm_cluster_name = config_grabber("Globals")['cm.cluster.name'] cm_username = config_grabber("Globals")['cm.username'] cm_password = config_grabber("Globals")['cm.password'] cm_port = config_grabber("Globals")['cm.port'] version = config_grabber('Globals')['cdh.cluster.version'] cm_server = config_grabber(cm_cluster_name + '-en')['cm.server'] #Grab all configuration files in the directory with the CM Cluster Name. for i in os.listdir('./conf/' + cm_cluster_name): config.read('./conf/' + cm_cluster_name + '/' + i) all_nodes = list() while (get_cm_status(cm_server + ':' + cm_port) != 200): print 'Waiting for CM Server to start... ' time.sleep(15) api = ApiResource(cm_server, cm_port, cm_username, cm_password) # create cluster cluster = api.create_cluster(cm_cluster_name, version.upper()) #Config CM print 'Applying any configuration changes to Cloudera Manager' cmanager = api.get_cloudera_manager() cmanager.update_config(config_grabber('cloudera-manager-updates')) planned_nodes = config_grabber(cm_cluster_name + '-en')['full.list'].split(',') for k, v in config_grabber(cm_cluster_name + '-dn').iteritems(): for j in v.split(','): planned_nodes.append(j) # TODO make this smarter. show which agents haven't checked in. Add the option to continue without them. if len(api.get_all_hosts()) != len(planned_nodes): print 'Waiting for all agents to check into the CM Server before continuing.' while len(planned_nodes) > api.get_all_hosts(): print 'Waiting for the final set of CM Agent nodes to check in.' time.sleep(5) print 'Updating Rack configuration for data nodes.' 
all_hosts = list() for host in api.get_all_hosts(): all_hosts.append(host.hostId) for k, v in config_grabber(cm_cluster_name + '-dn').iteritems(): if host.hostname in v: print 'Setting host: ' + host.hostname + ' to rack /default/' + k host.set_rack_id('/default/' + k) print 'Adding all hosts to cluster.' cluster.add_hosts(all_hosts) # download CDH Parcels # TODO add some logic here to make the parcel list something that's read from the hadrian.ini # This will allow support for other CDH packages, Search, etc. if config_grabber('Globals')['cdh.distribution.method'] == 'parcels': distribute_parcel(cluster, 'CDH', config_grabber("Globals")['cdh.parcel.version']) if config_dict.get('hdfs_ha') == True: create_zookeeper_service(config_dict, cluster) create_hdfs_service(config_dict, cluster) cmd = cluster.deploy_client_config() if not cmd.wait(CMD_TIMEOUT).success: print 'Failed to deploy client configurations' else: print 'Client configuration deployment complete.' create_mapred_service(config_dict, cluster, cm_server) if config_dict.get('hbase') == True: if config_dict.get('hdfs_ha') == False: create_zookeeper_service(config_dict, cluster) create_hbase_service(config_dict, cluster) if config_dict.get('hive') == True: create_hive_service(config_dict, cluster) print 'Starting final client configuration deployment for all services.' cmd = cluster.deploy_client_config() if not cmd.wait(CMD_TIMEOUT).success: print 'Failed to deploy client configuration.' else: print 'Client configuration deployment complete. The cluster is all yours. Happy Hadooping.'
from cm_api.api_client import ApiResource CM_HOST = "127.0.0.1" ADMIN_USER = "******" ADMIN_PASS = "******" API = ApiResource(CM_HOST, version=14, username=ADMIN_USER, password=ADMIN_PASS) MANAGER = API.get_cloudera_manager() mgmt = MANAGER.get_service() print "restart mgmt..." mgmt.restart().wait() print "TIP cluster..." tip = API.get_cluster("TIP") tip.restart().wait()
#!/usr/bin/env python #Author: Pratap Raj #Purpose: Start Cloudera Management services import sys import socket from cm_api.api_client import ApiResource from cm_api.endpoints.cms import ClouderaManager ######### # Do not edit any system variables here. They are all passed from the startstopcluster.sh script, so make changes there. cmhost=str(sys.argv[1]) cmport=str(sys.argv[2]) cmusername=str(sys.argv[3]) cmpassword=str(sys.argv[4]) tlspref=str(sys.argv[5]) ######### api = ApiResource(cmhost, server_port=cmport, username=cmusername, password=cmpassword, use_tls=tlspref) mgmt=api.get_cloudera_manager().get_service() cmstartstatus=mgmt.start().wait() print cmstartstatus.success
def main():
    """
    Ansible module entry point: dispatch on the 'action' parameter to perform
    one Cloudera Manager operation (cluster/host/parcel/service management)
    against the CM API on localhost, then exit via module.exit_json /
    module.fail_json.
    """
    module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

    api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=9)
    cluster_name = CLUSTER_NAME

    manager = api.get_cloudera_manager()

    action_a = module.params.get('action', None)

    if action_a == 'create_cluster':
        license_a = module.params.get('license', None)
        version_a = module.params.get('version', None)

        cluster_list = [x.name for x in api.get_all_clusters()]
        if cluster_name in cluster_list:
            module.exit_json(changed=False, msg='Cluster exists')
        else:
            cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
            if license_a is None:
                manager.begin_trial()
            else:
                manager.update_license(license_a.decode('base64'))
            module.exit_json(changed=True, msg='Cluster created')

    elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster', 'create_snapshot_policy', 'deploy_configuration']:
        # more complicated actions that need a created cluster go here
        cluster = api.get_cluster(cluster_name)
        host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

        # adds a host to the cluster
        # host_name should be in the internal DNS format, ip-xx-xx-xx.copute.internal
        if action_a == 'add_host':
            host_a = module.params.get('host', None)

            host_list = host_map.keys()
            if host_a in host_list:
                module.exit_json(changed=False, msg='Host already in cluster')
            else:
                try:
                    cluster.add_hosts([host_a])
                except ApiException:
                    # if a host isn't there, it could be because the agent
                    # didn't manage to connect yet so let's wait a moment for it
                    sleep(120)
                    cluster.add_hosts([host_a])
                module.exit_json(changed=True, msg='Host added')

        # create management service and set it's basic configuration
        # this needs a separate function since management is handled
        # differently than the rest of services
        elif action_a == 'create_mgmt':
            host_a = module.params.get('host', None)

            # getting the management service is the only way to check if mgmt exists
            # an exception means there isn't one
            try:
                mgmt = manager.get_service()
                module.exit_json(changed=False, msg='Mgmt service already exists')
            except ApiException:
                pass

            mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

            # this is ugly... and I see no good way to unuglify it
            firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
            reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

            # since there is no easy way of configuring the manager... let's do it here :(
            role_conf = defaultdict(dict)
            role_conf['ACTIVITYMONITOR'] = {
                'firehose_database_host': '{0}:7432'.format(host_a),
                'firehose_database_user': '******',
                'firehose_database_password': firehose_passwd,
                'firehose_database_type': 'postgresql',
                'firehose_database_name': 'amon',
                'firehose_heapsize': '268435456',
            }
            role_conf['EVENTSERVER'] = {
                'event_server_heapsize': '215964392'
            }
            role_conf['REPORTSMANAGER'] = {
                'headlamp_database_host': '{0}:7432'.format(host_a),
                'headlamp_database_user': '******',
                'headlamp_database_password': reports_passwd,
                'headlamp_database_type': 'postgresql',
                'headlamp_database_name': 'rman',
                'headlamp_heapsize': '268435456',
            }

            roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
            # create mangement roles
            for role in roles:
                mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

            # update configuration of each
            for group in mgmt.get_all_role_config_groups():
                group.update_config(role_conf[group.roleType])

            mgmt.start().wait()
            # after starting this service needs time to spin up
            sleep(30)
            module.exit_json(changed=True, msg='Mgmt created and started')

        # deploy a given parcel on all hosts in the cluster
        # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
        elif action_a == 'deploy_parcel':
            name_a = module.params.get('name', None)
            version_a = module.params.get('version', None)

            if "latest" in version_a:
                available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
                if "-latest" in version_a:
                    version_substr = match('(.+?)-latest', version_a).group(1)
                # if version is just "latest", try to check everything
                else:
                    version_substr = ".*"
                try:
                    [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
                except ValueError:
                    module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
            else:
                version_parcel = version_a

            # we now go through various stages of getting the parcel
            # as there is no built-in way of waiting for an operation to complete
            # we use loops with sleep to get it done
            parcel = cluster.get_parcel(name_a, version_parcel)
            if parcel.stage == 'AVAILABLE_REMOTELY':
                parcel.start_download()

                while parcel.stage != 'DOWNLOADED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    sleep(10)

            if parcel.stage == 'DOWNLOADED':
                parcel.start_distribution()

                while parcel.stage != 'DISTRIBUTED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    # FIX: the sibling download/activate loops throttle with
                    # sleep(10); without it this loop busy-polls the CM API.
                    sleep(10)

                # sleep while hosts report problems after the download
                for i in range(12):
                    sleep(10)
                    if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                        break

            # since parcels are distributed automatically when a new host is added to a cluster
            # we can encounter the ,,ACTIVATING'' stage then
            if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
                if parcel.stage == 'DISTRIBUTED':
                    parcel.activate()

                while parcel.stage != 'ACTIVATED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    # this sleep has to be large because although the operation is very fast
                    # it makes the management and cloudera hosts go bonkers, failing all of the health checks
                    sleep(10)

                # sleep while hosts report problems after the distribution
                for i in range(60):
                    sleep(10)
                    if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                        break

                module.exit_json(changed=True, msg='Parcel activated')

            if parcel.stage == 'ACTIVATED':
                module.exit_json(changed=False, msg='Parcel already activated')

            # if we get down here, something is not right
            module.fail_json(msg='Invalid parcel state')

        # deploy nodes for workers, according to SERVICE_WORKER_MAP
        # also give them sane names and init zookeeper and kafka ones
        # which need id's specified
        elif action_a == 'deploy_service_worker_nodes':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)

            service_name = SERVICE_MAP[service_a]
            role_name = SERVICE_WORKER_MAP[service_a]['name']
            full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)

            nodes = [x for x in service.get_all_roles() if role_name in x.name]

            # if host already has the given group, we should skip it
            if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
                module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
            # find out the highest id that currently exists
            else:
                node_names = [x.name for x in nodes]
                if len(node_names) == 0:
                    # if no nodes, start numbering from 1
                    node_i = 1
                else:
                    # take the max number and add 1 to it
                    node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

                if service_name == 'ZOOKEEPER':
                    role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
                    # zookeeper needs a per-node ID in the configuration, so we set it now
                    role.update_config({'serverId': node_i})
                elif service_name == 'KAFKA':
                    role = service.create_role(full_role_name.format(node_i), role_name, host_a)
                    # kafka needs a per-node ID in the configuration, so we set it now
                    role.update_config({'broker.id': node_i})
                else:
                    service.create_role(full_role_name.format(node_i), role_name, host_a)

                module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

        # deploy a service. just create it, don't do anything more
        # this is needed maily when we have to set service properties before role deployment
        elif action_a == 'deploy_service':
            name_a = module.params.get('name', None)

            if not name_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(name_a))
            service_name = SERVICE_MAP[name_a]
            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
                module.exit_json(changed=True, msg='{0} service created'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

        # deploy the base hdfs roles (the namenode and secondary)
        # this doesn't create the service, as at least one datanode should already be added!
        # the format also requires certain properties to be set before we run it
        elif action_a == 'deploy_hdfs_base':
            nn_host_a = module.params.get('nn_host', None)
            sn_host_a = module.params.get('sn_host', None)

            changed = False

            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't create a secondary namenode when:
            # - there is one that already exists
            # - there is a second namenode, which means we have HA and don't need a secondary
            if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
                hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
                changed = True

            # create a namenode and format it's FS
            # formating the namenode requires at least one datanode and secondary namenode already in the cluster!
            if not 'HDFS-NAMENODE' in hdfs_roles:
                hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
                for command in hdfs.format_hdfs('HDFS-NAMENODE'):
                    if command.wait().success == False:
                        module.fail_json(msg='Failed formating HDFS namenode with error: {0}'.format(command.resultMessage))
                changed = True

            module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

        # enable HttpFS for HDFS
        # HUE require this for support HA in HDFS
        elif action_a == 'deploy_hdfs_httpfs':
            host_a = module.params.get('host', None)

            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't install second instance of HttpFS
            if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
                module.exit_json(changed=False, msg='HDFS HttpFS service already exists')

            hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a])

            module.exit_json(changed=True, msg='HDFS HttpFS service created')

        # enable HA for HDFS
        # this deletes the secondary namenode and creates a second namenode in it's place
        # also, this spawns 3 journal node and 2 failover controller roles
        elif action_a == 'deploy_hdfs_ha':
            sn_host_a = module.params.get('sn_host', None)
            jn_dir_a = module.params.get('jn_dir', None)
            jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

            hdfs = cluster.get_service('HDFS')

            # if there's a second namenode, this means we already have HA enabled
            if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
                # this is bad and I should feel bad
                # jns is a list of dictionaries, each dict passes the required journalnode parameters
                jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': jn_dir_a, 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

                # this call is so long because we set some predictable names for the sevices
                command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                            active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

                children = command.wait().children
                for command_children in children:
                    # The format command is expected to fail, since we already formated the namenode
                    # BUG FIX: checked the parent command's success for every
                    # child; the intent (per the comment above) is to inspect
                    # each child command individually.
                    if command_children.name != 'Format' and command_children.success == False:
                        module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for HDFS service')
            else:
                module.exit_json(changed=False, msg='HDFS HA already enabled')

        # enable HA for YARN
        elif action_a == 'deploy_rm_ha':
            sn_host_a = module.params.get('sn_host', None)

            yarn = cluster.get_service('YARN')

            # if there are two roles matching to this name, this means HA for YARN is enabled
            if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
                command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
                children = command.wait().children
                for command_children in children:
                    # BUG FIX: as in deploy_hdfs_ha, check each child command
                    # rather than the parent for every iteration.
                    if command_children.success == False:
                        module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for YARN service')
            else:
                module.exit_json(changed=False, msg='YARN HA already enabled')

        # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
        # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
        elif action_a == 'deploy_base_roles':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)

            service_name = SERVICE_MAP[service_a]
            changed = False

            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)

            service_roles = [x.name for x in service.get_all_roles()]

            # create each service from the map
            for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
                # check if role already exists, script cant compare it directly
                # after enabling HA on YARN roles will have random strings in names
                if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
                    service.create_role(role_name, cloudera_name, host_a)
                    changed = True

                    # init commmands
                    if role_name in SERVICE_INIT_COMMANDS.keys():
                        for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
                            # different handling of commands specified by name and
                            # ones specified by an instance method
                            if ismethod(command_to_run):
                                command = command_to_run(service)
                            else:
                                command = service.service_command_by_name(command_to_run)

                            if command.wait().success == False:
                                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

            if changed == True:
                module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

        # deploy configuration - it always return changed
        elif action_a == 'deploy_configuration':
            service_a = module.params.get('service', None)
            service_name = SERVICE_MAP[service_a]
            service = cluster.get_service(service_name)

            # deploying client configuration
            command = service.deploy_client_config()
            if command.wait().success == False:
                module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))

            module.exit_json(changed=True, msg='Configuration deployed')

        # set config values for a given service/role
        elif action_a == 'set_config':
            entity_a = module.params.get('entity', None)
            service_a = module.params.get('service', None)
            role_a = module.params.get('role', None)
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)

            if not service_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(service_a))

            # since management is handled differently, it needs a different service
            if service_a == 'management':
                service = manager.get_service()
            elif service_a == 'cm':
                service = manager
            else:
                service = cluster.get_service(SERVICE_MAP[service_a])

            # role and service configs are handled differently
            if entity_a == 'service':
                prev_config = service.get_config()
                curr_config = service.update_config({name_a: value_a})
                if service_a == 'cm':
                    prev_config = [prev_config]
                    curr_config = [curr_config]
                module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))
            elif entity_a == 'role':
                if not role_a in ROLE_MAP:
                    module.fail_json(msg='Unknown role: {0}'.format(service))

                role = service.get_role_config_group(ROLE_MAP[role_a])
                prev_config = role.get_config()
                curr_config = role.update_config({name_a: value_a})
                module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))
            else:
                module.fail_json(msg='Invalid entity, must be one of service, role')

        # handle service state
        # currently this only can start/restart a service
        elif action_a == 'service':
            state_a = module.params.get('state', None)
            service_a = module.params.get('service', None)

            try:
                if service_a == 'cm':
                    service = manager.get_service()
                else:
                    service = cluster.get_service(SERVICE_MAP[service_a])
            except ApiException:
                module.fail_json(msg='Service does not exist')

            # when starting a service, we also deploy the client config for it
            if state_a == 'started':
                if service.serviceState == 'STARTED':
                    module.exit_json(changed=False, msg='Service already running')
                method = service.start
                verb = "start"
            elif state_a == 'restarted':
                method = service.restart
                verb = "restart"

            try:
                command = service.deploy_client_config()
                if command.wait().success == False:
                    module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
            # since there is no way to check if a service handles client config deployments
            # we try our best and pass the exception if it doesn't
            # BUG FIX: `except ApiException, AttributeError:` is Python 2
            # syntax that BINDS the exception to the name AttributeError
            # instead of catching AttributeError; a tuple is required.
            except (ApiException, AttributeError):
                pass

            method().wait()

            # we need to wait for cloudera checks to complete...
            # otherwise it will report as failing
            sleep(10)
            for i in range(24):
                sleep(10)
                service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
                if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                    break

            service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
            if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
            else:
                module.fail_json(msg='Service {0} failed'.format(verb))

        # handle cluster
        # currently this only can restart
        elif action_a == 'cluster':
            state_a = module.params.get('state', None)

            if state_a == 'restarted':
                command = cluster.restart(redeploy_client_configuration=True)
                if command.wait().success == False:
                    module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
                else:
                    module.exit_json(changed=True, msg='Cluster restart successful')

        # Snapshot policy
        # only create is supported
        elif action_a == 'create_snapshot_policy':
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)
            service_a = module.params.get('service', None)
            service = cluster.get_service(SERVICE_MAP[service_a])
            payload = loads(value_a)
            # checking if policy already exists. Exception is expected when configure for the first time.
            try:
                service.get_snapshot_policy(name_a)
                module.exit_json(changed=False, msg='Defined policy already exists')
            except ApiException:
                pass
            try:
                command = service.create_snapshot_policy(payload)
                module.exit_json(changed=True, msg='Snapshot policy was created.')
            # BUG FIX: same `except A, B` binding bug as above -- use a tuple
            # so AttributeError is actually caught.
            except (ApiException, AttributeError):
                module.fail_json(msg='ERROR in creating snapshot policy.')
hue_db_user = config.get('CLOUDERA_PROPERTIES', 'hue_db_user') oozie_db_host = config.get('CLOUDERA_PROPERTIES', 'oozie_db_host') oozie_db_name = config.get('CLOUDERA_PROPERTIES', 'oozie_db_name') oozie_db_password = str(sys.argv[4]) oozie_db_user = config.get('CLOUDERA_PROPERTIES', 'oozie_db_user') api_version = config.get('CLOUDERA_PROPERTIES', 'api_version') # Get Cloudera Manager, config, and ODP Cluster logging.info('Retrieving Cloudera Manager service and cluster instance') api = ApiResource(cloudera_manager_server_api, 7180, management_console_username, management_console_password, version=api_version) cloudera_manager = ClouderaManager(api) cloudera_manager_config = api.get_cloudera_manager().get_config(view='full') cluster_name = 'Open Data Platform' cluster = api.get_cluster(cluster_name) # Retrieve all ApiHost objects, locate the management server and add others to clients logging.info('Retrieving all hosts from cluster') hosts = api.get_all_hosts() clients = [] for host in hosts: # Suppress Clock Offset warning that incorrectly states chrony is not working host.update_config({'host_health_suppression_host_clock_offset': 'true'}) # Separate Cloudera Manager Server from agents if host.hostname == cloudera_management_server_fqdn: cloudera_management_server = host else:
def install_java_8(region, stack_name):
    """Upgrade every node of the named CDH stack from JDK 1.6/1.7 to JDK 1.8.

    Follows the Cloudera-documented sequence: stop the management service
    and cluster via the CM API, stop all agents and the CM server over SSH,
    swap the JDK RPMs on every host, then bring everything back up.

    :param region: AWS region used to open the EC2 connection.
    :param stack_name: CloudFormation/stack identifier used to locate the
        manager, master and worker instances.
    """
    # following general protocol for upgrading to JDK 1.8 here:
    # http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    # Full host list: workers + manager + master; used for fabric fan-out.
    cluster_instances = (
        get_worker_instances(ec2_conn, stack_name) +
        [manager_instance, get_master_instance(ec2_conn, stack_name)])
    cluster_hosts = [i.ip_address for i in cluster_instances]
    # First tunnel session: stop everything managed by Cloudera Manager.
    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()
        # Stop Cloudera Management Service
        print "Stopping Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()
        # Stop cluster
        print "Stopping the cluster"
        clusters = cm_api.get_all_clusters()
        # NOTE(review): assumes exactly one cluster is registered — confirm.
        cluster = clusters.objects[0]
        cluster.stop().wait()
    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo('service cloudera-scm-agent stop')
    execute(stop_cm_agents, hosts=cluster_hosts)
    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo('service cloudera-scm-server stop')
    execute(stop_cm_server, hosts=[manager_instance.ip_address])
    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        # Remove every installed JDK RPM plus leftover 1.6/1.7 directories
        # before installing the Oracle JDK 8u51 RPM.
        sudo('rpm -qa | grep jdk | xargs rpm -e')
        sudo('rm -rf /usr/java/jdk1.6*')
        sudo('rm -rf /usr/java/jdk1.7*')
        run('wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate '
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            'http://download.oracle.com/otn-pub/java/jdk/8u51-b16/'
            'jdk-8u51-linux-x64.rpm')
        sudo('yum install -y jdk-8-linux-x64.rpm')
        # Point JAVA_HOME at whatever jdk1.8 directory the RPM created.
        append('/home/ec2-user/.bash_profile',
               'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')
    execute(swap_jdks, hosts=cluster_hosts)
    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo('service cloudera-scm-server start')
    execute(start_cm_server, hosts=[manager_instance.ip_address])
    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo('service cloudera-scm-agent start')
    execute(start_cm_agents, hosts=cluster_hosts)
    # Second tunnel session: bring the cluster and mgmt service back up.
    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()
        # Start the cluster and the mgmt service
        print "Starting the cluster"
        # NOTE(review): reuses the `cluster` handle fetched through the
        # previous (now closed) tunnel — presumably the cm_api client lazily
        # issues HTTP calls against the new local port; verify.
        cluster.start().wait()
        print "Starting the Cloudera Management Service"
        cloudera_manager = cm_api.get_cloudera_manager()
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()
class DeployCloudEraCluster(object):
    """Provision and inspect a Cloudera (CDH) cluster through the CM API.

    Wraps a single :class:`ApiResource` connection and exposes helpers to
    query clusters/services/roles, export/import cluster templates and
    deploy the Cloudera Management Service roles.
    """

    # Class-level defaults; overwritten per instance in __init__.
    _cloudera_manager_host = None
    _port_number = None
    _user_name = None
    _password = None
    _version = 12

    def __init__(self, cloudera_manager_host, port_number, user_name, password, version):
        """Open the CM API connection used by all other methods.

        :param cloudera_manager_host: Cloudera Manager host name.
        :param port_number: Cloudera Manager port (usually 7180).
        :param user_name: CM admin user.
        :param password: CM admin password.
        :param version: CM API version. The required version varies by task:
            "1" suffices to inspect the cluster, 12 is needed to export the
            cluster configuration/template.
        """
        self._cloudera_manager_host = cloudera_manager_host
        self._port_number = port_number
        self._user_name = user_name
        self._password = password
        self._version = version
        self._cloudera_manager_oconnect = ApiResource(
            self._cloudera_manager_host,
            self._port_number,
            self._user_name,
            self._password,
            version=self._version)

    def get_cluster_versions(self):
        """Print name/version of every cluster known to this CM instance.

        :return: the last cluster listed, or None when no cluster exists.
        """
        cluster = None
        # BUG FIX: the ApiResource itself is not iterable; clusters must be
        # listed through get_all_clusters().
        for cluster in self._cloudera_manager_oconnect.get_all_clusters():
            print("%s = %s" % (cluster.name, cluster.version))
        return cluster

    def get_cluster_services(self, cdh_version):
        """Print every service of the given cluster and HDFS health details.

        :param cdh_version: an ApiCluster object (as returned by get_cluster).
        """
        for srv in cdh_version.get_all_services():
            print(srv)
            if srv.type == "HDFS":
                hdfs = srv
        # Raises NameError when the cluster has no HDFS service.
        print("%s %s %s" % (hdfs.name, hdfs.serviceState, hdfs.healthSummary))
        print(hdfs.serviceUrl)
        for chk in hdfs.healthChecks:
            print("%s --- %s" % (chk['name'], chk['summary']))

    def get_cluster_roles_info(self, cdh_version):
        """Print state/health/host of the NAMENODE role of the given service.

        :param cdh_version: a service object exposing get_all_roles().
        """
        for role in cdh_version.get_all_roles():
            if role.type == 'NAMENODE':
                namenode = role
        print("Role name: %s\nState: %s\nHealth: %s\nHost: %s" % (
            namenode.name, namenode.roleState, namenode.healthSummary,
            namenode.hostRef.hostId))

    def get_cdh_metrics_details(self, cdh_version):
        """Print name and unit of every metric reported for the given entity.

        :param cdh_version: an API object exposing get_metrics().
        """
        metrics = cdh_version.get_metrics()
        for metric in metrics:
            print("%s (%s)" % (metric.name, metric.unit))

    def start_service(self, cdh_service_name):
        """Restart the given service and block until the command completes.

        NOTE: despite the name, this issues a restart (original behavior kept).

        :param cdh_service_name: a service object exposing restart().
        """
        service = cdh_service_name.restart()
        print(service.active)
        service_status = service.wait()
        print("Active: %s. Success: %s" % (service_status.active, service_status.success))

    def restart_service(self, cdh_service_name, namenode):
        """Restart a single role (e.g. the namenode) of the given service.

        :param cdh_service_name: a service object exposing restart_roles().
        :param namenode: the role object whose name identifies the role.
        """
        commands = cdh_service_name.restart_roles(namenode.name)
        for command in commands:
            print(command)

    def configure_services(self, cdh_service_name):
        """Print the full service-level configuration of the given service.

        get_config(view='full') returns (service_config, role_type_configs);
        only the service-level dict (index 0) is printed here.
        """
        for name, config in cdh_service_name.get_config(view='full')[0].items():
            print("%s - %s - %s" % (name, config.relatedName, config.description))

    def export_cluster_template(self, template_filename, cluster_name):
        """Export the named cluster's configuration as a JSON template file.

        :param template_filename: destination path for the JSON template.
        :param cluster_name: name of the cluster to export.
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        cdh_template = cluster.export()
        with open(template_filename, 'w') as outfile:
            json.dump(cdh_template.to_json_dict(), outfile, indent=4, sort_keys=True)

    def import_cluster_template(self, template_filename, cluster_name):
        """Import a JSON cluster template into the given cluster.

        :param template_filename: path of a JSON file produced by
            export_cluster_template().
        :param cluster_name: name of the target cluster (fetched as a sanity
            check that it exists).
        """
        # Fetched only to fail fast when the cluster does not exist.
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        with open(template_filename) as data_file:
            data = json.load(data_file)
        # BUG FIX: ApiClusterTemplate/ClouderaManager take the API resource
        # root, not an ApiCluster object.
        template = ApiClusterTemplate(self._cloudera_manager_oconnect).from_json_dict(
            data, self._cloudera_manager_oconnect)
        cms = ClouderaManager(self._cloudera_manager_oconnect)
        command = cms.import_cluster_template(template)
        print(command)

    def deploy_cloudera_manager_services(self):
        """Create the Cloudera Management Service and its six standard roles.

        All roles are placed on the CM host itself.
        """
        varEnableConfigAlerts = True
        varServiceGroupName = "cloudera-scm"
        varServiceUserName = "******"
        varMgmtServiceConfig = {
            'enable_config_alerts': varEnableConfigAlerts,
            'process_groupname': varServiceGroupName,
            'process_username': varServiceUserName,
        }
        varManager = self._cloudera_manager_oconnect.get_cloudera_manager()
        varMgmt = varManager.create_mgmt_service(ApiServiceSetupInfo())
        # update the cloudera service config
        varMgmt.update_config(varMgmtServiceConfig)
        # Get the cloudera services configured (also validates the service
        # was registered; the return value itself is not needed).
        services = varManager.get_service()
        varMgmt.create_role("ACTIVITYMONITOR-1", "ACTIVITYMONITOR", self._cloudera_manager_host)
        varMgmt.create_role("ALERTPUBLISHER-1", "ALERTPUBLISHER", self._cloudera_manager_host)
        varMgmt.create_role("EVENTSERVER-1", "EVENTSERVER", self._cloudera_manager_host)
        varMgmt.create_role("HOSTMONITOR-1", "HOSTMONITOR", self._cloudera_manager_host)
        varMgmt.create_role("SERVICEMONITOR-1", "SERVICEMONITOR", self._cloudera_manager_host)
        varMgmt.create_role("REPORTSMANAGER-1", "REPORTSMANAGER", self._cloudera_manager_host)

    def deploy_activity_monitor(self):
        """Configure the ACTIVITYMONITOR-1 role (database, heap, logging)."""
        varActivityMonitorPassword = "******"
        varMgmt = self._cloudera_manager_oconnect.get_service()
        # config for the activity monitoring
        varActivityMonitorConfig = {
            'firehose_database_host': "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'firehose_database_user': "******",
            'firehose_database_password': varActivityMonitorPassword,
            'firehose_database_type': "postgresql",
            'firehose_database_name': "amon",
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': "true",
        }
        varRole = varMgmt.get_role("ACTIVITYMONITOR-1")
        varRole.update_config(varActivityMonitorConfig)

    def deploy_alert_publisher(self):
        """Configure the ALERTPUBLISHER-1 role (heap, logging)."""
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varAlertPublisherConfig = {
            'alert_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-alertpublisher",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("ALERTPUBLISHER-1")
        varRole.update_config(varAlertPublisherConfig)

    def deploy_event_server(self):
        """Configure the EVENTSERVER-1 role (heap, index dir, logging)."""
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varEventServerConfig = {
            'event_server_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-eventserver",
            'eventserver_index_dir': "/opt/cloudera/lib/cloudera-scm-eventserver",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("EVENTSERVER-1")
        varRole.update_config(varEventServerConfig)

    def deploy_host_monitor(self):
        """Configure the HOSTMONITOR-1 role (heap, storage dir, logging)."""
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varHostMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir': "/opt/cloudera/lib/cloudera-host-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("HOSTMONITOR-1")
        varRole.update_config(varHostMonitorConfig)

    def deploy_service_monitor(self):
        """Configure the SERVICEMONITOR-1 role (heap, storage dir, logging)."""
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varServiceMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir': "/opt/cloudera/lib/cloudera-service-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("SERVICEMONITOR-1")
        varRole.update_config(varServiceMonitorConfig)

    def deploy_report_manager(self):
        """Configure the REPORTSMANAGER-1 role (database, heap, logging)."""
        varReportManagerPassword = "******"
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varReportManagerConfig = {
            'headlamp_database_host': "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'headlamp_database_user': "******",
            'headlamp_database_password': varReportManagerPassword,
            'headlamp_database_type': "postgresql",
            'headlamp_database_name': "rman",
            'headlamp_heapsize': 536870912,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-headlamp",
            'headlamp_scratch_dir': "/opt/cloudera/lib/cloudera-scm-headlamp",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("REPORTSMANAGER-1")
        varRole.update_config(varReportManagerConfig)

    def deploy_services(self):
        """Start the Cloudera Management Service and wait for completion."""
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varMgmt.start().wait()

    def create_hadoop_cluster(self, cluster_name="POC-D Cluster",
                              cdh_version="CDH5", cdh_full_version="5.8.0"):
        """Create a new cluster on this CM instance.

        BUG FIX: the original referenced an undefined ``varApiResource``;
        the connection held by this instance is used instead. The previous
        hard-coded name/version are kept as defaults for compatibility.

        :param cluster_name: display name of the new cluster.
        :param cdh_version: major CDH version string.
        :param cdh_full_version: full CDH version string.
        :return: the created ApiCluster object.
        """
        varCluster = self._cloudera_manager_oconnect.create_cluster(
            cluster_name, cdh_version, cdh_full_version)
        return varCluster
def do_call(user, password, man_host, man_port, cluster_name, parcel_name, parcel_version, parcel_repo, init_pre_dir, init_post_dir):
    """Deploy (download/distribute/activate) a parcel on one or all clusters.

    Registers parcel_repo with Cloudera Manager when needed, walks the parcel
    through its lifecycle stages, runs optional pre/post init shell scripts
    and, when a new parcel was activated, redeploys client configs and
    restarts the affected services.

    :param user: CM admin user.
    :param password: CM admin password.
    :param man_host: CM host.
    :param man_port: CM port.
    :param cluster_name: target cluster name, or None for every cluster.
    :param parcel_name: parcel product name.
    :param parcel_version: parcel version; must fully match REGEX_VERSION and
        be the trailing path component of parcel_repo.
    :param parcel_repo: repository URL the parcel is served from.
    :param init_pre_dir: optional directory of *.sh scripts run before config
        deployment.
    :param init_post_dir: optional directory of *.sh scripts run afterwards.
    :raises Exception: on invalid version/repo or failed deployment steps.
    """
    api = ApiResource(man_host, man_port, user, password, False, MAN_API_VERSION)
    if not parcel_repo.endswith('/'):
        parcel_repo += '/'
    # Hoisted: re.match was previously evaluated twice on the same input.
    version_match = re.match(REGEX_VERSION, parcel_version)
    if version_match is None or version_match.group() != parcel_version:
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version +
                        '] expected to match regular expression [' + REGEX_VERSION + ']')
    if not parcel_repo.endswith(parcel_version + '/'):
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version +
                        '] when compared with repository [' + parcel_repo + ']')
    cm_config = api.get_cloudera_manager().get_config(view='full')
    repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
    repo_list = repo_config.value or repo_config.default
    if parcel_repo not in repo_list:
        repo_list += ',' + parcel_repo
        api.get_cloudera_manager().update_config(
            {'REMOTE_PARCEL_REPO_URLS': repo_list})
        time.sleep(POLL_SEC)  # The parcel synchronize end-point is not exposed via the API, so sleep instead
    cluster_names = []
    if cluster_name is None:
        for cluster in api.get_all_clusters():
            cluster_names.append(cluster.name)
    else:
        cluster_names.append(cluster_name)
    for cluster_name_itr in cluster_names:
        print('Cluster [DEPLOYMENT] starting ... ')
        cluster = api.get_cluster(cluster_name_itr)
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        parcel_already_activated = False
        print('Parcel [DEPLOYMENT] starting ... ')
        if parcel.stage == 'ACTIVATED':
            parcel_already_activated = True
            print('Parcel [DEPLOYMENT] already deployed')
        else:
            do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD',
                         'AVAILABLE_REMOTELY', 'DOWNLOADED', 'start_download')
            do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE',
                         'DOWNLOADED', 'DISTRIBUTED', 'start_distribution')
            do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE',
                         'DISTRIBUTED', 'ACTIVATED', 'activate')
            parcel = cluster.get_parcel(parcel_name, parcel_version)
            if parcel.stage != 'ACTIVATED':
                raise Exception('Parcel is currently mid-stage [' + parcel.stage +
                                '], please wait for this to complete')
        print('Parcel [DEPLOYMENT] finished')
        if init_pre_dir is not None and os.path.isdir(init_pre_dir):
            print('Cluster [PRE_INIT] starting ... ')
            for script in glob.glob(init_pre_dir + '/*.sh'):
                subprocess.call([script])
            print('Cluster [PRE_INIT] finished')
        if not parcel_already_activated:
            print('Cluster [CONFIG_DEPLOYMENT] starting ... ')
            # BUG FIX: deploy_client_config() was called twice back-to-back,
            # triggering two deployments; only the awaited command is kept.
            cmd = cluster.deploy_client_config()
            if not cmd.wait(TIMEOUT_SEC).success:
                raise Exception('Failed to deploy client configs')
            print('Cluster [CONFIG_DEPLOYMENT] finished')
            print('Cluster [RESTART] starting ... ')
            # Only these service types pick up the new parcel via restart.
            for service in cluster.get_all_services():
                if service.type == 'FLUME':
                    service.restart().wait()
                if service.type == 'HIVE':
                    service.restart().wait()
                if service.type == 'YARN':
                    service.restart().wait()
            print('Cluster [RESTART] finished')
        if init_post_dir is not None and os.path.isdir(init_post_dir):
            print('Cluster [POST_INIT] starting ... ')
            for script in glob.glob(init_post_dir + '/*.sh'):
                subprocess.call([script])
            print('Cluster [POST_INIT] finished')
        print('Cluster [DEPLOYMENT] finished')
#!/usr/bin/env python #Author: Pratap Raj #Purpose: Start Cloudera Management services import sys import socket from cm_api.api_client import ApiResource from cm_api.endpoints.cms import ClouderaManager ######### # Do not edit any system variables here. They are all passed from the startstopcluster.sh script, so make changes there. cmhost = str(sys.argv[1]) cmport = str(sys.argv[2]) cmusername = str(sys.argv[3]) cmpassword = str(sys.argv[4]) tlspref = str(sys.argv[5]) ######### api = ApiResource(cmhost, server_port=cmport, username=cmusername, password=cmpassword, use_tls=tlspref) mgmt = api.get_cloudera_manager().get_service() cmstartstatus = mgmt.start().wait() print cmstartstatus.success
def main():
    """Ansible module entry point driving a Cloudera Manager deployment.

    Dispatches on the 'action' module parameter: create the cluster, add
    hosts, create the management service, deploy parcels/services/roles,
    enable HDFS/YARN HA, set configuration values, manage service/cluster
    state, or create snapshot policies. Every branch terminates through
    module.exit_json() / module.fail_json().
    """
    module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

    # Cloudera Manager is assumed to run on the host executing this module.
    api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
    cluster_name = CLUSTER_NAME

    manager = api.get_cloudera_manager()

    action_a = module.params.get('action', None)

    if action_a == 'create_cluster':
        license_a = module.params.get('license', None)
        version_a = module.params.get('version', None)

        cluster_list = [x.name for x in api.get_all_clusters()]
        if cluster_name in cluster_list:
            module.exit_json(changed=False, msg='Cluster exists')
        else:
            cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
            if license_a is None:
                manager.begin_trial()
            else:
                manager.update_license(license_a.decode('base64'))
            module.exit_json(changed=True, msg='Cluster created')
    elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs',
                      'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service',
                      'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command',
                      'cluster', 'create_snapshot_policy']:
        # more complicated actions that need a created cluster go here
        cluster = api.get_cluster(cluster_name)
        host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

        # adds a host to the cluster
        # host_name should be in the internal DNS format, ip-xx-xx-xx.copute.internal
        if action_a == 'add_host':
            host_a = module.params.get('host', None)

            host_list = host_map.keys()
            if host_a in host_list:
                module.exit_json(changed=False, msg='Host already in cluster')
            else:
                try:
                    cluster.add_hosts([host_a])
                except ApiException:
                    # if a host isn't there, it could be because the agent didn't manage to connect yet
                    # so let's wait a moment for it
                    sleep(120)
                    cluster.add_hosts([host_a])
                module.exit_json(changed=True, msg='Host added')

        # create management service and set it's basic configuration
        # this needs a separate function since management is handled
        # differently than the rest of services
        elif action_a == 'create_mgmt':
            host_a = module.params.get('host', None)

            # getting the management service is the only way to check if mgmt exists
            # an exception means there isn't one
            try:
                mgmt = manager.get_service()
                module.exit_json(changed=False, msg='Mgmt service already exists')
            except ApiException:
                pass

            mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

            # this is ugly... and I see no good way to unuglify it
            firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
            reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

            # since there is no easy way of configuring the manager... let's do it here :(
            role_conf = defaultdict(dict)
            role_conf['ACTIVITYMONITOR'] = {
                'firehose_database_host': '{0}:7432'.format(host_a),
                'firehose_database_user': '******',
                'firehose_database_password': firehose_passwd,
                'firehose_database_type': 'postgresql',
                'firehose_database_name': 'amon',
                'firehose_heapsize': '268435456',
            }
            role_conf['EVENTSERVER'] = {
                'event_server_heapsize': '215964392'
            }
            role_conf['REPORTSMANAGER'] = {
                'headlamp_database_host': '{0}:7432'.format(host_a),
                'headlamp_database_user': '******',
                'headlamp_database_password': reports_passwd,
                'headlamp_database_type': 'postgresql',
                'headlamp_database_name': 'rman',
                'headlamp_heapsize': '215964392',
            }

            roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
            # create mangement roles
            for role in roles:
                mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

            # update configuration of each
            for group in mgmt.get_all_role_config_groups():
                group.update_config(role_conf[group.roleType])

            mgmt.start().wait()
            # after starting this service needs time to spin up
            sleep(30)
            module.exit_json(changed=True, msg='Mgmt created and started')

        # deploy a given parcel on all hosts in the cluster
        # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
        elif action_a == 'deploy_parcel':
            name_a = module.params.get('name', None)
            version_a = module.params.get('version', None)

            if "latest" in version_a:
                available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
                if "-latest" in version_a:
                    version_substr = match('(.+?)-latest', version_a).group(1)
                # if version is just "latest", try to check everything
                else:
                    version_substr = ".*"
                try:
                    [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
                except ValueError:
                    module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
            else:
                version_parcel = version_a

            # we now go through various stages of getting the parcel
            # as there is no built-in way of waiting for an operation to complete
            # we use loops with sleep to get it done
            parcel = cluster.get_parcel(name_a, version_parcel)
            if parcel.stage == 'AVAILABLE_REMOTELY':
                parcel.start_download()
                while parcel.stage != 'DOWNLOADED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    sleep(10)
            if parcel.stage == 'DOWNLOADED':
                parcel.start_distribution()
                while parcel.stage != 'DISTRIBUTED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    # sleep while hosts report problems after the download
                    for i in range(12):
                        sleep(10)
                        if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                            break

            # since parcels are distributed automatically when a new host is added to a cluster
            # we can encounter the ,,ACTIVATING'' stage then
            if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
                if parcel.stage == 'DISTRIBUTED':
                    parcel.activate()
                while parcel.stage != 'ACTIVATED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    # this sleep has to be large because although the operation is very fast
                    # it makes the management and cloudera hosts go bonkers, failing all of the health checks
                    sleep(10)

                # sleep while hosts report problems after the distribution
                for i in range(60):
                    sleep(10)
                    if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                        break

                module.exit_json(changed=True, msg='Parcel activated')

            if parcel.stage == 'ACTIVATED':
                module.exit_json(changed=False, msg='Parcel already activated')

            # if we get down here, something is not right
            module.fail_json(msg='Invalid parcel state')

        # deploy nodes for workers, according to SERVICE_WORKER_MAP
        # also give them sane names and init zookeeper and kafka ones
        # which need id's specified
        elif action_a == 'deploy_service_worker_nodes':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)
            service_name = SERVICE_MAP[service_a]
            role_name = SERVICE_WORKER_MAP[service_a]['name']
            full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)
            nodes = [x for x in service.get_all_roles() if role_name in x.name]

            # if host already has the given group, we should skip it
            if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
                module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
            # find out the highest id that currently exists
            else:
                node_names = [x.name for x in nodes]
                if len(node_names) == 0:
                    # if no nodes, start numbering from 1
                    node_i = 1
                else:
                    # take the max number and add 1 to it
                    node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

                if service_name == 'ZOOKEEPER':
                    role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
                    # zookeeper needs a per-node ID in the configuration, so we set it now
                    role.update_config({'serverId': node_i})
                elif service_name == 'KAFKA':
                    role = service.create_role(full_role_name.format(node_i), role_name, host_a)
                    # kafka needs a per-node ID in the configuration, so we set it now
                    role.update_config({'broker.id': node_i})
                else:
                    service.create_role(full_role_name.format(node_i), role_name, host_a)

                module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

        # deploy a service. just create it, don't do anything more
        # this is needed maily when we have to set service properties before role deployment
        elif action_a == 'deploy_service':
            name_a = module.params.get('name', None)
            if not name_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(name_a))
            service_name = SERVICE_MAP[name_a]
            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
                module.exit_json(changed=True, msg='{0} service created'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

        # deploy the base hdfs roles (the namenode and secondary)
        # this doesn't create the service, as at least one datanode should already be added!
        # the format also requires certain properties to be set before we run it
        elif action_a == 'deploy_hdfs_base':
            nn_host_a = module.params.get('nn_host', None)
            sn_host_a = module.params.get('sn_host', None)

            changed = False

            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't create a secondary namenode when:
            #- there is one that already exists
            #- there is a second namenode, which means we have HA and don't need a secondary
            if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
                hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
                changed = True

            # create a namenode and format it's FS
            # formating the namenode requires at least one datanode and secondary namenode already in the cluster!
            if not 'HDFS-NAMENODE' in hdfs_roles:
                hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
                for command in hdfs.format_hdfs('HDFS-NAMENODE'):
                    if command.wait().success is False:
                        module.fail_json(msg='Failed formating HDFS namenode with error: {0}'.format(command.resultMessage))
                changed = True

            module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

        # enable HttpFS for HDFS
        # HUE require this for support HA in HDFS
        elif action_a == 'deploy_hdfs_httpfs':
            host_a = module.params.get('host', None)
            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't install second instance of HttpFS
            if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
                module.exit_json(changed=False, msg='HDFS HttpFS service already exists')

            hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a])

            module.exit_json(changed=True, msg='HDFS HttpFS service created')

        # enable HA for HDFS
        # this deletes the secondary namenode and creates a second namenode in it's place
        # also, this spawns 3 journal node and 2 failover controller roles
        elif action_a == 'deploy_hdfs_ha':
            sn_host_a = module.params.get('sn_host', None)
            jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

            hdfs = cluster.get_service('HDFS')

            # if there's a second namenode, this means we already have HA enabled
            if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
                # this is bad and I should feel bad
                # jns is a list of dictionaries, each dict passes the required journalnode parameters
                jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

                # this call is so long because we set some predictable names for the sevices
                command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                            active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

                children = command.wait().children
                for command_children in children:
                    # The format command is expected to fail, since we already formated the namenode
                    # NOTE(review): this checks the parent command's success per child — possibly
                    # command_children.success was intended; behavior kept as-is.
                    if command_children.name != 'Format' and command.success is False:
                        module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for HDFS service')
            else:
                module.exit_json(changed=False, msg='HDFS HA already enabled')
        # enable HA for YARN
        elif action_a == 'deploy_rm_ha':
            sn_host_a = module.params.get('sn_host', None)

            yarn = cluster.get_service('YARN')

            # if there are two roles matching to this name, this means HA for YARN is enabled
            if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
                command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
                children = command.wait().children
                for command_children in children:
                    # NOTE(review): checks the parent command per child (see deploy_hdfs_ha).
                    if command.success is False:
                        module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for YARN service')
            else:
                module.exit_json(changed=False, msg='YARN HA already enabled')

        # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
        # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
        elif action_a == 'deploy_base_roles':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)

            service_name = SERVICE_MAP[service_a]
            changed = False

            if not service_name in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)

            service_roles = [x.name for x in service.get_all_roles()]

            # create each service from the map
            for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
                # check if role already exists, script cant compare it directly
                # after enabling HA on YARN roles will have random strings in names
                if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
                    service.create_role(role_name, cloudera_name, host_a)
                    changed = True

                    # init commmands
                    if role_name in SERVICE_INIT_COMMANDS.keys():
                        for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
                            # different handling of commands specified by name and
                            # ones specified by an instance method
                            if ismethod(command_to_run):
                                command = command_to_run(service)
                            else:
                                command = service.service_command_by_name(command_to_run)

                            if command.wait().success is False:
                                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

            if changed == True:
                module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

        # set config values for a given service/role
        elif action_a == 'set_config':
            entity_a = module.params.get('entity', None)
            service_a = module.params.get('service', None)
            role_a = module.params.get('role', None)
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)

            if not service_a in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(service_a))

            # since management is handled differently, it needs a different service
            if service_a == 'management':
                service = manager.get_service()
            elif service_a == 'cm':
                service = manager
            else:
                service = cluster.get_service(SERVICE_MAP[service_a])

            # role and service configs are handled differently
            if entity_a == 'service':
                prev_config = service.get_config()
                curr_config = service.update_config({name_a: value_a})
                if service_a == 'cm':
                    prev_config = [prev_config]
                    curr_config = [curr_config]
                module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))
            elif entity_a == 'role':
                if not role_a in ROLE_MAP:
                    # BUG FIX: the message previously formatted the service object
                    # instead of the unknown role name.
                    module.fail_json(msg='Unknown role: {0}'.format(role_a))

                role = service.get_role_config_group(ROLE_MAP[role_a])
                prev_config = role.get_config()
                curr_config = role.update_config({name_a: value_a})
                module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))
            else:
                module.fail_json(msg='Invalid entity, must be one of service, role')

        # handle service state
        # currently this only can start/restart a service
        elif action_a == 'service':
            state_a = module.params.get('state', None)
            service_a = module.params.get('service', None)

            try:
                if service_a == 'cm':
                    service = manager.get_service()
                else:
                    service = cluster.get_service(SERVICE_MAP[service_a])
            except ApiException:
                module.fail_json(msg='Service does not exist')

            # when starting a service, we also deploy the client config for it
            if state_a == 'started':
                if service.serviceState == 'STARTED':
                    module.exit_json(changed=False, msg='Service already running')
                method = service.start
                verb = "start"
            elif state_a == 'restarted':
                method = service.restart
                verb = "restart"
            else:
                # BUG FIX: 'method'/'verb' were left unbound for any other
                # state, crashing with NameError below.
                module.fail_json(msg='Invalid state, must be one of started, restarted')

            try:
                command = service.deploy_client_config()
                if command.wait().success is False:
                    module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
            # since there is no way to check if a service handles client config deployments
            # we try our best and pass the exception if it doesn't
            # BUG FIX: 'except ApiException, AttributeError' bound the
            # ApiException to the name AttributeError and never caught
            # AttributeError itself; a tuple catches both as intended.
            except (ApiException, AttributeError):
                pass

            method().wait()
            # we need to wait for cloudera checks to complete...
            # otherwise it will report as failing
            sleep(10)
            for i in range(24):
                sleep(10)
                service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
                if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                    break

            service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
            if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
            else:
                module.fail_json(msg='Service {0} failed'.format(verb))

        # handle cluster
        # currently this only can restart
        elif action_a == 'cluster':
            state_a = module.params.get('state', None)

            if state_a == 'restarted':
                command = cluster.restart(redeploy_client_configuration=True)
                if command.wait().success is False:
                    # BUG FIX: 'resart' typo in the failure message.
                    module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
                else:
                    module.exit_json(changed=True, msg='Cluster restart successful')

        # Snapshot policy
        # only create is supported
        elif action_a == 'create_snapshot_policy':
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)
            service_a = module.params.get('service', None)
            service = cluster.get_service(SERVICE_MAP[service_a])
            payload = loads(value_a)
            # checking if policy already exists. Exception is expected when configure for the first time.
            try:
                test = service.get_snapshot_policy(name_a)
                module.exit_json(changed=False, msg='Defined policy already exists')
            except ApiException:
                pass
            try:
                command = service.create_snapshot_policy(payload)
                module.exit_json(changed=True, msg='Snapshot policy was created.')
            # BUG FIX: tuple form so AttributeError is actually caught too.
            except (ApiException, AttributeError):
                module.fail_json(msg='ERROR in creating snapshot policy.')
class ClouderaManager(object):
    """
    Complete orchestration of a cluster from start to finish, assuming all
    the hosts are configured and Cloudera Manager is installed with all the
    required databases set up.

    Handles all the steps required in creating a cluster. All the functions
    are built to run idempotently, so you should be able to resume from any
    failed step by re-running setup().
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        # :param module: fail/exit reporting object (Ansible-style module).
        # :param config: dict with 'cm' (host/username/password), 'cluster',
        #     'services' and 'parcels' sections -- schema inferred from usage
        #     below; TODO confirm against the caller.
        # :param trial: when True, begin a trial license instead of
        #     registering a full one.
        # :param license_txt: full license text, used when trial is False.
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license: either trial mode is begun or a full
        license is entered and registered. Idempotent -- if a license is
        already active only its details are printed.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                # BUG FIX: the original referenced the bare name
                # 'license_txt', which is not in scope inside this method
                # (NameError at runtime); use the instance attribute.
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(
                        self.module,
                        'License should be provided or trial should be specified'
                    )
            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner,
                                                    _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only
        created if another one doesn't exist with the same name, and only
        hosts not already in the cluster are added.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER", msg="Creating Cluster entity: {}".format(
                cluster_config['name']))
            self.cluster = self.api.create_cluster(
                cluster_config['name'],
                cluster_config['version'],
                cluster_config['fullVersion'])

        cluster_hosts = [
            self.api.get_host(host.hostId).hostname
            for host in self.cluster.list_hosts()
        ]
        # Only add hosts that are not yet part of the cluster (idempotency).
        hosts = [host for host in cluster_config['hosts']
                 if host not in cluster_hosts]
        if hosts:
            # ROBUSTNESS: skip the API call entirely when nothing needs adding.
            self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        """Download, distribute and activate every configured parcel."""
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on
        all hosts.

        :param cmd: A command instance used for tracking the status of the
            command.
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            # Still running -- raise so @retry polls again.
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                # Transient CM condition; retry rather than fail outright.
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS", msg="Host inspection completed: {}".format(
            cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        Idempotent: returns early when the services are already started.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        # BUG FIX: the original iterated over the bare name 'config' (not in
        # scope inside this method); use the instance attribute in both loops.
        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(
                role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(
                self.module,
                "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services.
        Stop/Start those services.
        Perform any post service startup actions.

        :param services: List of services to perform service specific actions.
        """
        service_classes = []

        # Create and pre-configure provided services.
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                # Look up the service class by name in this module.
                svc = getattr(sys.modules[__name__], service)(self.cluster,
                                                              service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on
        # other services and it is essential that the client configs are in
        # place.
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service.
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This
            # helps with re-running the script after fixing errors.
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        """Run the full orchestration end-to-end; every step is idempotent."""
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
class Deploy: def __init__(self, cm_port='7180', cm_user='******', cm_passwd='admin', cluster_name='cluster1'): self.cluster_name = cluster_name self.cdh_version = "CDH5" self.cfg = ParseConfig() self.host_list = self.cfg.get_hosts() self._get_host_allocate() self.cm_host = self.host_list[0] self.api = ApiResource(self.cm_host, cm_port, cm_user, cm_passwd, version=7) self.cm = self.api.get_cloudera_manager() try: self.cluster = self.api.get_cluster(self.cluster_name) except: try: self.cluster = self.api.create_cluster(self.cluster_name, self.cdh_version) except: err('Cannot connect to cloudera manager on %s' % self.cm_host) # add all our hosts to the cluster try: self.cluster.add_hosts(self.host_list) info('Add hosts successfully') except Exception as e: if e.code == 400: info('Already Added hosts') elif e.code == 404: err(e.message) def _auto_allocate(self, hosts): # enable mgmt node if node count is larger than mgmt_th mgmt_th = 6 if type(hosts) != list: err('hosts parameter should be a list') host_num = len(hosts) # node<=3, ZK=1 ,node>3, ZK=3 zk_num = 1 if host_num <= 3 else 3 # with mgmt node if host_num >= mgmt_th: self.ap_host = self.es_host = self.ho_host = self.sm_host = self.nn_host = self.hm_host = self.jt_host = hosts[ 0] self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts[1:] self.snn_host = hosts[1] self.hms_host = hosts[2] self.hs2_host = hosts[3] # without mgmt node else: if host_num == 1: self.ap_host = self.es_host = self.ho_host = self.sm_host = self.jt_host = \ self.nn_host = self.hm_host = self.snn_host = self.hms_host = self.hs2_host = hosts[0] elif host_num > 1: # nn, snn not on same node tmp_hosts = hosts[:] self.nn_host = choice(tmp_hosts) tmp_hosts.remove(self.nn_host) self.snn_host = choice(tmp_hosts) self.hm_host = choice(tmp_hosts) self.jt_host = choice(hosts) self.hms_host = choice(hosts) self.hs2_host = choice(hosts) # cm self.ap_host = choice(hosts) self.es_host = choice(hosts) self.ho_host = choice(hosts) self.sm_host = 
choice(hosts) self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts self.zk_hosts = hosts[-zk_num:] def _get_host_allocate(self): roles = self.cfg.get_roles() # auto set if no role config found if not roles: self._auto_allocate(self.host_list) return valid_roles = [ 'DN', 'RS', 'ZK', 'HM', 'NN', 'SNN', 'AP', 'ES', 'SM', 'HO', 'TT', 'JT', 'HMS', 'HS2' ] role_host = defaultdict(list) for item in roles: for role in item[1]: role = role.strip() if role not in valid_roles: err('Incorrect role config') role_host[role].append(item[0]) # cdh self.nn_host = role_host['NN'][0] self.snn_host = role_host['SNN'][0] self.hm_host = role_host['HM'][0] self.jt_host = role_host['JT'][0] self.hms_host = role_host['HMS'][0] self.hs2_host = role_host['HS2'][0] self.tt_hosts = role_host['TT'] self.zk_hosts = role_host['ZK'] self.dn_hosts = role_host['DN'] self.rs_hosts = role_host['RS'] # cm self.ap_host = role_host['AP'][0] self.es_host = role_host['ES'][0] self.ho_host = role_host['HO'][0] self.sm_host = role_host['SM'][0] def setup_cms(self): try: self.cm.delete_mgmt_service() except: pass # create the management service try: mgmt = self.cm.create_mgmt_service(ApiServiceSetupInfo()) mgmt.create_role('AlertPublisher', "ALERTPUBLISHER", self.ap_host) mgmt.create_role('EventServer', "EVENTSERVER", self.es_host) mgmt.create_role('HostMonitor', "HOSTMONITOR", self.hm_host) mgmt.create_role('ServiceMonitor', "SERVICEMONITOR", self.sm_host) ok('Cloudera management service created successfully.') except ApiException: info('Cloudera management service had already been created.') def setup_parcel(self): parcels_list = [] i = 1 for p in self.cluster.get_all_parcels(): if p.stage == 'AVAILABLE_REMOTELY': continue elif p.stage == 'ACTIVATED': info('Parcel [%s] has already been activated' % p.version) return else: print '\t' + str(i) + ': ' + p.product + ' ' + p.version i += 1 parcels_list.append(p) if len(parcels_list) == 0: err('No downloaded ' + self.cdh_version + ' parcel found!') elif 
len(parcels_list) > 1: index = raw_input('Input parcel number:') if not index.isdigit: err('Error index, must be a number') cdh_parcel = parcels_list[int(index) - 1] else: cdh_parcel = parcels_list[0] # # download the parcel # print "Starting parcel download. This might take a while." # cmd = cdh_parcel.start_download() # if cmd.success != True: # print "Parcel download failed!" # exit(0) # # make sure the download finishes # while cdh_parcel.stage != 'DOWNLOADED': # sleep(5) # cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version) # print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded" # distribute the parcel info('Starting parcel distribution. This might take a while.') cmd = cdh_parcel.start_distribution() i = 0 while cmd.success == None: i += 1 sleep(5) cmd = cmd.fetch() s = '.' * i print '\r%s' % s, sys.stdout.flush() if cmd.success != True: err('Parcel distribution failed!') # make sure the distribution finishes while cdh_parcel.stage != "DISTRIBUTED": sleep(5) cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version) ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' distributed') # activate the parcel cmd = cdh_parcel.activate() if cmd.success != True: err('Parcel activation failed!') # make sure the activation finishes while cdh_parcel.stage != "ACTIVATED": sleep(5) cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version) ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' activated') def _create_service(self, sdata): try: self.cluster.get_service(sdata['sname']) info('Service %s had already been configured' % sdata['sname']) except ApiException: service = self.cluster.create_service(sdata['sname'], sdata['stype']) ok('Service %s had been created successfully' % sdata['sname']) for role in sdata['roles']: if role.has_key('rhost'): service.create_role(role['rname'], role['rtype'], role['rhost']) elif role.has_key('rhosts'): rid = 0 for host in role['rhosts']: rid += 1 
service.create_role(role['rname'] + '-' + str(rid), role['rtype'], host) def setup_cdh(self): service_data = [{ 'sname': 'hdfs', 'stype': 'HDFS', 'roles': [{ 'rname': 'hdfs-namenode', 'rtype': 'NAMENODE', 'rhost': self.nn_host }, { 'rname': 'hdfs-secondarynamenode', 'rtype': 'SECONDARYNAMENODE', 'rhost': self.snn_host }, { 'rname': 'hdfs-datanode', 'rtype': 'DATANODE', 'rhosts': self.dn_hosts }] }, { 'sname': 'zookeeper', 'stype': 'ZOOKEEPER', 'roles': [{ 'rname': 'zookeeper', 'rtype': 'SERVER', 'rhosts': self.zk_hosts }] }, { 'sname': 'hbase', 'stype': 'HBASE', 'roles': [{ 'rname': 'hbase-master', 'rtype': 'MASTER', 'rhost': self.hm_host }, { 'rname': 'hdfs-regionserver', 'rtype': 'REGIONSERVER', 'rhosts': self.rs_hosts }] }, { 'sname': 'hive', 'stype': 'HIVE', 'roles': [{ 'rname': 'hive-metastore', 'rtype': 'HIVEMETASTORE', 'rhost': self.hms_host }, { 'rname': 'hive-server2', 'rtype': 'HIVESERVER2', 'rhost': self.hs2_host }, { 'rname': 'hive-gateway', 'rtype': 'GATEWAY', 'rhosts': self.dn_hosts }] }, { 'sname': 'mapreduce', 'stype': 'MAPREDUCE', 'roles': [{ 'rname': 'mapreduce-jobtracker', 'rtype': 'JOBTRACKER', 'rhost': self.jt_host }, { 'rname': 'mapreduce-tasktracker', 'rtype': 'TASKTRACKER', 'rhosts': self.tt_hosts }] }] for sdata in service_data: self._create_service(sdata) # additional config for hive try: hive_service = self.cluster.get_service('hive') hive_metastore_host = self.cm_host # should be same as cm's host, FQDN hive_metastore_name = 'hive' hive_metastore_password = '******' hive_metastore_database_port = '7432' hive_metastore_database_type = 'postgresql' hive_config = { 'hive_metastore_database_host' : hive_metastore_host, \ 'hive_metastore_database_name' : hive_metastore_name, \ 'hive_metastore_database_password' : hive_metastore_password, \ 'hive_metastore_database_port' : hive_metastore_database_port, \ 'hive_metastore_database_type' : hive_metastore_database_type } hive_service.update_config(hive_config) ok('Additional hive configs had been 
updated') except ApiException as e: err(e.message) # use auto configure for *-site.xml configs try: self.cluster.auto_configure() except ApiException as e: err(e.message) def start_cms(self): # start the management service info('Starting cloudera management service...') cms = self.cm.get_service() cms.start().wait() ok('Cloudera management service started successfully') def start_cdh(self): info('Excuting first run command. This might take a while.') cmd = self.cluster.first_run() while cmd.success == None: cmd = cmd.fetch() sleep(1) if cmd.success != True: err('The first run command failed: ' + cmd.resultMessage) ok('First run successfully executed. Your cluster has been set up!')
def main():
    # End-to-end cluster bring-up: connect to CM, create the cluster, deploy
    # the management service, distribute parcels, then deploy each CDH
    # service in dependency order. All *_CONFIG / *_HOST names are
    # module-level constants defined elsewhere in this file.
    API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS)
    MANAGER = API.get_cloudera_manager()
    MANAGER.update_config(CM_CONFIG)
    print "Connected to CM host on " + CM_HOST + " and updated CM configuration"

    CLUSTER = init_cluster(API, CLUSTER_NAME, CDH_VERSION, CLUSTER_HOSTS, CM_HOST)
    print "Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CDH_VERSION

    # Management service (activity/alert/event/host/service monitors, navigator, reports).
    deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)
    print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST

    deploy_parcels(CLUSTER, PARCELS)
    print "Downloaded and distributed parcels: "
    PRETTY_PRINT.pprint(PARCELS)

    zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME, ZOOKEEPER_HOSTS, ZOOKEEPER_SERVICE_CONFIG, ZOOKEEPER_ROLE_CONFIG)
    print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: "
    PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS)

    hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG, HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST, HDFS_NAMENODE_CONFIG, HDFS_SECONDARY_NAMENODE_HOST, HDFS_SECONDARY_NAMENODE_CONFIG, HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG, HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG)
    print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
    PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS)
    init_hdfs(hdfs_service, HDFS_SERVICE_NAME, CMD_TIMEOUT)
    print "Initialized HDFS service"

    # mapred and yarn are mutually exclusive; only deploy one of them
    #mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG)
    print "Deployed MapReduce service " + MAPRED_SERVICE_NAME + " using JobTracker on " + MAPRED_JT_HOST + " and TaskTrackers running on "
    PRETTY_PRINT.pprint(MAPRED_TT_HOSTS)

    yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG, YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST, YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG, YARN_GW_HOSTS, YARN_GW_CONFIG)
    print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on "
    PRETTY_PRINT.pprint(YARN_NM_HOSTS)

    spark_service = deploy_spark(CLUSTER, SPARK_SERVICE_NAME, SPARK_SERVICE_CONFIG, SPARK_MASTER_HOST, SPARK_MASTER_CONFIG, SPARK_WORKER_HOSTS, SPARK_WORKER_CONFIG, SPARK_GW_HOSTS, SPARK_GW_CONFIG)
    print "Deployed SPARK service " + SPARK_SERVICE_NAME + " using SparkMaster on " + SPARK_MASTER_HOST + " and SparkWorkers on "
    PRETTY_PRINT.pprint(SPARK_WORKER_HOSTS)

    deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
    print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on "
    PRETTY_PRINT.pprint(HBASE_RS_HOSTS)

    hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG, HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST, HIVE_HS2_CONFIG, HIVE_WHC_HOST, HIVE_WHC_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG)
    print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST
    init_hive(hive_service)
    print "Initialized Hive service"

    impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME, IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST, IMPALA_SS_CONFIG, IMPALA_CS_HOST, IMPALA_CS_CONFIG, IMPALA_ID_HOSTS, IMPALA_ID_CONFIG)
    print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on "
    PRETTY_PRINT.pprint(IMPALA_ID_HOSTS)

    #Need to start the cluster now as subsequent services need the cluster to be runnign
    #TODO can we just start ZK, and maybe HDFS, instead of everything? It's just needed for the search service
    print "About to restart cluster"
    CLUSTER.stop().wait()
    CLUSTER.start().wait()
    print "Done restarting cluster"

    search_service = deploy_search(CLUSTER, SEARCH_SERVICE_NAME, SEARCH_SERVICE_CONFIG, SEARCH_SOLR_HOST, SEARCH_SOLR_CONFIG, SEARCH_GW_HOSTS, SEARCH_GW_CONFIG)
    print "Deployed Search service " + SEARCH_SERVICE_NAME + " using SOLRHost " + SEARCH_SOLR_HOST

    flume_service = deploy_flume(CLUSTER, FLUME_SERVICE_NAME, FLUME_SERVICE_CONFIG, FLUME_AGENT_HOSTS, FLUME_AGENT_CONFIG)
    print "Deployed Flume service " + FLUME_SERVICE_NAME + " using FlumeAgents on "
    PRETTY_PRINT.pprint(FLUME_AGENT_HOSTS)

    oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME, OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST, OOZIE_SERVER_CONFIG)
    print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST

    sqoop_service = deploy_sqoop(CLUSTER, SQOOP_SERVICE_NAME, SQOOP_SERVICE_CONFIG, SQOOP_SERVER_HOST, SQOOP_SERVER_CONFIG)
    print "Deployed Sqoop service " + SQOOP_SERVICE_NAME + " using SqoopServer on " + SQOOP_SERVER_HOST

    hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG, HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST, HUE_KTR_CONFIG)
    print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST

    #deploy_accumulo(CLUSTER, ACCUMULO_SERVICE_NAME, ACCUMULO_SERVICE_CONFIG, ACCUMULO_MASTER_HOSTS, ACCUMULO_MASTER_CONFIG, ACCUMULO_TRACER_HOSTS, ACCUMULO_TRACER_CONFIG, ACCUMULO_TSERVER_HOSTS, ACCUMULO_TSERVER_CONFIG, ACCUMULO_LOGGER_HOSTS, ACCUMULO_LOGGER_CONFIG, ACCUMULO_MONITOR_HOST, ACCUMULO_MONITOR_CONFIG, ACCUMULO_GC_HOST, ACCUMULO_GC_CONFIG, ACCUMULO_GATEWAY_HOSTS, ACCUMULO_GATEWAY_CONFIG)

    # Final restart so every service picks up the full set of client configs.
    print "About to restart cluster."
    CLUSTER.stop().wait()
    CLUSTER.start().wait()
    print "Done restarting cluster."

    post_startup(CLUSTER, hdfs_service, oozie_service)
    print "Finished deploying Cloudera cluster. Go to http://" + CM_HOST + ":7180 to administer the cluster."
    print "If the Oozie service (and therefore the HUE service as well, which depends on it) did not start properly, go to the Oozie service, stop it, click on the Actions button and choose 'Create Database', then start it."
    print "If there are any other services not running, restart them now."
class ClouderaManagerDeployment(object):
    """
    Thin wrapper around a clusterdock Cloudera Manager instance: builds API
    resources, validates service health, and applies post-start fix-ups
    (host additions, Hive metastore NameNode references, DB configs).
    """

    def __init__(self, cm_server_address, cm_server_port=DEFAULT_CM_PORT,
                 username=DEFAULT_CM_USERNAME, password=DEFAULT_CM_PASSWORD):
        self.cm_server_address = cm_server_address
        self.cm_server_port = cm_server_port
        self.username = username
        self.password = password

    def setup_api_resources(self):
        """Create the ApiResource/manager/cluster handles, detecting the API version."""
        self.api = ApiResource(server_host=self.cm_server_address,
                               server_port=self.cm_server_port,
                               username=self.username,
                               password=self.password,
                               version=self._get_api_version())
        self.cm = self.api.get_cloudera_manager()
        # Cluster name is fixed by the clusterdock image.
        self.cluster = self.api.get_cluster('Cluster 1 (clusterdock)')

    def prep_for_start(self):
        # Hook for subclasses; intentionally a no-op here.
        pass

    def validate_services_started(self, timeout_min=10, healthy_time_threshold_sec=30):
        """
        Poll until every service (including the CM service) has been
        simultaneously STARTED and GOOD for healthy_time_threshold_sec.

        :param timeout_min: give up after this many minutes of polling.
        :param healthy_time_threshold_sec: services must stay healthy this
            long before we declare success.
        :raises Exception: on timeout, listing the at-fault services.
        """
        start_validating_time = time()
        healthy_time = None
        logger.info('Beginning service health validation...')
        while healthy_time is None or (time() - healthy_time < healthy_time_threshold_sec):
            if (time() - start_validating_time < timeout_min * 60):
                all_services = list(self.cluster.get_all_services()) + [self.cm.get_service()]
                at_fault_services = list()
                for service in all_services:
                    # serviceState == "NA" means the state doesn't apply
                    # (e.g. client-only services); skip those.
                    if (service.serviceState != "NA" and
                            service.serviceState != "STARTED"):
                        at_fault_services.append([service.name, "NOT STARTED"])
                    elif (service.serviceState != "NA" and
                            service.healthSummary != "GOOD"):
                        checks = list()
                        for check in service.healthChecks:
                            if (check["summary"] not in ("GOOD", "DISABLED")):
                                checks.append(check["name"])
                        at_fault_services.append(
                            [service.name,
                             "Failed health checks: {0}".format(checks)])
                # Start (or reset) the healthy-streak clock: any fault clears
                # it, an all-clear with no streak running begins one.
                if not healthy_time or at_fault_services:
                    healthy_time = time() if not at_fault_services else None
                sleep(3)
            else:
                raise Exception(("Timed out after waiting {0} minutes for services to start "
                                 "(at fault: {1}).").format(timeout_min, at_fault_services))
        logger.info("Validated that all services started (time: %.2f s).",
                    time() - start_validating_time)

    def add_hosts_to_cluster(self, secondary_node_fqdn, all_fqdns):
        """Delegate host addition to the shared cm_utils helper."""
        cm_utils.add_hosts_to_cluster(api=self.api,
                                      cluster=self.cluster,
                                      secondary_node_fqdn=secondary_node_fqdn,
                                      all_fqdns=all_fqdns)

    def update_hive_metastore_namenodes(self):
        """Refresh NameNode references stored in the Hive metastore."""
        for service in self.cluster.get_all_services():
            if service.type == 'HIVE':
                logger.info('Updating NameNode references in Hive metastore...')
                update_metastore_namenodes_cmd = service.update_metastore_namenodes().wait()
                if not update_metastore_namenodes_cmd.success:
                    # Best-effort: log and continue rather than abort.
                    logger.warning(("Failed to update NameNode references in Hive metastore "
                                    "(command returned %s)."),
                                   update_metastore_namenodes_cmd)

    def update_database_configs(self):
        """Delegate DB config updates to the shared cm_utils helper."""
        cm_utils.update_database_configs(api=self.api, cluster=self.cluster)

    def _get_api_version(self):
        """
        Ask CM for its API version over plain HTTP and return the numeric
        part (e.g. '12' from 'v12').

        :raises Exception: if the endpoint doesn't return a 'v'-prefixed value.
        """
        api_version_response = requests.get(
            "http://{0}:{1}/api/version".format(self.cm_server_address,
                                                self.cm_server_port),
            auth=(self.username, self.password))
        api_version_response.raise_for_status()
        api_version = api_version_response.content
        if 'v' not in api_version:
            # BUG FIX: the original used logging-style args in the Exception
            # constructor -- Exception("... (%s).", api_version) -- so the
            # actual value was never interpolated into the message.
            raise Exception("/api/version returned unexpected result "
                            "({0}).".format(api_version))
        logger.info("Detected CM API %s.", api_version)
        return api_version.strip('v')
def main():
    # Variant cluster bring-up: connect to CM, create the cluster, deploy the
    # management service and parcels, then ZK-less CDH services in order
    # (HDFS, YARN, HBase, Hive, Impala, Oozie, Hue). Lower-case names
    # (cm_host, host_list, ...) and upper-case *_CONFIG constants are defined
    # at module level elsewhere in this file.
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=api_num)
    cm = ClouderaManager(api)
    #cm.host_install(host_username, host_list, password=host_password, cm_repo_url=cm_repo_url)
    MANAGER = api.get_cloudera_manager()
    #MANAGER.update_config)
    print "Connected to CM host on " + cm_host + " and updated CM configuration"

    # NOTE(review): host_list is passed for both the cluster-hosts and
    # CM-host arguments -- confirm against init_cluster's signature.
    CLUSTER = init_cluster(api, cluster_name, cdh_version, host_list, host_list)

    deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)
    print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + cm_host + "now service is stop!"

    deploy_parcels(CLUSTER, PARCELS)
    print "Downloaded and distributed parcels: "
    PRETTY_PRINT.pprint(PARCELS)

    zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME, ZOOKEEPER_HOSTS, ZOOKEEPER_SERVICE_CONFIG, ZOOKEEPER_ROLE_CONFIG)
    print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: "
    PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS)

    hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG, HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST, HDFS_NAMENODE_CONFIG, HDFS_SECONDARY_NAMENODE_HOST, HDFS_SECONDARY_NAMENODE_CONFIG, HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG, HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG)
    print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
    PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS)
    # 600s timeout for HDFS init commands.
    init_hdfs(hdfs_service, HDFS_SERVICE_NAME, 600)
    # Test move last method to here orginal is from post_startup function
    #hdfs_service.create_hdfs_tmp()
    print "Initialized HDFS service"

    yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG, YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST, YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG, YARN_GW_HOSTS, YARN_GW_CONFIG)
    print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on "
    PRETTY_PRINT.pprint(YARN_NM_HOSTS)

    # HBase without the Thrift server (the Thrift-server variant is kept
    # below for reference).
    #deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
    deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
    print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on "
    PRETTY_PRINT.pprint(HBASE_RS_HOSTS)

    hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG, HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST, HIVE_HS2_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG)
    print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST
    init_hive(hive_service)
    print "Initialized Hive service"

    impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME, IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST, IMPALA_SS_CONFIG, IMPALA_CS_HOST, IMPALA_CS_CONFIG, IMPALA_ID_HOSTS, IMPALA_ID_CONFIG)
    print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on "
    PRETTY_PRINT.pprint(IMPALA_ID_HOSTS)

    # Cluster must be running before Oozie/Hue can be deployed.
    #CLUSTER.stop().wait()
    CLUSTER.start().wait()
    #post_startup(CLUSTER, hdfs_service, oozie_service)

    oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME, OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST, OOZIE_SERVER_CONFIG)
    print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST

    hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG, HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST, HUE_KTR_CONFIG)
    print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST
    #post_startup(CLUSTER, hdfs_service)

    # Full restart so every service picks up the final configuration.
    print "About to restart cluster."
    CLUSTER.stop().wait()
    CLUSTER.start().wait()
    print "Done restarting cluster."

    post_startup(CLUSTER, hdfs_service, oozie_service)
'firehose_database_name': 'firehose' } logging.info('Updating role configurations') for group in mgmt.get_all_role_config_groups(): if group.roleType == "HOSTMONITOR": group.update_config(mgmt_hm_config) if group.roleType == "SERVICEMONITOR": group.update_config(mgmt_hm_config) logging.info('Starting the Cloudera Manager service') mgmt.start().wait() # Update the Parcels repo logging.info('Updating the remote parcels repo') cm_config = api.get_cloudera_manager().get_config(view='full') repo_urls = cdh_parcel_repo + ',' + kafka_parcel_repo api.get_cloudera_manager().update_config( {'REMOTE_PARCEL_REPO_URLS': repo_urls}) time.sleep(10) # Download the CDH Parcel logging.info('Downloading the CDH parcel') cluster_name = 'Open Data Platform' cluster = api.create_cluster(cluster_name, version='CDH5') cluster.add_hosts(hosts) cdh_parcel = cluster.get_parcel('CDH', cdh_parcel_version) cdh_parcel.start_download() while True: cdh_parcel = cluster.get_parcel('CDH', cdh_parcel_version) if cdh_parcel.stage == 'DOWNLOADED':
#!/usr/bin/env python import socket import time from cm_api.api_client import ApiResource #initialize hosts = [ ] cm_host = "cloudera-pe-cm01" api = ApiResource(cm_host, username="******", password="******") # Distribute the CDH parcel parcel_repo = 'http://archive.cloudera.com/cdh5/parcels/5.2.0' #parcel_repo = 'http://archive.cloudera.com/cdh5/parcels/5.1.3/' cm_config = api.get_cloudera_manager().get_config(view='full') repo_config = cm_config['REMOTE_PARCEL_REPO_URLS'] value = repo_config.value or repo_config.default value += ',' + parcel_repo api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': value}) time.sleep(10) # create cluster, add the hosts cluster = api.create_cluster("cloudera-pe-test", "CDH5") #api.create_host("master", "ip-10-238-154-140", "10.238.154.140") #api.create_host("w01", "ip-10-143-183-98", "10.143.183.98") #api.create_host("w02", "ip-10-140-38-88", "10.140.38.88") #api.create_host("w03", "ip-10-140-28-243", "10.140.28.243") #hosts.append("master") #hosts.append("w01") #hosts.append("w02") #hosts.append("w03")
def create_cluster(config_dict): config.read(['./conf/hadrian.ini','./conf/cluster_specs.ini', './conf/cloudera-manager/cm.ini']) cm_cluster_name = config_grabber("Globals")['cm.cluster.name'] cm_username = config_grabber("Globals")['cm.username'] cm_password = config_grabber("Globals")['cm.password'] cm_port = config_grabber("Globals")['cm.port'] version = config_grabber('Globals')['cdh.cluster.version'] cm_server = config_grabber(cm_cluster_name + '-en')['cm.server'] #Grab all configuration files in the directory with the CM Cluster Name. for i in os.listdir('./conf/' + cm_cluster_name): config.read('./conf/' + cm_cluster_name + '/' + i) all_nodes = list() while (get_cm_status(cm_server + ':' + cm_port) != 200): print 'Waiting for CM Server to start... ' time.sleep(15) api = ApiResource(cm_server, cm_port, cm_username, cm_password) # create cluster cluster = api.create_cluster(cm_cluster_name, version.upper()) #Config CM print 'Applying any configuration changes to Cloudera Manager' cmanager = api.get_cloudera_manager() cmanager.update_config(config_grabber('cloudera-manager-updates')) planned_nodes = config_grabber(cm_cluster_name + '-en')['full.list'].split(',') for k, v in config_grabber(cm_cluster_name + '-dn').iteritems(): for j in v.split(','): planned_nodes.append(j) # TODO make this smarter. show which agents haven't checked in. Add the option to continue without them. if len(api.get_all_hosts()) != len(planned_nodes): print 'Waiting for all agents to check into the CM Server before continuing.' while len(planned_nodes) > api.get_all_hosts(): print 'Waiting for the final set of CM Agent nodes to check in.' time.sleep(5) print 'Updating Rack configuration for data nodes.' 
all_hosts = list() for host in api.get_all_hosts(): all_hosts.append(host.hostId) for k,v in config_grabber(cm_cluster_name + '-dn').iteritems(): if host.hostname in v: print 'Setting host: ' + host.hostname + ' to rack /default/' + k host.set_rack_id('/default/' + k) print 'Adding all hosts to cluster.' cluster.add_hosts(all_hosts) # download CDH Parcels # TODO add some logic here to make the parcel list something that's read from the hadrian.ini # This will allow support for other CDH packages, Search, etc. if config_grabber('Globals')['cdh.distribution.method'] == 'parcels': distribute_parcel(cluster, 'CDH', config_grabber("Globals")['cdh.parcel.version']) if config_dict.get('hdfs_ha') == True: create_zookeeper_service(config_dict, cluster) create_hdfs_service(config_dict, cluster) cmd = cluster.deploy_client_config() if not cmd.wait(CMD_TIMEOUT).success: print 'Failed to deploy client configurations' else: print 'Client configuration deployment complete.' create_mapred_service(config_dict, cluster, cm_server) if config_dict.get('hbase') == True: if config_dict.get('hdfs_ha') == False: create_zookeeper_service(config_dict, cluster) create_hbase_service(config_dict, cluster) if config_dict.get('hive') == True: create_hive_service(config_dict, cluster) print 'Starting final client configuration deployment for all services.' cmd = cluster.deploy_client_config() if not cmd.wait(CMD_TIMEOUT).success: print 'Failed to deploy client configuration.' else: print 'Client configuration deployment complete. The cluster is all yours. Happy Hadooping.'
def main(): API = ApiResource(cm_config.CM_HOST, version=16, username=cm_config.ADMIN_USER, password=cm_config.ADMIN_PASSWD) MANAGER = API.get_cloudera_manager() MANAGER.update_config(cm_config.CM_CONFIG) print "Connected to CM host on " + cm_config.CM_HOST + " and updated CM configuration" CLUSTER = init_cluster(API, cm_config.CLUSTER_NAME, cm_config.CDH_VERSION, cm_config.CLUSTER_HOSTS, cm_config.CM_HOST) print "Initialized cluster " + cm_config.CLUSTER_NAME + " which uses CDH version " + cm_config.CDH_VERSION deploy_management(MANAGER, cm_config.MGMT_SERVICENAME, cm_config.MGMT_SERVICE_CONFIG, cm_config.MGMT_ROLE_CONFIG, cm_config.AMON_ROLENAME, cm_config.AMON_ROLE_CONFIG, cm_config.APUB_ROLENAME, cm_config.APUB_ROLE_CONFIG, cm_config.ESERV_ROLENAME, cm_config.ESERV_ROLE_CONFIG, cm_config.HMON_ROLENAME, cm_config.HMON_ROLE_CONFIG, cm_config.SMON_ROLENAME, cm_config.SMON_ROLE_CONFIG, cm_config.RMAN_ROLENAME, cm_config.RMAN_ROLE_CONFIG) print "Deployed CM management service " + cm_config.MGMT_SERVICENAME + " to run on " + cm_config.CM_HOST deploy_parcels(CLUSTER, cm_config.PARCELS) print "Downloaded and distributed parcels: " pretty_print(cm_config.PARCELS) zookeeper_service = deploy_zookeeper(CLUSTER, cm_config.ZOOKEEPER_SERVICE_NAME, cm_config.ZOOKEEPER_SERVER_HOSTS, cm_config.ZOOKEEPER_SERVICE_CONFIG, cm_config.ZOOKEEPER_ROLE_CONFIG) print "Deployed ZooKeeper " + cm_config.ZOOKEEPER_SERVICE_NAME + " to run on: " pretty_print(cm_config.ZOOKEEPER_SERVER_HOSTS) hdfs_service = deploy_hdfs( CLUSTER, cm_config.HDFS_SERVICE_NAME, cm_config.HDFS_SERVICE_CONFIG, cm_config.HDFS_NAMENODE_SERVICE_NAME, cm_config.HDFS_NAMENODE_HOST, cm_config.HDFS_NAMENODE_CONFIG, cm_config.HDFS_SECONDARY_NAMENODE_HOST, cm_config.HDFS_SECONDARY_NAMENODE_CONFIG, cm_config.HDFS_DATANODE_HOSTS, cm_config.HDFS_DATANODE_CONFIG, cm_config.HDFS_GATEWAY_HOSTS, cm_config.HDFS_GATEWAY_CONFIG) print "Deployed HDFS service " + cm_config.HDFS_SERVICE_NAME + " using NameNode on " + 
cm_config.HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + cm_config.HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: " pretty_print(cm_config.HDFS_DATANODE_HOSTS) init_hdfs(hdfs_service, cm_config.HDFS_SERVICE_NAME, cm_config.CMD_TIMEOUT) print "Initialized HDFS service" # mapred and yarn are mutually exclusive; only deploy one of them # mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG) # print "Deployed MapReduce service " + cm_config.MAPRED_SERVICE_NAME + " using JobTracker on " + cm_config.MAPRED_JT_HOST + " and TaskTrackers running on " # pretty_print(cm_config.MAPRED_TT_HOSTS) yarn_service = deploy_yarn( CLUSTER, cm_config.YARN_SERVICE_NAME, cm_config.YARN_SERVICE_CONFIG, cm_config.YARN_RM_HOST, cm_config.YARN_RM_CONFIG, cm_config.YARN_JHS_HOST, cm_config.YARN_JHS_CONFIG, cm_config.YARN_NM_HOSTS, cm_config.YARN_NM_CONFIG, cm_config.YARN_GW_HOSTS, cm_config.YARN_GW_CONFIG) print "Deployed YARN service " + cm_config.YARN_SERVICE_NAME + " using ResourceManager on " + cm_config.YARN_RM_HOST + ", JobHistoryServer on " + cm_config.YARN_JHS_HOST + ", and NodeManagers on " pretty_print(cm_config.YARN_NM_HOSTS) spark_service = deploy_spark(CLUSTER, cm_config.SPARK_SERVICE_NAME, cm_config.SPARK_SERVICE_CONFIG, cm_config.SPARK_YARN_HISTORY_SERVER_HOST, cm_config.SPARK_YARN_HISTORY_SERVER_CONFIG, cm_config.SPARK_GATEWAY_HOST, cm_config.SPARK_GATEWAY_CONFIG) print "Deployed SPARK service " + cm_config.SPARK_SERVICE_NAME + " using SparkHistoryServer on " + cm_config.SPARK_YARN_HISTORY_SERVER_HOST + " and Spark Gateway on " pretty_print(cm_config.SPARK_GATEWAY_HOST) deploy_hbase(CLUSTER, cm_config.HBASE_SERVICE_NAME, cm_config.HBASE_SERVICE_CONFIG, cm_config.HBASE_HM_HOST, cm_config.HBASE_HM_CONFIG, cm_config.HBASE_RS_HOSTS, cm_config.HBASE_RS_CONFIG) print "Deployed HBase service " + cm_config.HBASE_SERVICE_NAME + " using 
HMaster on " + cm_config.HBASE_HM_HOST + " and RegionServers on " pretty_print(cm_config.HBASE_RS_HOSTS) hive_service = deploy_hive( CLUSTER, cm_config.HIVE_SERVICE_NAME, cm_config.HIVE_SERVICE_CONFIG, cm_config.HIVE_HMS_HOST, cm_config.HIVE_HMS_CONFIG, cm_config.HIVE_HS2_HOST, cm_config.HIVE_HS2_CONFIG, cm_config.HIVE_WHC_HOST, cm_config.HIVE_WHC_CONFIG, cm_config.HIVE_GW_HOSTS, cm_config.HIVE_GW_CONFIG) print "Depoyed Hive service " + cm_config.HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + cm_config.HIVE_HMS_HOST + " and HiveServer2 on " + cm_config.HIVE_HS2_HOST hive_service = CLUSTER.get_service("HIVE") hive_mysqldb_deploy() init_hive(hive_service) print "Initialized Hive service" impala_service = deploy_impala( CLUSTER, cm_config.IMPALA_SERVICE_NAME, cm_config.IMPALA_SERVICE_CONFIG, cm_config.IMPALA_SS_HOST, cm_config.IMPALA_SS_CONFIG, cm_config.IMPALA_CS_HOST, cm_config.IMPALA_CS_CONFIG, cm_config.IMPALA_ID_HOSTS, cm_config.IMPALA_ID_CONFIG) print "Deployed Impala service " + cm_config.IMPALA_SERVICE_NAME + " using StateStore on " + cm_config.IMPALA_SS_HOST + ", CatalogServer on " + cm_config.IMPALA_CS_HOST + ", and ImpalaDaemons on " pretty_print(cm_config.IMPALA_ID_HOSTS) kafka_service = deploy_kafka(CLUSTER, cm_config.KAFKA_SERVICE_NAME, cm_config.KAFKA_SERVICE_CONFIG, cm_config.KAFKA_BROKER_HOSTS, cm_config.KAFKA_BROKER_CONFIG) print "Deployed Kafka service :" + cm_config.KAFKA_SERVICE_NAME + " using Broker on" pretty_print(cm_config.KAFKA_BROKER_HOSTS) #Need to start the cluster now as subsequent services need the cluster to be runnign #TODO can we just start ZK, and maybe HDFS, instead of everything? 
It's just needed for the search service # CLUSTER.first_run().wait() print("Deploy client config") CLUSTER.deploy_client_config().wait() print("Start hdfs,zookeeper service") zookeeper_service.start().wait() hdfs_service.start().wait() time.sleep(20) print("Create spark applicationHistory directory") comand="ssh -p %s root@%s 'sudo -u hdfs hadoop fs -mkdir -p /user/spark/applicationHistory && "\ "sudo -u hdfs hadoop fs -chmod 777 /user/spark/applicationHistory && "\ "sudo -u hdfs hadoop fs -chown -R spark:spark /user/spark ' >/dev/null 2>&1 ;echo $?"%\ (cm_config.NAME_NODE_HOST_INFO[3],cm_config.NAME_NODE_HOST_INFO[1]) shell_command(comand) print "About to restart cluster" CLUSTER.restart().wait() # CLUSTER.restart(redeploy_client_configuration=True).wait() print "Done restarting cluster" hdfs_service = CLUSTER.get_service("HDFS") post_startup(CLUSTER, hdfs_service) hive_service.restart().wait() impala_service.restart().wait() print "Finished deploying Cloudera cluster. Go to http://" + cm_config.CM_HOST + ":7180 to administer the cluster." print "If there are any other services not running, restart them now."