def main():
    """
    Kerberizes a cluster.

    Stops the cluster and the MGMT services, applies the service
    configuration, waits for credential generation, redeploys the client
    configs and starts everything again. When the Cloudera Manager check
    for a Kerberos principal fails, only a message is printed.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)
    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    # Guard clause: nothing to do without admin credentials.
    if not verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        print("Cluster does not have Kerberos admin credentials. Exiting!")
        return 0

    wait_for_command('Stopping the cluster', cluster.stop())
    wait_for_command('Stopping MGMT services', mgmt_service.stop())
    configure_services(cluster)
    wait_for_generate_credentials(cloudera_manager)
    wait_for_command('Deploying client configs.',
                     cluster.deploy_client_config())
    wait_for_command('Deploying cluster client configs',
                     cluster.deploy_cluster_client_config())
    wait_for_command('Starting MGMT services', mgmt_service.start())
    wait_for_command('Starting the cluster', cluster.start())
    return 0
def create_cluster():
    """
    Return the CLUSTER_NAME cluster object from Cloudera Manager.

    Connection settings (cm_host, cm_username, cm_password) are read from
    the Ansible ``group_vars/all`` file under ``ansible_path``.

    @returns: The object returned by ApiResource.get_cluster().
    """
    # Load the settings file once instead of re-parsing it for every key.
    cfg = load_cfg(ansible_path + "/group_vars/all")
    api = ApiResource(cfg.get("cm_host"),
                      version=API_VERSION,
                      username=cfg.get("cm_username"),
                      password=cfg.get("cm_password"))
    return api.get_cluster(CLUSTER_NAME)
def main():
    """
    Entry point of the parcel module.

    Connects to Cloudera Manager, looks up the cluster and either applies
    the requested parcel state or reports parcel information through
    module.exit_json() / module.fail_json().
    """
    module = build_module()
    choice_map = {
        'present': present,
        'distributed': distributed,
        'activated': activated,
        'absent': absent,
        'infos': infos,
    }
    params = module.params
    has_changed = False

    # The original passed the login as the password as well.
    # NOTE(review): assumes the module spec exposes a `cm_password` option;
    # confirm against build_module(). Falls back to the previous behaviour
    # when that option is missing or empty.
    password = params.get("cm_password") or params["cm_login"]
    api = ApiResource(params["cm_host"],
                      username=params["cm_login"],
                      password=password,
                      version=params["api_version"])

    try:
        cluster = api.get_cluster(params["cluster_name"])
    except ApiException as e:
        module.fail_json(msg="Cluster error : {0}".format(e))

    product = params["product"]
    version = params["version"]
    state = params["state"]

    if product and version:
        parcel = get_parcel(cluster, product, version)
        if state != "infos":
            handler = choice_map.get(state)
            if handler is None:
                # Previously this path tried to call None.
                module.fail_json(msg="Unsupported state : {0}".format(state))
            else:
                error, has_changed, result, meta = handler(cluster, parcel)
                if error:
                    module.fail_json(msg=result)
                module.exit_json(changed=has_changed, msg=result, meta=meta)
        else:
            meta = {
                "product": parcel.product,
                "version": parcel.version,
                "stage": parcel.stage
            }
            module.exit_json(changed=False,
                             msg="Parcel informations gathered", meta=meta)
    elif not product and not version and state == "infos":
        module.exit_json(changed=has_changed,
                         msg="Parcel informations gathered",
                         meta=infos(cluster))
    else:
        # Previously the module ended here without emitting any result.
        module.fail_json(
            msg="Both product and version are required for state "
                "'{0}'".format(state))
def main():
    """
    Kerberizes a cluster.

    Stops the cluster and the MGMT services, applies the service
    configuration, waits for credential generation, redeploys the client
    configs and starts everything again. When the Cloudera Manager check
    for a Kerberos principal fails, only a message is printed.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)
    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    # Guard clause: nothing to do without admin credentials.
    if not verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        print("Cluster does not have Kerberos admin credentials. Exiting!")
        return 0

    wait_for_command('Stopping the cluster', cluster.stop())
    wait_for_command('Stopping MGMT services', mgmt_service.stop())
    configure_services(cluster)
    wait_for_generate_credentials(cloudera_manager)
    wait_for_command('Deploying client configs.',
                     cluster.deploy_client_config())
    wait_for_command('Deploying cluster client configs',
                     cluster.deploy_cluster_client_config())
    wait_for_command('Starting MGMT services', mgmt_service.start())
    wait_for_command('Starting the cluster', cluster.start())
    return 0
def main():
    """
    Run the cluster "first run" command and report its outcome.

    Connects to Cloudera Manager with the module-level cm_* settings,
    triggers first_run() on the cluster and waits for it to finish.
    Exits with a non-zero status when the command fails.
    """
    # Get a handle on the instance of CM that we have running.
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=13)
    cluster = api.get_cluster(cluster_name)

    # wait() polls with a sleep between fetches instead of spinning on
    # fetch() in a tight loop.
    cmd = cluster.first_run().wait()

    if not cmd.success:
        # resultMessage is an attribute, not a method; calling it raised a
        # TypeError that masked the real failure message.
        print("The first run command failed: " + str(cmd.resultMessage))
        exit(1)

    print("First run successfully executed. Your cluster has been set up!")
class ImpalaCluster(object):
    """
    View of one Cloudera Manager cluster: its hosts and its Impala service.

    On construction the cluster is looked up by name, the hosts belonging
    to it are collected into ``self.hosts`` (keyed by hostId) and an
    ImpalaService wrapper is created.
    """

    def __init__(self, cm_host, cm_cluster_name, username, password):
        """
        @param cm_host: Cloudera Manager host to connect to.
        @param cm_cluster_name: Name of the cluster to load.
        @param username: Cloudera Manager user name.
        @param password: Cloudera Manager password.
        @raise RuntimeError: if get_cluster() returns None.
        """
        self.cm_api = ApiResource(cm_host, username=username, password=password)
        self.hosts = dict()
        self.services = list()
        # __load_hosts() compares against this name; it was never assigned.
        self.cluster_name = cm_cluster_name
        self.cluster = self.cm_api.get_cluster(cm_cluster_name)
        if self.cluster is None:
            raise RuntimeError('Cluster name "%s" not found' % cm_cluster_name)

        self.__load_hosts()
        self.__impala_service = ImpalaService(self)

    def _get_all_services(self):
        """Return every service of the cluster."""
        return self.cluster.get_all_services()

    def get_impala_service(self):
        """Return the ImpalaService created for this cluster."""
        return self.__impala_service

    def __load_hosts(self):
        """Rebuild self.hosts with the hosts that belong to this cluster."""
        self.hosts = dict()
        # Search for all hosts that are in the target cluster.
        # There is no API that provides the list of host in a given cluster,
        # so to find them we must loop through all the hosts and check the
        # cluster name matches.
        for host_info in self.cm_api.get_all_hosts():
            # host_info doesn't include a link to the roleRef so need to do
            # another lookup based on the hostId.
            host = self.cm_api.get_host(host_info.hostId)
            # NOTE(review): the loop header was garbled in the original
            # (`for roleRef.get(...) == ...:`); reconstructed as an
            # iteration over host.roleRefs - confirm against cm_api.
            for roleRef in host.roleRefs:
                if roleRef.get('clusterName') == self.cluster_name:
                    self.hosts[host_info.hostId] = Host(host)
                    break
def main():
    """
    Deploy a cluster end to end through the Cloudera Manager API.

    Connects to CM, fetches (or initializes) the cluster, deploys the
    management service, assigns roles, applies configuration and finally
    runs the cluster "first run" command, printing progress on the way.
    """
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    # manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit are not
        # swallowed; any lookup failure still falls back to creation.
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME +
          " which uses CDH version " + CLUSTER_VERSION)

    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name,
                      apub_role_name, eserv_role_name, hmon_role_name,
                      smon_role_name, nav_role_name, navms_role_name,
                      rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)

    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")

    # Custom role config groups cannot be automatically configured:
    # Gateway Group 1 (error 400). Still best-effort, but no longer silent.
    try:
        cluster.auto_configure()
    except Exception as e:
        print("[WARN] auto_configure skipped: " + str(e))
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")

    # wait() polls with a sleep instead of spinning on fetch().
    cmd = cluster.first_run().wait()
    if not cmd.success:
        # resultMessage is an attribute, not a method.
        print("[ERROR] The first run command failed: " +
              str(cmd.resultMessage))
    else:
        print("[INFO] First run successfully executed. "
              "Your cluster has been set up!")
def main():
    """Connect to Cloudera Manager, stop CLUSTER_NAME and wait for it."""
    api = ApiResource(CM_HOST, version=5,
                      username=ADMIN_USER, password=ADMIN_PASS)
    print("Connected to CM host on " + CM_HOST)

    cluster = api.get_cluster(CLUSTER_NAME)

    print("About to stop cluster.")
    stop_cmd = cluster.stop()
    stop_cmd.wait()
    print("Done stopping cluster.")
def main():
    """Connect to Cloudera Manager, restart CLUSTER_NAME and wait for it."""
    api = ApiResource(CM_HOST, version=5,
                      username=ADMIN_USER, password=ADMIN_PASS)
    print("Connected to CM host on " + CM_HOST)

    cluster = api.get_cluster(CLUSTER_NAME)

    print("About to restart cluster.")
    restart_cmd = cluster.restart()
    restart_cmd.wait()
    print("Done restarting cluster.")
def do_call(host, port, version, user, password, cluster_name, parcel_name,
            parcel_version, parcel_repo, init_pre_dir, init_post_dir):
    """
    Register a parcel repository and roll the parcel out to clusters.

    Validates the parcel version against REGEX_VERSION and the repository
    URL, adds the repository to REMOTE_PARCEL_REPO_URLS when missing, then
    for each target cluster downloads, distributes and activates the
    parcel, runs optional pre-init scripts, deploys client configs,
    restarts the cluster and runs optional post-init scripts.

    @param cluster_name: Single cluster to act on, or None for all clusters.
    @param init_pre_dir: Directory of *.sh scripts run before the client
                         config deployment (may be None).
    @param init_post_dir: Directory of *.sh scripts run after the restart
                          (may be None).
    @raise Exception: on an invalid version or repository, a parcel that
                      did not reach ACTIVATED, or a failed client config
                      deployment.
    """
    api = ApiResource(host, port, user, password, False, version)
    if not parcel_repo.endswith('/'):
        parcel_repo += '/'

    # Evaluate the regex once; the whole string must match.
    version_match = re.match(REGEX_VERSION, parcel_version)
    if version_match is None or version_match.group() != parcel_version:
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version +
                        '] expected to match regular expression [' + REGEX_VERSION + ']')
    if not parcel_repo.endswith(parcel_version + '/'):
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version +
                        '] when compared with repository [' + parcel_repo + ']')

    cm_config = api.get_cloudera_manager().get_config(view='full')
    repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
    repo_list = repo_config.value or repo_config.default
    if parcel_repo not in repo_list:
        repo_list += ',' + parcel_repo
        api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': repo_list})
        # The parcel synchronize end-point is not exposed via the API, so
        # sleep instead.
        time.sleep(POLL_SEC)

    cluster_names = []
    if cluster_name is None:
        for cluster in api.get_all_clusters():
            cluster_names.append(cluster.name)
    else:
        cluster_names.append(cluster_name)

    for cluster_name_itr in cluster_names:
        print('Cluster [DEPLOYMENT] starting ... ')
        cluster = api.get_cluster(cluster_name_itr)
        # Looked up before any parcel operation starts; the result itself
        # is refreshed after the operations below.
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        print('Parcel [DEPLOYMENT] starting ... ')
        do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD', 'AVAILABLE_REMOTELY', 'DOWNLOADED',
                     'start_download')
        do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE', 'DOWNLOADED', 'DISTRIBUTED',
                     'start_distribution')
        do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE', 'DISTRIBUTED', 'ACTIVATED', 'activate')
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        if parcel.stage != 'ACTIVATED':
            raise Exception('Parcel is currently mid-stage [' + parcel.stage + '], please wait for this to complete')
        print('Parcel [DEPLOYMENT] finished')

        if init_pre_dir is not None and os.path.isdir(init_pre_dir):
            print('Cluster [PRE_INIT] starting ... ')
            for script in glob.glob(init_pre_dir + '/*.sh'):
                subprocess.call([script])
            print('Cluster [PRE_INIT] finihsed')

        print('Cluster [CONFIG_DEPLOYMENT] starting ... ')
        # Issue the deployment once; the original fired a second, untracked
        # deploy command right before this one.
        cmd = cluster.deploy_client_config()
        if not cmd.wait(TIMEOUT_SEC).success:
            raise Exception('Failed to deploy client configs')
        print('Cluster [CONFIG_DEPLOYMENT] finihsed')

        print('Cluster [STOP] starting ... ')
        cluster.stop().wait()
        print('Cluster [STOP] finihsed')

        print('Cluster [START] starting ... ')
        cluster.start().wait()
        print('Cluster [START] finihsed')

        if init_post_dir is not None and os.path.isdir(init_post_dir):
            print('Cluster [POST_INIT] starting ... ')
            for script in glob.glob(init_post_dir + '/*.sh'):
                subprocess.call([script])
            print('Cluster [POST_INIT] finihsed')

        print('Cluster [DEPLOYMENT] finished')
def main():
    """
    Creates a daily HDFS cloud replication schedule on a cluster.

    The schedule starts now, ends 365 days later, and is updated right
    after creation so that it alerts on failure.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = parse_args()
    argc = len(sys.argv)
    if argc == 1 or argc > 17:
        print_usage_message()
        quit(1)

    api = ApiResource(settings.server, settings.port, settings.username,
                      settings.password, settings.use_tls, 14)

    yarn_service = get_service_name('YARN', api, settings.cluster_name)
    hdfs_name = get_service_name('HDFS', api, settings.cluster_name)
    target_cluster = api.get_cluster(settings.cluster_name)
    hdfs = target_cluster.get_service(hdfs_name)

    source_ref = ApiServiceRef(None,
                               peerName=None,
                               clusterName=settings.cluster_name,
                               serviceName=hdfs_name)
    hdfs_cloud_args = ApiHdfsCloudReplicationArguments(None)
    hdfs_cloud_args.sourceService = source_ref
    hdfs_cloud_args.sourcePath = settings.source_path
    hdfs_cloud_args.destinationPath = settings.target_path
    hdfs_cloud_args.destinationAccount = settings.account_name
    hdfs_cloud_args.mapreduceServiceName = yarn_service

    # Daily frequency: first trigger now, last possible trigger in a year.
    start = datetime.datetime.now()
    end = start + datetime.timedelta(days=365)
    schedule = hdfs.create_replication_schedule(start, end, "DAY", 1, True,
                                                hdfs_cloud_args)

    # Update the freshly created schedule's properties.
    schedule.alertOnFail = True
    schedule = hdfs.update_replication_schedule(schedule.id, schedule)

    print("Schedule created with Schdule ID: " + str(schedule.id))
    return 0
def main():
    """Connect to Cloudera Manager and distribute parcels to the cluster."""
    # Handle on the running CM instance.
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=13)

    # CM wrapper object (not used further in this function).
    cm = ClouderaManager(api)

    target_cluster = api.get_cluster(cluster_name)
    distribution_parcels(api, target_cluster)
def main():
    """
    Deploy a cluster end to end through the Cloudera Manager API.

    Connects to CM, fetches (or initializes) the cluster, deploys the
    management service, assigns roles, applies configuration and finally
    runs the cluster "first run" command, printing progress on the way.
    """
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    # manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit are not
        # swallowed; any lookup failure still falls back to creation.
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME +
          " which uses CDH version " + CLUSTER_VERSION)

    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name,
                      apub_role_name, eserv_role_name, hmon_role_name,
                      smon_role_name, nav_role_name, navms_role_name,
                      rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)

    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")

    # Custom role config groups cannot be automatically configured:
    # Gateway Group 1 (error 400). Still best-effort, but no longer silent.
    try:
        cluster.auto_configure()
    except Exception as e:
        print("[WARN] auto_configure skipped: " + str(e))
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")

    # wait() polls with a sleep instead of spinning on fetch().
    cmd = cluster.first_run().wait()
    if not cmd.success:
        # resultMessage is an attribute, not a method.
        print("[ERROR] The first run command failed: " +
              str(cmd.resultMessage))
    else:
        print("[INFO] First run successfully executed. "
              "Your cluster has been set up!")
def main():
    """
    Command-line check against a CDH manager.

    Supported actions (-a): 'manager' prints OK once the cluster lookup
    succeeds, 'service_list' prints a JSON document with the service
    names, 'service_health' prints OK or the list of failed health checks
    for the service given with -n.
    """
    # script options
    parser = OptionParser()
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      default=False, help='Be more verbose')
    parser.add_option('-m', '--manager', dest='manager',
                      help='CDH manager address')
    parser.add_option('-a', '--action', type='choice', action='store',
                      dest='action',
                      choices=['manager', 'service_list', 'service_health'],
                      default='manager', help='Action to take')
    parser.add_option('-n', '--name', dest='name',
                      help='Name of the item to check')
    options, args = parser.parse_args()

    # logging
    logging.basicConfig()

    if not options.manager:
        print('You must specify a manager address')
        sys.exit(-1)

    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    api = ApiResource(options.manager, username=ADMIN_USER,
                      password=ADMIN_PASS, version=9)

    try:
        cluster = api.get_cluster(CLUSTER_NAME)
        if options.action == 'manager':
            print('OK')
            sys.exit(0)
    except urllib2.URLError:
        print('Could not connect to API')
        sys.exit(-1)

    action = options.action
    if action == 'service_list':
        entries = []
        for svc in cluster.get_all_services():
            entries.append({'{#SERVICENAME}': svc.name})
        print(json.dumps({'data': entries}))
    elif action == 'service_health':
        if not options.name:
            print('Must specify a name for this check')
            sys.exit(-1)
        svc = cluster.get_service(options.name)
        if svc.healthSummary == 'GOOD':
            print('OK')
            sys.exit(0)
        else:
            bad_names = []
            for check in svc.healthChecks:
                if check['summary'] != 'GOOD':
                    bad_names.append(check['name'])
            failed_checks = " ".join(bad_names)
            print('Health is {0}. Failed checks: {1}'.format(
                svc.healthSummary, failed_checks))
def main():
    """
    Configure parcel repositories and JDK settings, install every parcel
    listed in PARCELS, then stop and start the cluster.
    """
    resource = ApiResource("localhost", 7180, "cloudera", "cloudera",
                           version=19)
    cluster = resource.get_cluster("Cloudera Quickstart")

    cm_manager = resource.get_cloudera_manager()
    cm_manager.update_config({'REMOTE_PARCEL_REPO_URLS': PARCEL_REPO})
    cm_manager.update_all_hosts_config(JDK_CONFIG)

    # Pause after the configuration update before touching the parcels.
    time.sleep(5)

    for spec in PARCELS:
        installer = ParcelInstaller(spec['name'], spec['version'])
        installer.install(cluster)

    print("Restarting cluster")
    cluster.stop().wait()
    cluster.start().wait()
    print("Done restarting cluster")
def main():
    """
    Enables HDFS HA on a cluster.

    Validates the cluster, checks (or waits for) GOOD health, enables
    NameNode HA, updates Hive for the HA nameservice and restarts the
    MGMT services.

    @rtype:   number
    @returns: A number representing the status of success: 0 on success,
              1 when a precondition or health check fails.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout(
            "Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!"
        )
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            # Actually exit: the original announced "Exiting!" but fell
            # through and enabled HA on an unhealthy cluster.
            return 1

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser
    # functions as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
def main():
    """
    Configures a cluster.

    Applies the service configuration, deploys the client configs and
    then stops and starts the cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)
    cluster = api.get_cluster(settings.cluster)

    configure_services(cluster)

    deploy_cmd = cluster.deploy_client_config()
    wait_for_command('Deploying client configs.', deploy_cmd)

    # A restart is performed as stop followed by start; both steps are
    # reported under the same label.
    restart_label = 'Restarting the cluster'
    wait_for_command(restart_label, cluster.stop())
    wait_for_command(restart_label, cluster.start())

    return 0
def main():
    """
    Creates the Cloudera Manager users 'test1' and 'test2' with the
    ROLE_USER role.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()
    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)
    # The cluster is looked up but not used afterwards.
    api.get_cluster(settings.cluster)

    # manage_cm_users(api)
    # NOTE(review): the password below is hard-coded in source; consider
    # supplying it from outside the script.
    for user in ('test1', 'test2'):
        print(user)
        api.create_user(user, 'nielsen#123', ['ROLE_USER'])

    return 0
class Credentials:
    """
    Connects to Cloudera Manager and indexes the services of one cluster.

    Attributes set by the constructor:
      api          - the ApiResource connection
      service_list - every service returned by the cluster
      serviceList  - the service types this class is interested in
      list         - the services whose type is in serviceList
      dictionary   - mapping of service type to the matching service
    """

    def __init__(self, host, port, user, passw, version, cluster):
        self.api = ApiResource(str(host), port, username=str(user),
                               password=str(passw), version=int(version))
        cluster1 = self.api.get_cluster(cluster)
        self.service_list = cluster1.get_all_services()
        self.serviceList = [
            "OOZIE", "KS_INDEXER", "SQOOP", "ZOOKEEPER", "HUE", "FLUME",
            "IMPALA", "HDFS", "SOLR", "HBASE", "YARN", "HIVE", "SPARK",
            "SENTRY"
        ]
        self.list = [service for service in self.service_list
                     if service.type in self.serviceList]
        # Key each service by its own type. The original zipped the fixed
        # type list with the services in cluster order, which paired a
        # type name with whatever service happened to be at that position.
        self.dictionary = dict((service.type, service)
                               for service in self.list)
def main():
    """
    TODO: This probably needs some work. You get the idea though.

    An example of how to do a bulk config update to Cloudera Manager.
    This is helpful if you have a bunch of changes that you want to make
    but don't want to use the GUI.

    Reads every file under ./conf/<cluster>, updates each known service
    and its role config groups, then deploys the client configuration.
    """
    parser = argparse.ArgumentParser(
        description='Cloudera Manager Bulk Config Update Script')
    parser.add_argument('-H', '--host', '--hostname', action='store',
                        dest='hostname', required=True, help='CM server host')
    parser.add_argument('-p', '--port', action='store', dest='port',
                        type=int, default=7180, help='example: 7180')
    parser.add_argument('-u', '--user', '--username', action='store',
                        dest='username', required=True, help='example: admin')
    parser.add_argument('-c', '--cluster', action='store', dest='cluster',
                        required=True, help='example: hadrian-cluster')
    args = parser.parse_args()
    password = getpass.getpass('Please enter your Cloudera Manager passsword: ')

    # read configuration files:
    conf_dir = './conf/' + args.cluster
    for i in os.listdir(conf_dir):
        config.read(conf_dir + '/' + i)

    api = ApiResource(args.hostname, args.port, args.username, password)
    cluster = api.get_cluster(args.cluster)
    services = cluster.get_all_services()

    # update services based with configuration file parameters
    # NOTE(review): files are read into `config` while lookups go through
    # `config_grabber`; both are defined outside this function - confirm
    # they share the same parser state.
    for service in services:
        if config_grabber.has_section(service.type):
            service.update_config(
                svc_config=config_grabber(service.name + '-svc-config'))
            config_groups = config_grabber(service.name)['config_groups']
            for config_group in config_groups.split(','):
                # Use the loop variable; the original referenced an
                # undefined name `section` here.
                print(config_group)
                temp_config_group = service.get_role_config_group(config_group)
                temp_config_group.update_config(config_grabber(config_group))
        else:
            print('unknown service: ' + service.name)

    print('Starting final client configuration deployment for all services.')
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print('Failed to deploy client configuration.')
def main():
    """
    Enables HDFS HA on a cluster.

    Validates the cluster, checks (or waits for) GOOD health, enables
    NameNode HA, updates Hive for the HA nameservice and restarts the
    MGMT services.

    @rtype:   number
    @returns: A number representing the status of success: 0 on success,
              1 when a precondition or health check fails.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout("Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!")
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD. Exiting!\n")
            # Actually exit: the original announced "Exiting!" but fell
            # through and enabled HA on an unhealthy cluster.
            return 1

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser
    # functions as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
def main():
    """
    Example script that prints the configuration of a CM service.

    For every role config group of the service, each setting is printed
    with its explicit value, or with its default when no value is set.
    For a more Hadrian-ish way to export configurations, see
    ExportConfigs.py.
    """
    separator = '--------------------------------------------------------'

    api = ApiResource('<cloudera manager server>', 7180,
                      '<username>', '<password>')
    service = api.get_cluster('CM').get_service('<service name>')

    for group in service.get_all_role_config_groups():
        print(separator)
        print(group.name)
        print(separator)
        for key, entry in group.get_config('full').iteritems():
            if v_is_unset(entry):
                print(key + ' - default - ' + str(entry.default))
            else:
                print(key + ' - ' + str(entry.value))


def v_is_unset(entry):
    """Return True when the config entry carries no explicit value."""
    return entry.value is None
def main():
    """
    Example script that prints the configuration of a CM service.

    For every role config group of the service, each setting is printed
    with its explicit value, or with its default when no value is set.
    For a more Hadrian-ish way to export configurations, see
    ExportConfigs.py.
    """
    rule = '--------------------------------------------------------'

    api = ApiResource('<cloudera manager server>', 7180,
                      '<username>', '<password>')
    cm_cluster = api.get_cluster('CM')
    service = cm_cluster.get_service('<service name>')

    for group in service.get_all_role_config_groups():
        print(rule)
        print(group.name)
        print(rule)
        full_config = group.get_config('full')
        for key, entry in full_config.iteritems():
            if entry.value is not None:
                print(key + ' - ' + str(entry.value))
            else:
                print(key + ' - default - ' + str(entry.default))
def main():
    """
    Triggers an HDFS replication schedule and prints the outcome.

    Runs the schedule identified by settings.schedule_id on the target
    cluster's HDFS service, waits for the command and prints the details
    of the most recent history entry.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = parse_args()
    if len(sys.argv) == 1 or len(sys.argv) > 17:
        print_usage_message()
        quit(1)

    api = ApiResource(settings.server, settings.port, settings.username,
                      settings.password, settings.use_tls, 14)
    target_hdfs_name = get_service_name('HDFS', api,
                                        settings.target_cluster_name)
    hdfs = api.get_cluster(
        settings.target_cluster_name).get_service(target_hdfs_name)

    cmd = hdfs.trigger_replication_schedule(settings.schedule_id)
    cmd.wait()

    # Fetch the schedule once so the status and the HDFS details come from
    # the same history entry (the original issued two separate requests).
    result = hdfs.get_replication_schedule(settings.schedule_id).history[0]
    hdfsresult = result.hdfsResult

    if result.success is False:
        print("###### Replication job failed #####")
        print("Yarn Job ID :" + str(hdfsresult.jobId))
        print("Job Details URL:" + str(hdfsresult.jobDetailsUri))
        print("Setup Error:" + str(hdfsresult.setupError))
    else:
        print("###### Replication job succeeded #####")
        print("Yarn Job ID :" + str(hdfsresult.jobId))
        print("Job Details URL:" + str(hdfsresult.jobDetailsUri))
        print("numFilesCopied:" + str(hdfsresult.numFilesCopied))
        print("numBytesCopied:" + str(hdfsresult.numBytesCopied))
        print("numFilesSkipped:" + str(hdfsresult.numFilesSkipped))
        print("numBytesSkipped:" + str(hdfsresult.numBytesSkipped))

    # NOTE(review): 0 is returned even when the replication job failed;
    # kept as-is for callers - confirm whether a non-zero status is wanted.
    return 0
#hosts.append("master") #hosts.append("w01") #hosts.append("w02") #hosts.append("w03") hosts.append("ip-10-11-167-80") hosts.append("ip-10-153-224-197") hosts.append("ip-10-37-166-245") hosts.append("ip-10-169-69-118") cluster.add_hosts(hosts) # Downloads and distributes parcels # Had to recreate the cluster object as follows. For some reason doing a cluster.get_parcel was # failing while the cluster object was api.create_cluster() cluster = api.get_cluster("cloudera-pe-test") #parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36") parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36") parcel.start_download(); while True: parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36") if parcel.stage != "DOWNLOADED": print "Downloading : %s / %s" % ( parcel.state.progress, parcel.state.totalProgress) else: break parcel.start_distribution() while True: parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36") if parcel.stage != "DISTRIBUTED": print "Distributing: %s / %s" % ( parcel.state.progress, parcel.state.totalProgress)
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming
    all the hosts are configured and Cloudera Manager is installed with
    all the required databases setup.

    Handle all the steps required in creating a cluster. All the functions
    are built to function idempotently. So you should be able to resume
    from any failed step but running thru the __class__.setup()
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        """
        :param module: Module handle passed to fail() on errors
        :param config: Dict with the 'cm', 'cluster', 'parcels' and
                       'services' sections read by the methods below
        :param trial: Start a trial license instead of registering one
        :param license_txt: License text used when trial is False
        """
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license, either it's trial mode or a full
        license is entered and registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                # Use the instance attribute; the bare name `license_txt`
                # is not defined in this scope.
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(self.module, 'License should be provided or trial should be specified')

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner, _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is
        only created if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(cluster_config['name']))
            self.cluster = self.api.create_cluster(cluster_config['name'],
                                                   cluster_config['version'],
                                                   cluster_config['fullVersion'])

        cluster_hosts = [self.api.get_host(host.hostId).hostname
                         for host in self.cluster.list_hosts()]
        # Only add the hosts that are not part of the cluster yet.
        hosts = [host for host in cluster_config['hosts']
                 if host not in cluster_hosts]
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        """Download, distribute and activate every configured parcel."""
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on
        all hosts.

        :param cmd: A command instance used for tracking the status of the
                    command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            # Still running: raising lets the retry decorator poll again.
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        # Use the instance configuration; the bare name `config` is not
        # defined in this scope.
        mgmt_roles = self.config['services']['MGMT']['roles']

        for role in mgmt_roles:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        for role in mgmt_roles:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(self.module, "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific
                         actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                # The service class is looked up by name in this module.
                svc = getattr(sys.modules[__name__], service)(self.cluster, service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend
        # on other services and is essential that the client configs are
        # in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each
        # service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started.
            # This helps with re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        """Run every orchestration step in order."""
        # TODO(rnirmal): Cloudera Manager SSL?
        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
#!/usr/bin/env python import sys from cm_api.api_client import ApiResource CMD_TIMEOUT = 180 manager_host = sys.argv[1] cluster_name = sys.argv[2] action = sys.argv[3] api = ApiResource(manager_host, username="******", password="******", use_tls=False, version=4) cluster = api.get_cluster(cluster_name) if action == "format": hdfs = cluster.get_service("hdfs1") cmd = hdfs.format_hdfs("hdfs1_NAMENODE_1")[0] if not cmd.wait(CMD_TIMEOUT).success: raise Exception("Failed to format HDFS") elif action == "start": service_name = sys.argv[4] service = cluster.get_service(service_name) if service_name == "hdfs1" or service_name == "mapreduce1": ## TODO! Refactoring is needed service_config = {} if service_name == "hdfs1": # TODO: HACK!! disable dfs permissions service_config = {'dfs_permissions': False} if service_name == "mapreduce1":
#!/usr/bin/env python
# Updates the 'hdfs1' service configuration and deploys the client
# configuration to every HDFS role.
#
# Usage: <script> <manager_host> <cluster_name>
import sys
import socket

from cm_api.api_client import ApiResource
from cm_api.api_client import ApiException

CMD_TIMEOUT = 180

api = ApiResource(sys.argv[1],
                  username="******",
                  password="******",
                  use_tls=False,
                  version=4)
cluster = api.get_cluster(sys.argv[2])
hdfs = cluster.get_service('hdfs1')

hdfs_service_config = {'dfs_block_local_path_access_user': '******'}

# Collect the name of every role of every role type of the service.
roles_types = hdfs.get_role_types()
hdfs_roles_names = []
for role_type in roles_types:
    hdfs_roles_names.extend(
        [role.name for role in hdfs.get_roles_by_type(role_type)])

hdfs.update_config(svc_config=hdfs_service_config)

cmd_hdfs = hdfs.deploy_client_config(*hdfs_roles_names)
deployed = cmd_hdfs.wait(CMD_TIMEOUT).success
if not deployed:
    raise Exception("Failed to deploy HDS client configuration")
def status(host, user, passw):
    """Print the state and health summary of the known services.

    Connects to Cloudera Manager on port 7180 (API version 9), looks up the
    cluster named "Cloudera QuickStart" and prints a short report for every
    service whose type appears in the table below. Services of any other
    type are skipped silently.

    Args:
        host: Cloudera Manager host; converted with ``str()``.
        user: API user name; converted with ``str()``.
        passw: API password; converted with ``str()``.
    """
    # Service type -> label used in the printed lines. The labels reproduce
    # the exact wording of the original per-service messages.
    labels = {
        "OOZIE": "Oozie",
        "KS_INDEXER": "ks_indexer",
        "SQOOP": "sqoop",
        "ZOOKEEPER": "zookeeper",
        "HUE": "hue",
        "FLUME": "flume",
        "IMPALA": "impala",
        "HDFS": "hdfs",
        "SOLR": "solr",
        "HBASE": "hbase",
        "YARN": "yarn",
        "HIVE": "hive",
        "SPARK": "spark",
        "SENTRY": "sentry",
    }

    # Use the function's own arguments; the previous body referenced
    # hostname/username/password, which are not parameters of this function.
    api = ApiResource(str(host), 7180, username=str(user), password=str(passw), version=9)
    cluster = api.get_cluster("Cloudera QuickStart")

    for service in cluster.get_all_services():
        label = labels.get(service.type)
        if label is None:
            continue
        print("===================================================")
        print("Entered " + label)
        print("Located " + label + " Service: " + service.name)
        print("State: " + service.serviceState + "\n" + "Health: " + service.healthSummary)
#!/usr/bin/env python
"""Connection handle and HBase deployment settings for a CM-managed cluster."""
import socket
import time
from cm_api.api_client import ApiResource
from cm_api.endpoints.services import ApiService
from cm_api.endpoints.services import ApiServiceSetupInfo

# Cloudera Manager connection and target cluster.
cm_host = 'ip-10-136-86-133'
api = ApiResource(cm_host, username='******', password='******')
cluster = api.get_cluster('cloudera-pe-test')

### HBase ###
hbase_service_name = "HBASE"

# Service-level settings: the services HBase depends on.
hbase_service_config = {
    'hdfs_service': 'hdfs01',
    'zookeeper_service': 'zookeeper01',
}

# HBase Master placement; no role-level overrides.
hbase_hm_host = "ip-10-136-86-133"
hbase_hm_config = {}

# Region server placement and role-level settings.
hbase_rs_hosts = [
    "ip-10-153-224-197",
    "ip-10-169-69-118",
]
hbase_rs_config = {
    'hbase_hregion_memstore_flush_size': 1024000000,
    'hbase_regionserver_handler_count': 10,
    'hbase_regionserver_java_heapsize': 2048000000,
    'hbase_regionserver_java_opts': '',
}

hbase_thriftserver_service_name = "HBASETHRIFTSERVER"
# # else : # print >>sys.stderr, 'Cannot replicate from that cluster!' # return -1 vm_version = cf['CM_VERSION'] API = ApiResource(cmHost, cf['CM_PORT'], version=cf['CM_VERSION'], username=cf['CM_USER'], password=cf['CM_PASSWD'], use_tls=False) LOG.debug('Connected to CM host on ' + cmHost) procUser = getUsername() LOG.debug('Process effective username is ' + procUser) procGroup= getGroupname() LOG.debug('Process effective group name is ' + procGroup) procUserGroups = getUserGroups(procUser) LOG.debug('All groups for user:'******', '.join(procUserGroups)) cluster = API.get_cluster(cf['CLUSTER_NAME']) if action == 'listRepls': print >>sys.stdout, '\n\tSearching replication schedules for user: '******' group(s): ' + ', '.join(procUserGroups) schedules = getAccessableSchedules(cf,cluster,procUser,procUserGroups) printReplicationSchedules(cf,schedules) return cf['RET_OK'] # get details about the replication the user is interested in if service == cf['HIVE_SERVICE']: path = getDatabaseLocation(cf,database) LOG.debug('DB location is ' + path) schedule = getHiveSchedule (cluster,service,database,table) else: schedule = getHdfsSchedule (cluster,service,path) path = schedule.hdfsArguments.sourcePath
def main():
    """Ansible module entry point: run one Cloudera Manager action.

    Reads the ``action`` module parameter and dispatches to the matching
    branch below. ``create_cluster`` works without an existing cluster; every
    other supported action first fetches the cluster named ``CLUSTER_NAME``
    and builds a hostname -> hostId map for it. Each branch reports its
    outcome through ``module.exit_json`` / ``module.fail_json``.

    Raises:
        Exception: when a parcel reports errors while being downloaded or
            distributed.
    """
    module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

    api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
    cluster_name = CLUSTER_NAME

    manager = api.get_cloudera_manager()

    action_a = module.params.get('action', None)

    if action_a == 'create_cluster':
        license_a = module.params.get('license', None)
        version_a = module.params.get('version', None)

        cluster_list = [x.name for x in api.get_all_clusters()]
        if cluster_name in cluster_list:
            module.exit_json(changed=False, msg='Cluster exists')
        else:
            cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
            if license_a is None:
                manager.begin_trial()
            else:
                manager.update_license(license_a.decode('base64'))
            module.exit_json(changed=True, msg='Cluster created')

    elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster', 'create_snapshot_policy']:
        # more complicated actions that need a created cluster go here
        cluster = api.get_cluster(cluster_name)
        host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

        # adds a host to the cluster
        # host_name should be in the internal DNS format, ip-xx-xx-xx.copute.internal
        if action_a == 'add_host':
            host_a = module.params.get('host', None)

            host_list = host_map.keys()
            if host_a in host_list:
                module.exit_json(changed=False, msg='Host already in cluster')
            else:
                try:
                    cluster.add_hosts([host_a])
                except ApiException:
                    # if a host isn't there, it could be because the agent didn't manage to connect yet
                    # so let's wait a moment for it
                    sleep(120)
                    cluster.add_hosts([host_a])

                module.exit_json(changed=True, msg='Host added')

        # create management service and set its basic configuration
        # this needs a separate function since management is handled
        # differently than the rest of services
        elif action_a == 'create_mgmt':
            host_a = module.params.get('host', None)

            # getting the management service is the only way to check if mgmt exists
            # an exception means there isn't one
            try:
                mgmt = manager.get_service()
                module.exit_json(changed=False, msg='Mgmt service already exists')
            except ApiException:
                pass

            mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

            # this is ugly... and I see no good way to unuglify it
            firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
            reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

            # since there is no easy way of configuring the manager... let's do it here :(
            # defaultdict: role types without an entry get an empty config
            role_conf = defaultdict(dict)
            role_conf['ACTIVITYMONITOR'] = {
                'firehose_database_host': '{0}:7432'.format(host_a),
                'firehose_database_user': '******',
                'firehose_database_password': firehose_passwd,
                'firehose_database_type': 'postgresql',
                'firehose_database_name': 'amon',
                'firehose_heapsize': '268435456',
            }
            role_conf['EVENTSERVER'] = {
                'event_server_heapsize': '215964392'
            }
            role_conf['REPORTSMANAGER'] = {
                'headlamp_database_host': '{0}:7432'.format(host_a),
                'headlamp_database_user': '******',
                'headlamp_database_password': reports_passwd,
                'headlamp_database_type': 'postgresql',
                'headlamp_database_name': 'rman',
                'headlamp_heapsize': '215964392',
            }

            roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
            # create management roles
            for role in roles:
                mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

            # update configuration of each
            for group in mgmt.get_all_role_config_groups():
                group.update_config(role_conf[group.roleType])

            mgmt.start().wait()
            # after starting this service needs time to spin up
            sleep(30)
            module.exit_json(changed=True, msg='Mgmt created and started')

        # deploy a given parcel on all hosts in the cluster
        # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
        elif action_a == 'deploy_parcel':
            name_a = module.params.get('name', None)
            version_a = module.params.get('version', None)

            if "latest" in version_a:
                available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
                if "-latest" in version_a:
                    version_substr = match('(.+?)-latest', version_a).group(1)
                # if version is just "latest", try to check everything
                else:
                    version_substr = ".*"
                try:
                    # single-element unpacking: raises ValueError unless
                    # exactly one available version matches
                    [version_parcel] = [x for x in available_versions if re.match(version_substr, x) is not None]
                except ValueError:
                    module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
            else:
                version_parcel = version_a

            # we now go through various stages of getting the parcel
            # as there is no built-in way of waiting for an operation to complete
            # we use loops with sleep to get it done
            parcel = cluster.get_parcel(name_a, version_parcel)
            if parcel.stage == 'AVAILABLE_REMOTELY':
                parcel.start_download()

                while parcel.stage != 'DOWNLOADED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    sleep(10)

            if parcel.stage == 'DOWNLOADED':
                parcel.start_distribution()

                while parcel.stage != 'DISTRIBUTED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    if parcel.state.errors:
                        raise Exception(str(parcel.state.errors))
                    # sleep while hosts report problems after the download
                    # NOTE(review): nesting reconstructed; this wait is kept
                    # inside the polling loop so the loop is paced - confirm.
                    for i in range(12):
                        sleep(10)
                        if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                            break

            # since parcels are distributed automatically when a new host is added to a cluster
            # we can encounter the ,,ACTIVATING'' stage then
            if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
                if parcel.stage == 'DISTRIBUTED':
                    parcel.activate()

                while parcel.stage != 'ACTIVATED':
                    parcel = cluster.get_parcel(name_a, version_parcel)
                    # this sleep has to be large because although the operation is very fast
                    # it makes the management and cloudera hosts go bonkers, failing all of the health checks
                    sleep(10)

                # sleep while hosts report problems after the distribution
                for i in range(60):
                    sleep(10)
                    if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
                        break

                module.exit_json(changed=True, msg='Parcel activated')

            if parcel.stage == 'ACTIVATED':
                module.exit_json(changed=False, msg='Parcel already activated')

            # if we get down here, something is not right
            module.fail_json(msg='Invalid parcel state')

        # deploy nodes for workers, according to SERVICE_WORKER_MAP
        # also give them sane names and init zookeeper and kafka ones
        # which need id's specified
        elif action_a == 'deploy_service_worker_nodes':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)

            service_name = SERVICE_MAP[service_a]
            role_name = SERVICE_WORKER_MAP[service_a]['name']
            full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

            if service_name not in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)

            nodes = [x for x in service.get_all_roles() if role_name in x.name]

            # if host already has the given group, we should skip it
            if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
                module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
            # find out the highest id that currently exists
            else:
                node_names = [x.name for x in nodes]
                if len(node_names) == 0:
                    # if no nodes, start numbering from 1
                    node_i = 1
                else:
                    # take the max number and add 1 to it
                    node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

                if service_name == 'ZOOKEEPER':
                    role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
                    # zookeeper needs a per-node ID in the configuration, so we set it now
                    role.update_config({'serverId': node_i})
                elif service_name == 'KAFKA':
                    role = service.create_role(full_role_name.format(node_i), role_name, host_a)
                    # kafka needs a per-node ID in the configuration, so we set it now
                    role.update_config({'broker.id': node_i})
                else:
                    service.create_role(full_role_name.format(node_i), role_name, host_a)

                module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

        # deploy a service. just create it, don't do anything more
        # this is needed mainly when we have to set service properties before role deployment
        elif action_a == 'deploy_service':
            name_a = module.params.get('name', None)

            if name_a not in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(name_a))

            service_name = SERVICE_MAP[name_a]
            if service_name not in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
                module.exit_json(changed=True, msg='{0} service created'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

        # deploy the base hdfs roles (the namenode and secondary)
        # this doesn't create the service, as at least one datanode should already be added!
        # the format also requires certain properties to be set before we run it
        elif action_a == 'deploy_hdfs_base':
            nn_host_a = module.params.get('nn_host', None)
            sn_host_a = module.params.get('sn_host', None)

            changed = False

            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't create a secondary namenode when:
            # - there is one that already exists
            # - there is a second namenode, which means we have HA and don't need a secondary
            if 'HDFS-SECONDARYNAMENODE' not in hdfs_roles and 'HDFS-NAMENODE-2' not in hdfs_roles:
                hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
                changed = True

            # create a namenode and format its FS
            # formatting the namenode requires at least one datanode and secondary namenode already in the cluster!
            if 'HDFS-NAMENODE' not in hdfs_roles:
                hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
                for command in hdfs.format_hdfs('HDFS-NAMENODE'):
                    if command.wait().success == False:
                        module.fail_json(msg='Failed formating HDFS namenode with error: {0}'.format(command.resultMessage))
                changed = True

            module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

        # enable HttpFS for HDFS
        # HUE requires this to support HA in HDFS
        elif action_a == 'deploy_hdfs_httpfs':
            host_a = module.params.get('host', None)

            hdfs = cluster.get_service('HDFS')
            hdfs_roles = [x.name for x in hdfs.get_all_roles()]

            # don't install second instance of HttpFS
            if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
                module.exit_json(changed=False, msg='HDFS HttpFS service already exists')

            hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a])
            module.exit_json(changed=True, msg='HDFS HttpFS service created')

        # enable HA for HDFS
        # this deletes the secondary namenode and creates a second namenode in its place
        # also, this spawns 3 journal node and 2 failover controller roles
        elif action_a == 'deploy_hdfs_ha':
            sn_host_a = module.params.get('sn_host', None)
            jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

            hdfs = cluster.get_service('HDFS')

            # if there's a second namenode, this means we already have HA enabled
            if 'HDFS-NAMENODE-2' not in [x.name for x in hdfs.get_all_roles()]:
                # this is bad and I should feel bad
                # jns is a list of dictionaries, each dict passes the required journalnode parameters
                jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

                # this call is so long because we set some predictable names for the services
                command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                            active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

                children = command.wait().children
                for command_children in children:
                    # The format command is expected to fail, since we already formatted the namenode.
                    # Check each child's own result: `command` is the handle
                    # obtained before wait() and is not the object whose
                    # children are being inspected here.
                    if command_children.name != 'Format' and command_children.success is False:
                        module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for HDFS service')
            else:
                module.exit_json(changed=False, msg='HDFS HA already enabled')

        # enable HA for YARN
        elif action_a == 'deploy_rm_ha':
            sn_host_a = module.params.get('sn_host', None)

            yarn = cluster.get_service('YARN')

            # exactly one role matching this name means HA for YARN is not enabled yet
            if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) is not None]) == 1:
                command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
                children = command.wait().children
                for command_children in children:
                    # per-child result, same reasoning as for HDFS HA above
                    if command_children.success is False:
                        module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
                module.exit_json(changed=True, msg='Enabled HA for YARN service')
            else:
                module.exit_json(changed=False, msg='YARN HA already enabled')

        # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
        # after the deployments run commands specified in BASE_SERVICE_ROLE_MAP
        elif action_a == 'deploy_base_roles':
            host_a = module.params.get('host', None)
            service_a = module.params.get('service', None)

            service_name = SERVICE_MAP[service_a]
            changed = False

            if service_name not in [x.name for x in cluster.get_all_services()]:
                service = cluster.create_service(service_name, service_name)
            else:
                service = cluster.get_service(service_name)

            service_roles = [x.name for x in service.get_all_roles()]

            # create each service from the map
            for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
                # check if role already exists, script cant compare it directly
                # after enabling HA on YARN roles will have random strings in names
                if len([0 for x in service_roles if match(role_name, x) is not None]) == 0:
                    service.create_role(role_name, cloudera_name, host_a)
                    changed = True

                    # init commands
                    # NOTE(review): nesting reconstructed; init commands are
                    # run only for a role that was just created - confirm.
                    if role_name in SERVICE_INIT_COMMANDS.keys():
                        for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
                            # different handling of commands specified by name and
                            # ones specified by an instance method
                            if ismethod(command_to_run):
                                command = command_to_run(service)
                            else:
                                command = service.service_command_by_name(command_to_run)

                            if command.wait().success == False:
                                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

            if changed == True:
                module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
            else:
                module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

        # set config values for a given service/role
        elif action_a == 'set_config':
            entity_a = module.params.get('entity', None)
            service_a = module.params.get('service', None)
            role_a = module.params.get('role', None)
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)

            if service_a not in SERVICE_MAP:
                module.fail_json(msg='Unknown service: {0}'.format(service_a))

            # since management is handled differently, it needs a different service
            if service_a == 'management':
                service = manager.get_service()
            elif service_a == 'cm':
                service = manager
            else:
                service = cluster.get_service(SERVICE_MAP[service_a])

            # role and service configs are handled differently
            if entity_a == 'service':
                prev_config = service.get_config()
                curr_config = service.update_config({name_a: value_a})
                if service_a == 'cm':
                    # wrap so the [0] indexing below works for 'cm' as well
                    prev_config = [prev_config]
                    curr_config = [curr_config]
                module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

            elif entity_a == 'role':
                if role_a not in ROLE_MAP:
                    # report the role that was asked for (was formatting `service`)
                    module.fail_json(msg='Unknown role: {0}'.format(role_a))

                role = service.get_role_config_group(ROLE_MAP[role_a])
                prev_config = role.get_config()
                curr_config = role.update_config({name_a: value_a})
                module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

            else:
                module.fail_json(msg='Invalid entity, must be one of service, role')

        # handle service state
        # currently this only can start/restart a service
        elif action_a == 'service':
            state_a = module.params.get('state', None)
            service_a = module.params.get('service', None)

            try:
                if service_a == 'cm':
                    service = manager.get_service()
                else:
                    service = cluster.get_service(SERVICE_MAP[service_a])
            except ApiException:
                module.fail_json(msg='Service does not exist')

            # when starting a service, we also deploy the client config for it
            if state_a == 'started':
                if service.serviceState == 'STARTED':
                    module.exit_json(changed=False, msg='Service already running')
                method = service.start
                verb = "start"
            elif state_a == 'restarted':
                method = service.restart
                verb = "restart"
            else:
                # without this, `method` would be unbound further down
                module.fail_json(msg='Invalid state, must be one of started, restarted')

            try:
                command = service.deploy_client_config()
                if command.wait().success == False:
                    module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
            except (ApiException, AttributeError):
                # since there is no way to check if a service handles client config deployments
                # we try our best and pass the exception if it doesn't.
                # The tuple is required: "except A, B" catches only A.
                pass

            method().wait()
            # we need to wait for cloudera checks to complete...
            # otherwise it will report as failing
            sleep(10)
            for i in range(24):
                sleep(10)
                service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
                if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                    break

            service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
            if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
                module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
            else:
                module.fail_json(msg='Service {0} failed'.format(verb))

        # handle cluster
        # currently this only can restart
        elif action_a == 'cluster':
            state_a = module.params.get('state', None)

            if state_a == 'restarted':
                command = cluster.restart(redeploy_client_configuration=True)
                if command.wait().success == False:
                    module.fail_json(msg='Cluster resart failed with {0}'.format(command.resultMessage))
                else:
                    module.exit_json(changed=True, msg='Cluster restart successful')

        # Snapshot policy
        # only create is supported
        elif action_a == 'create_snapshot_policy':
            name_a = module.params.get('name', None)
            value_a = module.params.get('value', None)
            service_a = module.params.get('service', None)
            service = cluster.get_service(SERVICE_MAP[service_a])
            payload = loads(value_a)
            # checking if policy already exists. Exception is expected when configure for the first time.
            try:
                service.get_snapshot_policy(name_a)
                module.exit_json(changed=False, msg='Defined policy already exists')
            except ApiException:
                pass
            try:
                service.create_snapshot_policy(payload)
                module.exit_json(changed=True, msg='Snapshot policy was created.')
            except (ApiException, AttributeError):
                module.fail_json(msg='ERROR in creating snapshot policy.')
--- CHECK ROLES HEALTH STATUS START ---\n\n\n""" api = ApiResource(cm_host, 7180, user , password) # Get a list of all clusters cdh = None if (api == None): print "COnnect error" try: for c in api.get_all_clusters(): cdh = c print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," PRESENT CLUSTER: ",cdh.name except: print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()) )," Error get cluster" send_alert_mail("CONNECT_ERROR","","",""); if cdh != None: for s in api.get_cluster(cdh.name).get_all_services(): #print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," SERVICES: ",s.name cluster = api.get_cluster(cdh.name); service_mapred=cluster.get_service(s.name) roles=service_mapred.get_all_roles() for r in service_mapred.get_all_roles(): #print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," Role ",r.name," is in status [ ",r.healthSummary," ]" #check if not GOOD and not BAD if (r.healthSummary != "GOOD" and r.roleState == "STARTED" and r.healthSummary != "BAD"): send_alert_mail(r.healthSummary,r.name,r.hostRef.hostId,s.name) print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," SERVICES: ",s.name print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," Role ",r.name," is in status [ ",r.healthSummary," ]" print """\n\n\n --- CHECK ROLES HEALTH NOT GOOD STATUS END --- ====================================================================================""" except: print "Can't not connect to CDH API"
def _wait_for_command(cmd, poll_seconds=5):
    """Poll *cmd* until its ``success`` is no longer None; return the last fetch."""
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    return cmd


def _wait_for_parcel_stage(api, parcel, stage, poll_seconds=5):
    """Re-fetch *parcel* every *poll_seconds* until it reaches *stage*."""
    while parcel.stage != stage:
        sleep(poll_seconds)
        parcel = get_parcel(api, parcel.product, parcel.version, cluster_name)
    return parcel


def _abort(message):
    """Print *message* and stop the script."""
    print(message)
    # NOTE(review): exits with status 0 even though this is a failure path;
    # kept as-is because callers may rely on it - consider a non-zero status.
    exit(0)


def set_up_cluster():
    """Install hosts, create the cluster, deploy CDH and run the first run.

    Uses the module-level settings (``cm_host``, ``host_list``,
    ``cluster_name``, ``service_types_and_names``, the Hive metastore and
    Reports Manager database settings, ...). Progress is printed; on any
    failed step a message is printed and the script exits.
    """
    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)

    # get the CM instance
    cm = ClouderaManager(api)

    # activate the CM trial license
    cm.begin_trial()

    # create the management service
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password,
                          cm_repo_url=cm_repo_url)
    print("Installing hosts. This might take a while.")
    cmd = _wait_for_command(cmd)
    if cmd.success is not True:
        _abort("cm_host_install failed: " + cmd.resultMessage)
    print("cm_host_install succeeded")

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance
    cluster = create_cluster(api, cluster_name, cdh_version)

    # add all our hosts to the cluster
    cluster.add_hosts(host_list)

    # re-fetch under the same name the cluster was created with (this was a
    # hard-coded "Cluster 1", which only worked when cluster_name matched it)
    cluster = api.get_cluster(cluster_name)

    # get and list all available parcels
    parcels_list = []
    print("Available parcels:")
    for p in cluster.get_all_parcels():
        print('\t' + p.product + ' ' + p.version)
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)

    if not parcels_list:
        _abort("No " + cdh_version + " parcel found!")

    # pick the parcel with the greatest version string (first one on ties)
    cdh_parcel = max(parcels_list, key=lambda p: p.version)

    # download the parcel
    print("Starting parcel download. This might take a while.")
    cmd = cdh_parcel.start_download()
    if cmd.success is not True:
        _abort("Parcel download failed!")

    # make sure the download finishes
    cdh_parcel = _wait_for_parcel_stage(api, cdh_parcel, 'DOWNLOADED')
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded")

    # distribute the parcel
    print("Starting parcel distribution. This might take a while.")
    cmd = cdh_parcel.start_distribution()
    if cmd.success is not True:
        _abort("Parcel distribution failed!")

    # make sure the distribution finishes
    cdh_parcel = _wait_for_parcel_stage(api, cdh_parcel, "DISTRIBUTED")
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " distributed")

    # activate the parcel
    cmd = cdh_parcel.activate()
    if cmd.success is not True:
        _abort("Parcel activation failed!")

    # make sure the activation finishes (now paced like the other waits
    # instead of re-querying in a tight loop)
    cdh_parcel = _wait_for_parcel_stage(api, cdh_parcel, "ACTIVATED")
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " activated")

    # inspect hosts and print the result
    print("Inspecting hosts. This might take a few minutes.")
    cmd = _wait_for_command(cm.inspect_hosts())
    if cmd.success is not True:
        _abort("Host inpsection failed!")
    print("Hosts successfully inspected: \n" + cmd.resultMessage)

    # create all the services we want to add; we will only create one instance
    # of each
    for s in service_types_and_names.keys():
        cluster.create_service(service_types_and_names[s], s)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # this will set up the Hive and the reports manager databases because we
    # can't auto-configure those two things
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {
        "hive_metastore_database_host": hive_metastore_host,
        "hive_metastore_database_name": hive_metastore_name,
        "hive_metastore_database_password": hive_metastore_password,
        "hive_metastore_database_port": hive_metastore_database_port,
        "hive_metastore_database_type": hive_metastore_database_type,
    }
    hive.update_config(hive_config)

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()

    # this will set the Reports Manager database password
    # first we find the correct role (the last match wins, as before)
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r

    if rm_role is None:
        _abort("No REPORTSMANAGER role found!")

    # then we get the corresponding role config group -- even though there is
    # only one instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type,
                                   rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = {
        "headlamp_database_host": reports_manager_host,
        "headlamp_database_name": reports_manager_name,
        "headlamp_database_user": reports_manager_username,
        "headlamp_database_password": reports_manager_password,
        "headlamp_database_type": reports_manager_database_type,
    }
    rm_rcg.update_config(rm_rcg_config)

    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print("Excuting first run command. This might take a while.")
    cmd = _wait_for_command(cluster.first_run())
    if cmd.success is not True:
        # resultMessage is read as a plain attribute everywhere else in this
        # function; calling it here would raise instead of reporting
        _abort("The first run command failed: " + cmd.resultMessage)

    print("First run successfully executed. Your cluster has been set up!")
def main():
    """
    Apply a bulk configuration update to a Cloudera Manager cluster.

    TODO: This probably needs some work. You get the idea though.

    Command line options give the CM host, port, user and cluster name; the
    password is prompted for.  Every file found in ./conf/<cluster> is read
    into the module-level ``config`` parser.  For each service of the cluster
    that has a matching section, the service-wide settings are taken from the
    ``<service name>-svc-config`` section and each role config group listed in
    the ``config_groups`` key of the ``<service name>`` section is updated
    from the section carrying the group's name.  Finally the client
    configuration is deployed for the whole cluster.
    """
    parser = argparse.ArgumentParser(
        description='Cloudera Manager Bulk Config Update Script')
    parser.add_argument('-H', '--host', '--hostname', action='store',
                        dest='hostname', required=True,
                        help='CM server host')
    parser.add_argument('-p', '--port', action='store', dest='port',
                        type=int, default=7180, help='example: 7180')
    parser.add_argument('-u', '--user', '--username', action='store',
                        dest='username', required=True,
                        help='example: admin')
    parser.add_argument('-c', '--cluster', action='store', dest='cluster',
                        required=True, help='example: hadrian-cluster')
    args = parser.parse_args()
    password = getpass.getpass(
        'Please enter your Cloudera Manager passsword: ')

    # read configuration files:
    conf_dir = './conf/' + args.cluster
    for file_name in os.listdir(conf_dir):
        config.read(conf_dir + '/' + file_name)

    api = ApiResource(args.hostname, args.port, args.username, password)
    cluster = api.get_cluster(args.cluster)

    # update services based with configuration file parameters
    for service in cluster.get_all_services():
        # NOTE(review): the files are loaded into `config`, but the section
        # test goes through `config_grabber` -- confirm that object really
        # exposes has_section().
        if not config_grabber.has_section(service.type):
            print('unknown service: ' + service.name)
            continue

        service.update_config(
            svc_config=config_grabber(service.name + '-svc-config'))
        config_groups = config_grabber(service.name)['config_groups']
        for config_group in config_groups.split(','):
            # The loop variable is the role config group name; the original
            # code referenced an undefined name here.  Whitespace around the
            # commas is tolerated.
            config_group = config_group.strip()
            print(config_group)
            role_group = service.get_role_config_group(config_group)
            role_group.update_config(config_grabber(config_group))

    print('Starting final client configuration deployment for all services.')
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print('Failed to deploy client configuration.')
usage() return RET_BADOPTS # check argument compatibility if args: print >>sys.stderr, '\n\tUnknown trailing argument:', args usage() return RET_BADOPTS if path == None : print >>sys.stderr, '\n\tPlease specify a pathe.' usage() return RET_BADOPTS API = ApiResource(cmHost, CM_PORT, version=CM_VERSION, username=CM_USER, password=CM_PASSWD, use_tls=True) LOG.debug('Connected to CM host on ' + cmHost) procUser = getUsername() LOG.debug('Process effective username is ' + procUser) cluster = API.get_cluster(CLUSTER_NAME) return RET_OK # # The 'main' entry # if __name__ == '__main__': sys.exit(main(sys.argv))
# Restart the Cloudera Management Service and then the cluster named "TIP",
# waiting for each restart command to finish before moving on.
from cm_api.api_client import ApiResource

CM_HOST = "127.0.0.1"
ADMIN_USER = "******"
ADMIN_PASS = "******"

API = ApiResource(
    CM_HOST,
    username=ADMIN_USER,
    password=ADMIN_PASS,
    version=14,
)
MANAGER = API.get_cloudera_manager()

# Management service first ...
mgmt = MANAGER.get_service()
print("restart mgmt...")
mgmt.restart().wait()

# ... then the cluster itself.
print("TIP cluster...")
tip = API.get_cluster("TIP")
tip.restart().wait()
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all
    the hosts are configured and Cloudera Manager is installed with all the
    required databases setup.

    Handle all the steps required in creating a cluster. All the functions are
    built to function idempotently. So you should be able to resume from any
    failed step but running thru the __class__.setup()
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        """
        :param module: handle passed to fail() when a step cannot complete
        :param config: nested dict; this class reads the 'cm', 'cluster',
            'parcels' and 'services' keys
        :param trial: begin a trial license when no license is registered
        :param license_txt: license text to register when not in trial mode
        """
        cm_config = config['cm']
        self.api = ApiResource(cm_config['host'],
                               username=cm_config['username'],
                               password=cm_config['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license, either it's trial mode or a full license
        is entered and registered.

        Nothing is changed when a license can already be fetched.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            elif self.license_txt is not None:
                # Read the value stored on the instance; a bare
                # `license_txt` is not defined inside this method.
                self.manager.update_license(self.license_txt)
            else:
                fail(
                    self.module,
                    'License should be provided or trial should be specified'
                )

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner,
                                                    _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only
        created if another one doesn't exist with the same name.

        Only the configured hosts that are not yet listed on the cluster are
        passed to add_hosts().
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(
                           cluster_config['name']))
            self.cluster = self.api.create_cluster(
                cluster_config['name'], cluster_config['version'],
                cluster_config['fullVersion'])

        cluster_hosts = [
            self.api.get_host(host.hostId).hostname
            for host in self.cluster.list_hosts()
        ]
        hosts = [
            host for host in cluster_config['hosts']
            if host not in cluster_hosts
        ]
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        """
        Download, distribute and activate every parcel listed under the
        'parcels' key of the configuration ('product' defaults to 'CDH').
        """
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'),
                             parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all
        hosts.

        An ApiException is raised while the command is still running, or when
        it reports that it is not currently available for execution; the
        ``retry`` decorator is presumably what turns that into another
        attempt -- confirm against its definition.

        :param cmd: A command instance used for tracking the status of the
            command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution'
                    in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(
                       cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.

        Returns early when the management service already reports STARTED.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT",
                       msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        # Use the configuration stored on the instance; a bare `config` is
        # not defined inside this method.
        mgmt_roles = self.config['services']['MGMT']['roles']

        # Create any role that does not exist yet, on its first listed host.
        for role in mgmt_roles:
            if not mgmt.get_roles_by_type(role['group']):
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        # Apply the per-role settings to each role's base config group.
        for role in mgmt_roles:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(
                role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(
                self.module,
                "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        Each service name is resolved to a class of the same name in this
        module; services without an entry under config['services'] are
        skipped.

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if not service_config:
                continue
            svc_class = getattr(sys.modules[__name__], service)
            svc = svc_class(self.cluster, service_config)
            if not svc.started:
                svc.deploy()
                svc.pre_start()
            service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on
        # other services and is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This
            # helps with re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        """Run every orchestration step in order."""
        # TODO(rnirmal): Cloudera Manager SSL?
        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
def main(argv):
    """
    Deploy, grow, shrink or delete an HDFS + YARN cluster through the
    Cloudera Manager API.

    The action is selected with -d/--deploy, -a/--add, -r/--remove or
    --delete; --name is the cluster name and --hosts a comma delimited host
    list.  On deploy the first host receives the NameNode, SecondaryNameNode,
    ResourceManager and JobHistory roles and every remaining host receives a
    DataNode and a NodeManager.  Role names end with the MD5 hex digest of
    the host name.  Without an action the option help is printed.

    :param argv: not read; OptionParser parses the process command line
        itself.  The parameter is kept so existing callers keep working.
    :raises Exception: when a Cloudera Manager command does not report
        success after waiting CMD_TIMEOUT seconds (a failed HDFS format is
        only printed).
    :raises SystemExit: through parser.error() when --name, or --hosts for
        an action that needs hosts, is missing.
    """
    CM_HOST = "localhost"
    CM_PORT = 7180
    CM_USER = "******"
    CM_PASSWD = "admin"
    CMD_TIMEOUT = 180

    # Configurations
    HDFS_CONF = {}
    NAMENODE_CONF = {
        'dfs_name_dir_list': '/dfs/nn',
        'dfs_namenode_servicerpc_address': 8022,
        'namenode_java_heapsize': 154140672,
    }
    SECONDARY_CONF = {
        'fs_checkpoint_dir_list': '/dfs/snn',
        'secondary_namenode_java_heapsize': 154140672,
    }
    DATANODE_CONF = {
        'dfs_data_dir_list': '/data/1/dfs/dn,/data/2/dfs/dn,/data/3/dfs/dn',
        'dfs_datanode_handler_count': 10,
        'dfs_datanode_du_reserved': 2180395417,
        'dfs_datanode_max_locked_memory': 983564288,
        'datanode_java_heapsize': 286261248,
    }
    YARN_CONF = {
        'hdfs_service': 'hdfs',
    }
    RSRCMAN_CONF = {
        'resource_manager_java_heapsize': 154140672,
        'yarn_scheduler_maximum_allocation_mb': 1513,
        'yarn_scheduler_maximum_allocation_vcores': 2,
    }
    JOBHIST_CONF = {
        'mr2_jobhistory_java_heapsize': 154140672,
    }
    NODEMAN_CONF = {
        'yarn_nodemanager_local_dirs': '/yarn/nm',
        'yarn_nodemanager_resource_cpu_vcores': 2,
        'yarn_nodemanager_resource_memory_mb': 1513,
    }

    # Parser Options
    parser = OptionParser()
    parser.set_defaults(action='')
    parser.add_option("-a", "--add", action="store_const", const="add",
                      dest="action",
                      help="add the list of hosts to the named cluster")
    parser.add_option("-r", "--remove", action="store_const", const="remove",
                      dest="action",
                      help="remove the list of hosts from the named cluster")
    parser.add_option("-d", "--deploy", action="store_const", const="deploy",
                      dest="action",
                      help="deploy the list of hosts as a new cluster with "
                           "the given name")
    parser.add_option("--delete", action="store_const", const="delete",
                      dest="action", help="delete the named cluster")
    parser.add_option("--name", dest="name",
                      help="declare the cluster name to be created or to "
                           "interact with")
    parser.add_option("--hosts", dest="hosts",
                      help="comma delimited list of hosts to be "
                           "added/removed")
    (opts, _unused_args) = parser.parse_args()

    cluster_name = opts.name
    # Always work with a list of host names, whatever the length of --hosts.
    hostnames = [host.strip()
                 for host in (opts.hosts or '').split(",")
                 if host.strip()]
    # One hash per host, same order as `hostnames`; used as role name suffix.
    role_hashes = [hashlib.md5(host.encode("utf-8")).hexdigest()
                   for host in hostnames]

    # Reject incomplete requests before anything is changed on the server.
    if opts.action in ("deploy", "add", "remove", "delete") \
            and not cluster_name:
        parser.error("--name is required")
    if opts.action in ("deploy", "add", "remove") and not hostnames:
        parser.error("--hosts is required for this action")

    api = ApiResource(CM_HOST, CM_PORT, CM_USER, CM_PASSWD)

    # Deploy a new cluster
    if opts.action == "deploy":
        master_host = hostnames[0]
        master_hash = role_hashes[0]
        workers = list(zip(role_hashes[1:], hostnames[1:]))

        # Create Cluster
        print("Creating cluster...")
        cluster = api.create_cluster(cluster_name, "CDH5")
        cluster.add_hosts(hostnames)

        # Create HDFS Service and Roles
        print("Creating HDFS Service and Roles...")
        hdfs = cluster.create_service("hdfs", "HDFS")
        hdfs.create_role("hdfs-NAMENODE-" + master_hash,
                         "NAMENODE", master_host)
        hdfs.create_role("hdfs-SECONDARYNAMENODE-" + master_hash,
                         "SECONDARYNAMENODE", master_host)
        for role_hash, host in workers:
            hdfs.create_role("hdfs-DATANODE-" + role_hash, "DATANODE", host)

        # Configure HDFS
        print("Configuring HDFS...")
        hdfs.update_config(svc_config=HDFS_CONF)
        hdfs_role_conf = {
            "NAMENODE": NAMENODE_CONF,
            "SECONDARYNAMENODE": SECONDARY_CONF,
            "DATANODE": DATANODE_CONF,
        }
        for role_group in hdfs.get_all_role_config_groups():
            if role_group.roleType in hdfs_role_conf:
                role_group.update_config(hdfs_role_conf[role_group.roleType])

        # Start HDFS
        # format_hdfs takes a list of NameNodes
        print("Formatting HDFS...")
        cmd = hdfs.format_hdfs('hdfs-NAMENODE-' + master_hash)[0]
        if not cmd.wait(CMD_TIMEOUT).success:
            # Reported but not fatal, unlike every other step.
            print("Failed to format HDFS")

        print("Starting HDFS...")
        cmd = hdfs.start()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to start HDFS")

        cmd = hdfs.create_hdfs_tmp()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to create HDFS /tmp")

        for role in hdfs.get_all_roles():
            cmd = hdfs.deploy_client_config(role.name)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to deploy client config. Role: " +
                                role.name)

        # Create YARN Service and Roles
        print("Creating YARN Service and Roles...")
        yarn = cluster.create_service("yarn", "YARN")
        yarn.create_role("yarn-RESOURCEMANAGER-" + master_hash,
                         "RESOURCEMANAGER", master_host)
        yarn.create_role("yarn-JOBHISTORY-" + master_hash,
                         "JOBHISTORY", master_host)
        for role_hash, host in workers:
            yarn.create_role("yarn-NODEMANAGER-" + role_hash,
                             "NODEMANAGER", host)

        # Configure YARN
        print("Configuring YARN...")
        yarn.update_config(svc_config=YARN_CONF)
        yarn_role_conf = {
            "RESOURCEMANAGER": RSRCMAN_CONF,
            "JOBHISTORY": JOBHIST_CONF,
            "NODEMANAGER": NODEMAN_CONF,
        }
        for role_group in yarn.get_all_role_config_groups():
            if role_group.roleType in yarn_role_conf:
                role_group.update_config(yarn_role_conf[role_group.roleType])

        # Start YARN
        print("Starting YARN...")
        cmd = yarn.create_yarn_job_history_dir()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to create Job History Directory")

        cmd = yarn.create_yarn_node_manager_remote_app_log_dir()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to create NodeManager remote "
                            "application log directory")

        cmd = yarn.start()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to start YARN")

        for role in yarn.get_all_roles():
            cmd = yarn.deploy_client_config(role.name)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to deploy client config. Role: " +
                                role.name)

        # SUCCESS!
        print("Cluster succesfully deployed.")

    # Add new nodes
    elif opts.action == "add":
        print("Adding hosts...")
        cluster = api.get_cluster(cluster_name)
        cluster.add_hosts(hostnames)

        print("Configurng HDFS Roles...")
        hdfs = cluster.get_service("hdfs")
        for role_hash, host in zip(role_hashes, hostnames):
            role_name = "hdfs-DATANODE-" + role_hash
            datanode = hdfs.create_role(role_name, "DATANODE", host)
            datanode.update_config(DATANODE_CONF)
            for cmd in hdfs.start_roles(role_name):
                if not cmd.wait(CMD_TIMEOUT).success:
                    raise Exception(cmd.name)
            cmd = hdfs.deploy_client_config(role_name)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to deploy client config " + role_name)

        print("Configuring YARN roles...")
        yarn = cluster.get_service("yarn")
        for role_hash, host in zip(role_hashes, hostnames):
            role_name = "yarn-NODEMANAGER-" + role_hash
            nodeman = yarn.create_role(role_name, "NODEMANAGER", host)
            nodeman.update_config(NODEMAN_CONF)
            for cmd in yarn.start_roles(role_name):
                if not cmd.wait(CMD_TIMEOUT).success:
                    raise Exception(cmd.name)
            cmd = yarn.deploy_client_config(role_name)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to deploy client config " + role_name)

        # SUCCESS!
        print("Nodes successfully added")

    # Remove nodes
    elif opts.action == "remove":
        cluster = api.get_cluster(cluster_name)
        hdfs = cluster.get_service("hdfs")
        yarn = cluster.get_service("yarn")

        print("Decommissioning Roles...")
        for role_hash in role_hashes:
            cmd = yarn.decommission("yarn-NODEMANAGER-" + role_hash)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to decommission role "
                                "yarn-NODEMANAGER-" + role_hash)
            cmd = hdfs.decommission("hdfs-DATANODE-" + role_hash)
            if not cmd.wait(CMD_TIMEOUT).success:
                raise Exception("Failed to decommission role "
                                "hdfs-DATANODE-" + role_hash)

        print("Deleting Nodes...")
        for role_hash in role_hashes:
            hdfs.delete_role("hdfs-DATANODE-" + role_hash)
            yarn.delete_role("yarn-NODEMANAGER-" + role_hash)
        for hostname in hostnames:
            cluster.remove_host(hostname)

        # SUCCESS
        print("Nodes successfull removed.")

    # Delete Cluster
    elif opts.action == "delete":
        cluster = api.get_cluster(cluster_name)
        hdfs = cluster.get_service("hdfs")
        yarn = cluster.get_service("yarn")

        print("Stopping YARN...")
        cmd = yarn.stop()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to stop YARN")

        print("Stopping HDFS...")
        cmd = hdfs.stop()
        if not cmd.wait(CMD_TIMEOUT).success:
            raise Exception("Failed to stop HDFS")

        print("Deleting Cluster...")
        api.delete_cluster(cluster_name)

        # SUCCESS
        print("Cluster successfully deleted.")

    else:
        print("PLEASE SELECT A CORRECT OPTION")
        parser.print_help()
# Restart the Cloudera Management Service, restart the "Spark" cluster and
# redeploy its client configuration, printing the outcome of every command.
from cm_api.api_client import ApiResource
from cm_api.endpoints.cms import ClouderaManager

cm_host = "127.0.0.1"
api = ApiResource(cm_host, username="******", password="******")
cms = ClouderaManager(api)

# Management service: issue the restart and block until it completes.
cmd = cms.get_service().restart()
cmd = cmd.wait()
print("Cloudera Manager Restart. Active: %s. Success: %s"
      % (cmd.active, cmd.success))

# Cluster restart.
cluster = api.get_cluster("Spark")
print(cluster)
restart_cluster = cluster.restart()
restart_cluster = restart_cluster.wait()
print("Cluster %s. Status - restart success: %s."
      % (cluster.name, restart_cluster.success))

# Client configuration redeployment; wait() is invoked twice on the command,
# exactly as before.
print("Cluster %s. Status - Configuration Stale -- Redeploying configurations"
      % cluster.name)
redeploy_config = cluster.deploy_client_config().wait()
redeploy_config = redeploy_config.wait()
print("New configuration success: %s." % redeploy_config.success)
class DeployCloudEraCluster(object):
    """
    This class to define and setup the base properties of the cluster node
    for hadoop echo system.

    It wraps one cm_api ``ApiResource`` connection and offers helpers that
    print cluster, service and role information, export/import cluster
    templates and create and configure the Cloudera Management Service roles.
    """
    _cloudera_manager_host = None
    _port_number = None
    _user_name = None
    _password = None
    _version = 12

    # Database endpoint used by the Activity Monitor and Reports Manager
    # role configurations.
    _MGMT_DATABASE_HOST = "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432"

    def __init__(self, cloudera_manager_host, port_number, user_name,
                 password, version):
        """
        Initialize the object to provision the cluster node for the hadoop
        parcel based provision

        :param cloudera_manager_host: Cloudera Manager host name
        :param port_number: Cloudera Manager port
        :param user_name: API user
        :param password: API password
        :param version: API version to talk to
        """
        self._cloudera_manager_host = cloudera_manager_host
        self._port_number = port_number
        self._user_name = user_name
        self._password = password
        self._version = version
        # API version vary depending upon the job you want to perform. "1" if
        # you want to check the cluster and 12 if you want to export the
        # property of config
        self._cloudera_manager_oconnect = ApiResource(
            self._cloudera_manager_host,
            self._port_number,
            self._user_name,
            self._password,
            version=self._version)

    def _get_mgmt_service(self):
        """Return the Cloudera Management Service of the connected manager."""
        # The service hangs off the ClouderaManager object, not off the
        # ApiResource connection itself.
        return self._cloudera_manager_oconnect.get_cloudera_manager() \
            .get_service()

    @staticmethod
    def _mgmt_role_config(log_dir, **settings):
        """
        Build a management role configuration.

        :param log_dir: value of 'mgmt_log_dir'
        :param settings: role specific entries; they override the shared ones
        :return: dict with the logging/heap-dump entries common to all
            management roles plus ``settings``
        """
        role_config = {
            'mgmt_log_dir': log_dir,
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        role_config.update(settings)
        return role_config

    def _update_mgmt_role(self, role_name, role_config):
        """Apply ``role_config`` to the management role ``role_name``."""
        self._get_mgmt_service().get_role(role_name).update_config(
            role_config)

    def get_cluster_versions(self):
        """
        To get all the provisioned cluster versions against the Cloud era
        manager

        Prints ``name = version`` for every cluster.

        :return: the last cluster listed, or None when there is no cluster
        """
        last_cluster = None
        for cluster in self._cloudera_manager_oconnect.get_all_clusters():
            print("%s = %s" % (cluster.name, cluster.version))
            last_cluster = cluster
        return last_cluster

    def get_cluster_services(self, cdh_version):
        """
        To get all the provisioned cluster services against the specific
        cluster

        Every service is printed; for services of type HDFS the state,
        health summary, URL and health checks are printed as well.

        :param cdh_version: cluster object providing get_all_services()
        :return:
        """
        for srv in cdh_version.get_all_services():
            print(srv)
            if srv.type == "HDFS":
                hdfs = srv
                print("%s %s %s" % (hdfs.name, hdfs.serviceState,
                                    hdfs.healthSummary))
                print(hdfs.serviceUrl)
                for chk in hdfs.healthChecks:
                    print("%s --- %s" % (chk['name'], chk['summary']))

    def get_cluster_roles_info(self, cdh_version):
        """
        To get the details of all the roles for each cluster node

        Only roles of type NAMENODE are printed.

        :param cdh_version: object providing get_all_roles()
        :return:
        """
        for role in cdh_version.get_all_roles():
            if role.type == 'NAMENODE':
                namenode = role
                print("Role name: %s\nState: %s\nHealth: %s\nHost: %s" % (
                    namenode.name, namenode.roleState,
                    namenode.healthSummary, namenode.hostRef.hostId))

    def get_cdh_metrics_details(self, cdh_version):
        """
        To get the CDH metrics containing details about all the activities in
        the cluster node

        :param cdh_version: object providing get_metrics()
        :return:
        """
        for metric in cdh_version.get_metrics():
            print("%s (%s)" % (metric.name, metric.unit))

    def start_service(self, cdh_service_name):
        """
        Restart the given CDH service and print the command status before
        and after waiting for it.

        :param cdh_service_name: service object providing restart()
        :return:
        """
        service = cdh_service_name.restart()
        print(service.active)

        service_status = service.wait()
        print("Active: %s. Success: %s" % (service_status.active,
                                           service_status.success))

    def restart_service(self, cdh_service_name, namenode):
        """
        To restart the service of the specific role

        :param cdh_service_name: service object providing restart_roles()
        :param namenode: role whose ``name`` is restarted
        :return:
        """
        for command in cdh_service_name.restart_roles(namenode.name):
            print(command)

    def configure_services(self, cdh_service_name):
        """
        Print name, related name and description of every service level
        configuration entry (full view).

        :param cdh_service_name: service object providing get_config()
        :return:
        """
        service_config = cdh_service_name.get_config(view='full')[0]
        for name, config in service_config.items():
            print("%s - %s - %s" % (name, config.relatedName,
                                    config.description))

    def export_cluster_template(self, template_filename, cluster_name):
        """
        To export the current cluster configuration into the given file.

        :param template_filename: path of the JSON file to write
        :param cluster_name: name of the cluster to export
        :return:
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        cdh_template = cluster.export()
        with open(template_filename, 'w') as outfile:
            json.dump(cdh_template.to_json_dict(), outfile, indent=4,
                      sort_keys=True)

    def import_cluster_template(self, template_filename, cluster_name):
        """
        To import cluster template configuration

        :param template_filename: path of the JSON template to read
        :param cluster_name: kept for backward compatibility; not used
        :return:
        """
        # The template object and the ClouderaManager endpoint both need the
        # API connection as their resource root, not a cluster object.
        resource = self._cloudera_manager_oconnect
        with open(template_filename) as data_file:
            data = json.load(data_file)
        template = ApiClusterTemplate(resource).from_json_dict(data, resource)
        cms = ClouderaManager(resource)
        command = cms.import_cluster_template(template)
        print(command)

    def deploy_cloudera_manager_services(self):
        """
        To deploy the cloudera manager services

        Creates the management service, applies the service level settings
        and creates one role of each management type on the Cloudera Manager
        host.

        :return:
        """
        varMgmtServiceConfig = {
            'enable_config_alerts': True,
            'process_groupname': "cloudera-scm",
            'process_username': "******",
        }
        varManager = self._cloudera_manager_oconnect.get_cloudera_manager()
        varMgmt = varManager.create_mgmt_service(ApiServiceSetupInfo())

        # update the cloudera service config
        varMgmt.update_config(varMgmtServiceConfig)

        for role_type in ("ACTIVITYMONITOR", "ALERTPUBLISHER", "EVENTSERVER",
                          "HOSTMONITOR", "SERVICEMONITOR", "REPORTSMANAGER"):
            varMgmt.create_role(role_type + "-1", role_type,
                                self._cloudera_manager_host)

    def deploy_activity_monitor(self):
        """
        To deploy the Activity monitor services

        :return:
        """
        varActivityMonitorPassword = "******"
        # config for the activity monitoring
        self._update_mgmt_role("ACTIVITYMONITOR-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-firehose",
            firehose_database_host=self._MGMT_DATABASE_HOST,
            firehose_database_user="******",
            firehose_database_password=varActivityMonitorPassword,
            firehose_database_type="postgresql",
            firehose_database_name="amon",
            firehose_heapsize=268435456,
            # This role passes the flag as a string, unlike the others.
            enable_config_alerts="true",
        ))

    def deploy_alert_publisher(self):
        """
        To deploy the alert publisher

        :return:
        """
        self._update_mgmt_role("ALERTPUBLISHER-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-alertpublisher",
            alert_heapsize=268435456,
        ))

    def deploy_event_server(self):
        """
        To deploy event server

        :return:
        """
        self._update_mgmt_role("EVENTSERVER-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-eventserver",
            event_server_heapsize=268435456,
            eventserver_index_dir="/opt/cloudera/lib/cloudera-scm-eventserver",
        ))

    def deploy_host_monitor(self):
        """
        To deploy host monitor

        :return:
        """
        self._update_mgmt_role("HOSTMONITOR-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-firehose",
            firehose_heapsize=268435456,
            firehose_storage_dir="/opt/cloudera/lib/cloudera-host-monitor",
        ))

    def deploy_service_monitor(self):
        """
        To deploy the service monitor

        :return:
        """
        self._update_mgmt_role("SERVICEMONITOR-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-firehose",
            firehose_heapsize=268435456,
            firehose_storage_dir="/opt/cloudera/lib/cloudera-service-monitor",
        ))

    def deploy_report_manager(self):
        """
        To deploy the service Report Manager

        :return:
        """
        varReportManagerPassword = "******"
        self._update_mgmt_role("REPORTSMANAGER-1", self._mgmt_role_config(
            "/opt/cloudera/log/cloudera-scm-headlamp",
            headlamp_database_host=self._MGMT_DATABASE_HOST,
            headlamp_database_user="******",
            headlamp_database_password=varReportManagerPassword,
            headlamp_database_type="postgresql",
            headlamp_database_name="rman",
            headlamp_heapsize=536870912,
            headlamp_scratch_dir="/opt/cloudera/lib/cloudera-scm-headlamp",
        ))

    def deploy_services(self):
        """
        Start the Cloudera Management Service and wait for the start command
        to finish.

        :return:
        """
        self._get_mgmt_service().start().wait()

    def create_hadoop_cluster(self):
        """
        Create the "POC-D Cluster" cluster (CDH5, full version 5.8.0) on the
        connected Cloudera Manager.

        :return: the object returned by ApiResource.create_cluster()
        """
        varClusterName = "POC-D Cluster"
        varCDHVersion = "CDH5"
        varCDHFullVersion = "5.8.0"
        # Create through the connection held by this instance.
        varCluster = self._cloudera_manager_oconnect.create_cluster(
            varClusterName, varCDHVersion, varCDHFullVersion)
        return varCluster
#!/usr/bin/env python
# Make sure a Flume service named "flume1" exists on a cluster and has one
# AGENT role per host given on the command line.
#
# Usage: <script> CM_HOST CLUSTER_NAME [AGENT_HOST ...]
import sys

from cm_api.api_client import ApiResource
from cm_api.api_client import ApiException

CMD_TIMEOUT = 180

api = ApiResource(
    sys.argv[1],
    username="******",
    password="******",
    use_tls=False,
    version=4,
)
cluster = api.get_cluster(sys.argv[2])

# Reuse the service when it is already there, create it otherwise.
try:
    flume = cluster.get_service("flume1")
except ApiException:
    flume = cluster.create_service("flume1", "FLUME")

# Agents are numbered from 1 in the order the hosts were given; an agent
# that already exists is left alone.
for position, agent_host in enumerate(sys.argv[3:], 1):
    name = "flume-agent" + str(position)
    try:
        flume.get_role(name)
    except ApiException:
        flume.create_role(name, "AGENT", agent_host)

# Service-wide settings naming the HBase and HDFS services.
flume_service_config = {
    'hbase_service': 'hbase1',
    'hdfs_service': 'hdfs1'
}
flume.update_config(svc_config=flume_service_config)
def api_data_collection(request):
    """
    Application information collection restful api.
    Query completed application information on specific conditions and
    accumulate it.

    @method: GET
    @param from_time: Application finish time after this time.
                      format : "%d/%m/%Y %H:%M". time zone=UTC+8
    @param end_time: Application finish time before this time.
                     format : "%d/%m/%Y %H:%M". time zone=UTC+8
    @param queue_name: Query completed application information on specific
                       queue name.
    @param app_type: Query completed application information on specific
                     application type.
    @param app_state: Query completed application information on specific
                      application states. specified as a comma-separated
                      list. ex: FINISHED,FAILED,KILLED
    @return: json data
             { "success":False, "message":"error message" }
             { "success":True,
               "message": { "queue_view":{...}, "group_view":{...} } }

    Connection settings for Cloudera Manager, the cluster and LDAP are read
    from cluster.ini under settings.BASE_DIR.  Any other HTTP method, or a
    missing/malformed from_time or end_time, yields the error form of the
    response.
    """
    response = {'success': False, 'message': ''}

    def _error(message):
        # Build the failure form of the JSON response.
        response['success'] = False
        response['message'] = message
        return HttpResponse(json.dumps(response))

    if request.method != "GET":
        return _error("only GET is supported")

    filter_dict = {}
    if "queue_name" in request.GET:
        filter_dict['queue_name'] = request.GET.get('queue_name')
    if "app_type" in request.GET:
        filter_dict['app_type'] = request.GET.get('app_type')
    if "app_state" in request.GET:
        filter_dict['app_state'] = request.GET.get('app_state').split(',')

    # time zone = Asia/Taipei = UTC+8; shift the inputs back by eight hours
    time_format = "%d/%m/%Y %H:%M"
    utc_offset = timedelta(hours=8)
    try:
        from_time = datetime.strptime(request.GET.get('from_time'),
                                      time_format) - utc_offset
        to_time = datetime.strptime(request.GET.get('end_time'),
                                    time_format) - utc_offset
    except (TypeError, ValueError):
        # TypeError: parameter absent; ValueError: wrong format.
        return _error("from_time and end_time are required, "
                      "format: " + time_format)

    # get config
    config = ConfigParser.ConfigParser()
    config.read(os.path.join(settings.BASE_DIR, "cluster.ini"))
    cm_host = config.get("CM", "cm.host")
    cm_port = config.get("CM", "cm.port")
    cm_version = config.get("CM", "cm.version")
    cm_username = config.get("CM", "cm.username")
    cm_password = config.get("CM", "cm.password")
    #
    cluster_name = config.get("Cluster", "cluster.name")
    yarn_name = config.get("Cluster", "cluster.yarn.name")
    #
    ldap_host = config.get("Ldap", "ldap.host")
    ldap_username = config.get("Ldap", "ldap.username")
    ldap_password = config.get("Ldap", "ldap.password")

    # get active resource manager info
    try:
        cm_api = ApiResource(cm_host, int(cm_port), username=cm_username,
                             password=cm_password, version=int(cm_version))
        cm_cluster_obj = cm_api.get_cluster(name=cluster_name)
        cm_yarn_obj = cm_cluster_obj.get_service(name=yarn_name)
        #
        find_active_rm = False
        for rm in cm_yarn_obj.get_roles_by_type(role_type="RESOURCEMANAGER"):
            if rm.haStatus == "ACTIVE":
                host = cm_api.get_host(rm.hostRef.hostId)
                active_rm_ip = host.ipAddress
                active_rm_port = 8088
                find_active_rm = True
        #
        if not find_active_rm:
            message = "can not find active rm"
            print("[ERROR] " + message)
            return _error(message)
    except Exception as e:
        message = "can not get cm yarn object"
        print("[ERROR] " + message + str(e))
        return _error(message)

    # all application statistics
    statistics_response = applications_statistics(
        active_rm_ip, active_rm_port, from_time, to_time, filter_dict)
    if not statistics_response['success']:
        return _error(statistics_response['message'])

    # create ldap connection. access ldap to get group of account
    # (one connection only; it is released in the finally clause below)
    ldap_connection = create_ldap_connection(ldap_host, ldap_username,
                                             ldap_password)
    if not ldap_connection:
        return _error("can not connect to ldap://" + ldap_host)

    # init queue view result & group view result
    queue_view_final_result = statistics_response['message']
    group_view_final_result = {}

    try:
        # add group information to queue view result and accumulate the
        # result by group
        for queue, queue_info in queue_view_final_result.items():
            queue_info['group'] = ''
            # queue naming : root.SYSTEM.<account> , root.PERSONAL.<account>
            m = re.match(r"(?P<root>\w+)\.(?P<second>\w+)\.(?P<third>\w+)",
                         queue)
            if not (m and m.group('root') == 'root'
                    and m.group('second') in ('SYSTEM', 'PERSONAL')):
                # Queues outside the naming scheme belong to no group.
                continue

            queue_info['account'] = m.group('third')
            group_query_result = query_group_of_user(ldap_connection,
                                                     queue_info['account'])
            group = group_query_result['group']
            project_name = group_query_result['name']
            queue_info['group'] = group

            group_entry = group_view_final_result.setdefault(
                group, {'apps': {}, 'queues': [], 'name': project_name})
            group_entry['queues'].append(queue)
            # Sum every counter per final application state, across
            # application types.
            for app_type, app_info in queue_info['apps'].items():
                for app_state, data in app_info['final_status'].items():
                    state_totals = group_entry['apps'].setdefault(app_state,
                                                                  {})
                    for key in data:
                        if key in state_totals:
                            state_totals[key] += data[key]
                        else:
                            state_totals[key] = data[key]
    finally:
        # after finish to accumulate all result, unbind ldap connection
        ldap_connection.unbind()

    # transform duration type from datetime.timedelta to string
    queue_view_final_result = transform_queue_view_response(
        queue_view_final_result)
    group_view_final_result = transform_project_view_response(
        group_view_final_result)
    #
    response['success'] = True
    response['message'] = {}
    response['message']['queue_view'] = queue_view_final_result
    response['message']['group_view'] = group_view_final_result
    print(json.dumps("[DEBUG] response = " + json.dumps(response)))
    return HttpResponse(json.dumps(response))
print >> sys.stderr, '\n\tPlease specify a database and a table.' usage() return cf['RET_BADOPTS'] API = ApiResource(cmHost, cf['CM_PORT'], version=cf['CM_VERSION'], username=cf['CM_USER'], password=cf['CM_PASSWD'], use_tls=True) LOG.debug('Connected to CM host on ' + cmHost) procUser = getUsername() LOG.debug('Process effective username is ' + procUser) cluster = API.get_cluster(cf['CLUSTER_NAME']) prod_nav = { 'proto': cf['PROD_NAV_PROTO'], 'host': cf['PROD_NAV_HOST'], 'port': cf['PROD_NAV_PORT'], 'user': cf['PROD_NAV_USER'], 'passwd': cf['PROD_NAV_PASSWD'] } dr_nav = { 'proto': cf['DR_NAV_PROTO'], 'host': cf['DR_NAV_HOST'], 'port': cf['DR_NAV_PORT'], 'user': cf['DR_NAV_USER'], 'passwd': cf['DR_NAV_PASSWD'] }
import json

from cm_api.api_client import ApiResource
from cm_api.endpoints.types import ApiClusterTemplate
from cm_api.endpoints.cms import ClouderaManager

# Export the definition of the cluster named "Cluster 2" as a cluster
# template and store it as indented, key-sorted JSON in /tmp/template.json.

# Connection to Cloudera Manager, pinned to API version 12.
resource = ApiResource(
    server_host="4d92d0ab-2fa6-4d9d-bef5-dbf0f5dc29ab.priv.cloud.scaleway.com",
    server_port=7180,
    username="admin",
    password="admin",
    version=12)

cluster = resource.get_cluster("Cluster 2")
template = cluster.export()

with open('/tmp/template.json', 'w') as outfile:
    json.dump(template.to_json_dict(), outfile, sort_keys=True, indent=4)
def main():
    """Deploy a CDH cluster described by a ConfigObj file.

    Command line: ``<script> -i configfile``.

    The function reads the ``[cluster]`` section of the configuration file,
    refuses to continue when a cluster with the configured name already
    exists, then creates the cluster, the management service, HDFS/MapReduce
    and every optional service listed under ``services``.  The cluster is
    stopped and started once after the core services have been created.

    Exit codes (via ``sys.exit``):
        2 -- wrong number of arguments, unknown option or missing config file
        0 -- ``-h`` was given, or the cluster already exists
    """
    usage = "Usage: %s -i configfile " % sys.argv[0]
    configfile = ''

    # Exactly one option plus one value is accepted.
    if len(sys.argv) != 3:
        print(usage)
        sys.exit(2)

    try:
        myopts, _ = getopt.getopt(sys.argv[1:], "i:h")
    except getopt.GetoptError as e:
        print(str(e))
        print(usage)
        sys.exit(2)

    for o, a in myopts:
        if o == '-i':
            configfile = a
        elif o == '-h':
            # Asking for help is not an error: show the usage and stop here
            # instead of falling through to the "file does not exist" path.
            print(usage)
            sys.exit(0)

    if not os.path.isfile(configfile):
        print("file does not exist...")
        sys.exit(2)
    print("processing configuration file....")

    config = ConfigObj(configfile)
    cluster_name = config['cluster']['name']
    cdh_manager = config['cluster']['cdh_manager']
    cm_hostname = config['cluster']['cm_hostname']
    hostnames = config['cluster']['server_hostnames']
    services = config['cluster']['services']
    server_rack = config['cluster']['server_rack']
    server_login = config['cluster']['server_login']
    server_passwd = config['cluster']['server_passwd']
    server_key = config['cluster']['server_key']
    server_passphrase = config['cluster']['server_passphrase']
    cloudera_manager_repo = config['cluster']['cloudera_manager_repo']

    api = ApiResource(cdh_manager, username="******", password="******")

    # Never touch an existing cluster: the operator has to remove it first.
    for c in api.get_all_clusters():
        if c.name == cluster_name:
            print("Cluster %s already exists " % (cluster_name))
            print("Please manually delete the cluster %s , all hosts and associated services." % (cluster_name))
            sys.exit(0)
    # Announce the start once, after all existing clusters have been checked.
    print("Starting the automation process...")

    cdhproc(cluster_name, api, hostnames, server_rack, server_login, server_passwd,
            server_key, server_passphrase, cloudera_manager_repo)
    createMGMT(api, cm_hostname, server_login, server_passwd, server_passphrase, server_key)
    deployHDFSMAP(cluster_name, api, configfile)

    # Services created before the cluster restart.
    if "yarn" in services:
        createYarn(cluster_name, api, configfile)
    if "zookeeper" in services:
        createZookeeper(cluster_name, api, configfile)
    if "hive" in services:
        createHive(cluster_name, api, configfile)
    if "hbase" in services:
        createHbase(cluster_name, api, configfile)
    if "spark" in services:
        createSpark(cluster_name, api, configfile)
    if "impala" in services:
        createImpala(cluster_name, api, configfile)

    cluster = api.get_cluster(cluster_name)
    print("Stopping cluster...")
    cmd = cluster.stop().wait()
    print("Active: %s. Success: %s" % (cmd.active, cmd.success))
    print("Starting cluster...")
    cmd = cluster.start().wait()
    print("Active: %s. Success: %s" % (cmd.active, cmd.success))

    # Services created after the cluster restart.
    if "solr" in services:
        createSolr(cluster_name, api, configfile)
    if "flume" in services:
        createFlume(cluster_name, api, configfile)
    if "oozie" in services:
        createOozie(cluster_name, api, configfile)
    if "sqoop" in services:
        createSqoop(cluster_name, api, configfile)
    if "hue" in services:
        createHue(cluster_name, api, configfile)

    print("Cluster deployed successfully....")
    print("Login to: http://" + cdh_manager + ":7180")
if len(sys.argv) != 2: print "Error: Wrong number of arguments" print "Usage: create-flume-service.py <flume-service-name>" print "Example: create-flume-service.py Flume" quit(1) ## Name of Flume Service to create flume_service_name = sys.argv[1] ## Connect to CM print "\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port + "..." api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login, password=cm_password, version=cm_api_version) print "Connection is good!" ## Get the Cluster cluster = api.get_cluster(cluster_name) ## Get the existing Services service_list = cluster.get_all_services() ## Check that a FLUME service does not already exist ## You could skip this check if you want if you want to have more than one FLUME services on your cluster ## I included the check just as a safeguard in case you only want one FLUME service on your cluster for service in service_list: if service.type == "FLUME": print "Error: A FLUME Service already exists (Service Name: '" + service.name + "')" print "Aborting..." exit(1) ## Check that there is an HDFS Service with the given name unless hdfs_service_name was specified as 'none' if hdfs_service_name != "none":
def get_cluster():
    """Connect to Cloudera Manager and return ``(api, cluster)``.

    The cluster is not looked up by a fixed name: the first cluster reported
    by Cloudera Manager is used, so a renamed cluster is still found.  This
    relies on the Cloudera Manager instance managing a single cluster.
    """
    # connect to cloudera manager
    api = ApiResource(CM_HOST, username="******", password="******")

    # Resolve the current name of the first cluster, then fetch it by name.
    first_cluster = api.get_all_clusters()[0]
    cluster = api.get_cluster(first_cluster.name)
    return (api, cluster)
def runner(self, args, display=True):
    """Run the HBase / Hive / Impala blackbox test sequence.

    The test sequence runs in a daemon thread that is given 60 seconds to
    finish.  Every step appends a timing event to ``values`` (on success)
    and a ``*_succeeded`` health event to ``health_values``.  Steps that did
    not report anything within the timeout get a default failed health
    event.  The per-source health events are finally folded into one
    ``hadoop.<type>.health`` event per source.

    Args:
        args: plugin argument string (may be ``None`` or blank); it is split
            on whitespace and handed to ``self.read_args``.
        display: when true the resulting events are passed to
            ``self._do_display``.

    Returns:
        list of events: timing events, health events (including the status
        indicators returned by ``cdh.get_status_indicators()``) and the
        overall health event of each source.
    """
    values = []
    health_values = []

    plugin_args = args.split() \
        if args is not None and (len(args.strip()) > 0) \
        else ""

    options = self.read_args(plugin_args)

    if options.hadoopdistro == 'CDH':
        api = ApiResource(server_host=options.cmhost,
                          server_port=options.cmport,
                          username=options.cmuser,
                          password=options.cmpassword,
                          version=11)
        cluster = api.get_cluster(api.get_all_clusters()[0].name)
        cdh = CDHData(api, cluster)
    else:
        cdh = HDPData(options.cmhost, options.cmuser, options.cmpassword)

    # The worker thread publishes its HBase connection through this holder so
    # that this function can close it afterwards.  A plain assignment inside
    # the nested function would only create a new local variable there.
    hbase_holder = {'connection': None}

    def run_test_sequence():
        # pylint: disable=too-many-return-statements
        hbase = happybase.Connection(host=cdh.get_hbase_endpoint())
        hbase_holder['connection'] = hbase

        # create the hbase test table
        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            try:
                hbase.create_table('blackbox_test_table', {'cf': dict()})
                logging.debug("test table created")
            except AlreadyExists:
                logging.debug("test table exists")
            table = hbase.table('blackbox_test_table')
            end = TIMESTAMP_MILLIS()
            create_table_ok = True
            create_table_ms = end - start
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.create_table_time_ms", [], create_table_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            create_table_ok = False
            reason = ['Create HBase table operation failed']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                  "hadoop.HBASE.create_table_succeeded", reason, create_table_ok))

        #write some data to it
        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            table.put('row_key', {'cf:column': 'value'})
            end = TIMESTAMP_MILLIS()
            write_hbase_ok = True
            write_hbase_ms = end - start
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.write_time_ms", [], write_hbase_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            write_hbase_ok = False
            reason = ['Failed to insert row in HBase table']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                  "hadoop.HBASE.write_succeeded", reason, write_hbase_ok))

        #read some data from it
        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            row = table.row('row_key', columns=['cf:column'])
            end = TIMESTAMP_MILLIS()
            read_hbase_ms = end - start
            read_hbase_ok = row['cf:column'] == 'value'
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.read_time_ms", [], read_hbase_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            # Try to repair the test table, then remove its znode and its
            # HDFS data.  If one of these commands fails the exception
            # propagates and ends the test thread.
            hbase_fix_output = subprocess.check_output([
                'sudo', '-u', 'hbase', 'hbase', 'hbck', '-repair',
                'blackbox_test_table'
            ])
            for line in hbase_fix_output.splitlines():
                if 'Status:' in line or 'inconsistencies detected' in line:
                    LOGGER.debug(line)
            subprocess.check_output([
                'sudo', '-u', 'hbase', 'hbase', 'zkcli', 'rmr',
                '/hbase/table/blackbox_test_table'
            ])
            subprocess.check_output([
                'sudo', '-u', 'hdfs', 'hadoop', 'fs', '-rm', '-r', '-f',
                '-skipTrash', '/hbase/data/default/blackbox_test_table'
            ])
            read_hbase_ok = False
            reason = ['Failed to fetch row by row key from HBase']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                  "hadoop.HBASE.read_succeeded", reason, read_hbase_ok))

        #create some hive metadata
        reason = []
        if abort_test_sequence is True:
            return
        try:
            start = TIMESTAMP_MILLIS()
            hive = hive_api.connect(cdh.get_hive_endpoint())
            end = TIMESTAMP_MILLIS()
            hive.cursor().execute("DROP TABLE blackbox_test_table")
            connect_to_hive_ms = end - start
            connect_to_hive_ok = True
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.connection_time_ms", [], connect_to_hive_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            connect_to_hive_ok = False
            reason = ['Failed to connect to Hive Metastore']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                  "hadoop.HIVE.connection_succeeded", reason, connect_to_hive_ok))

        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            hive.cursor().execute((
                "CREATE EXTERNAL TABLE "
                "blackbox_test_table (key STRING, value STRING)"
                "STORED BY \"org.apache.hadoop.hive.hbase.HBaseStorageHandler\" "
                "WITH SERDEPROPERTIES "
                "(\"hbase.columns.mapping\" = \":key,cf:column\") "
                "TBLPROPERTIES(\"hbase.table.name\" = \"blackbox_test_table\")"
            ))
            end = TIMESTAMP_MILLIS()
            create_metadata_ms = end - start
            create_metadata_ok = True
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.create_metadata_time_ms", [], create_metadata_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            create_metadata_ok = False
            reason = [
                'CREATE EXTERNAL TABLE statement failed on Hive Metastore'
            ]
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                  "hadoop.HIVE.create_metadata_succeeded", reason, create_metadata_ok))

        #read some data via impala using it
        if abort_test_sequence is True:
            return
        if cdh.get_impala_endpoint() is not None:
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                impala = connect(host=cdh.get_impala_endpoint(), port=options.impalaport)
                end = TIMESTAMP_MILLIS()
                impala.cursor().execute("invalidate metadata")
                connect_to_impala_ms = end - start
                connect_to_impala_ok = True
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                          "hadoop.IMPALA.connection_time_ms", [], connect_to_impala_ms))
            except Exception:
                LOGGER.error(traceback.format_exc())
                connect_to_impala_ok = False
                reason = ['Failed to connect to Impala']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                      "hadoop.IMPALA.connection_succeeded", reason, connect_to_impala_ok))

            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                impala_cursor = impala.cursor()
                impala_cursor.execute("SELECT * FROM blackbox_test_table")
                table_contents = impala_cursor.fetchall()
                end = TIMESTAMP_MILLIS()
                read_impala_ms = end - start
                read_impala_ok = table_contents[0][1] == 'value'
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                          "hadoop.IMPALA.read_time_ms", [], read_impala_ms))
            except Exception:
                LOGGER.error(traceback.format_exc())
                read_impala_ok = False
                reason = ['Failed to SELECT from Impala']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                      "hadoop.IMPALA.read_succeeded", reason, read_impala_ok))
        else:
            # No Impala endpoint: read the table back through Hive instead.
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                hive_cursor = hive.cursor()
                hive_cursor.execute("SELECT * FROM blackbox_test_table")
                table_contents = hive_cursor.fetchall()
                end = TIMESTAMP_MILLIS()
                read_hive_ms = end - start
                read_hive_ok = table_contents[0][1] == 'value'
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HQUERY'),
                          "hadoop.HQUERY.read_time_ms", [], read_hive_ms))
            except Exception:
                LOGGER.error(traceback.format_exc())
                read_hive_ok = False
                reason = ['Failed to SELECT from Hive']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HQUERY'),
                      "hadoop.HQUERY.read_succeeded", reason, read_hive_ok))

        #delete metadata
        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            hive.cursor().execute("DROP TABLE blackbox_test_table")
            end = TIMESTAMP_MILLIS()
            drop_metadata_ms = end - start
            drop_metadata_ok = True
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.drop_table_time_ms", [], drop_metadata_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            drop_metadata_ok = False
            reason = ['Failed to DROP table in Hive Metastore']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                  "hadoop.HIVE.drop_table_succeeded", reason, drop_metadata_ok))

        #delete hbase table
        if abort_test_sequence is True:
            return
        reason = []
        try:
            start = TIMESTAMP_MILLIS()
            # Disabled deleting table to work around apparent hbase bug (see VPP-17) but leaving
            # test step in so it can be easily re-enabled for testing.
            #hbase.disable_table('blackbox_test_table')
            #hbase.delete_table('blackbox_test_table')
            end = TIMESTAMP_MILLIS()
            drop_table_ms = end - start
            drop_table_ok = True
            values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.drop_table_time_ms", [], drop_table_ms))
        except Exception:
            LOGGER.error(traceback.format_exc())
            drop_table_ok = False
            reason = ['Failed to drop table in HBase']
        health_values.append(
            Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                  "hadoop.HBASE.drop_table_succeeded", reason, drop_table_ok))

    def to_status(flag):
        ''' Convert True to OK and False to ERROR '''
        if flag in [True, False]:
            status = 'OK' if flag is True else 'ERROR'
        else:
            status = flag
        return status

    def default_health_value(name, service, operation, failed_step):
        ''' Add a failed health event for metric `name` when none was reported.

        Returns True when the default event had to be added. '''
        result = False
        if not any(event.metric == name for event in health_values):
            if failed_step is not None:
                message = 'Did not attempt to %s due to timeout waiting for: %s' % (
                    operation, failed_step)
            else:
                message = 'Timed out waiting for %s to complete' % operation
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name(service), name, [message], False))
            result = True
        return result

    def fill_missing_steps(steps, failed_step):
        ''' Apply default_health_value to each (metric, service, operation).

        The first operation that needed a default becomes the failed step,
        which is returned (unchanged when one was already known). '''
        for metric, service, operation in steps:
            if default_health_value(metric, service, operation, failed_step) \
                    and failed_step is None:
                failed_step = operation
        return failed_step

    test_thread = threading.Thread(target=run_test_sequence)
    test_thread.daemon = True
    abort_test_sequence = False
    test_thread.start()
    test_thread.join(60.0)
    # Tell a still running sequence to stop before its next step.
    abort_test_sequence = True
    if hbase_holder['connection'] is not None:
        hbase_holder['connection'].close()

    failed_step = fill_missing_steps([
        ("hadoop.HBASE.create_table_succeeded", "HBASE", "create HBase table"),
        ("hadoop.HBASE.write_succeeded", "HBASE", "write to HBase"),
        ("hadoop.HBASE.read_succeeded", "HBASE", "read from HBase"),
        ("hadoop.HIVE.connection_succeeded", "HIVE", "connect to Hive Metastore"),
        ("hadoop.HIVE.create_metadata_succeeded", "HIVE", "create Hive Metastore table"),
    ], None)
    if cdh.get_impala_endpoint() is not None:
        failed_step = fill_missing_steps([
            ("hadoop.IMPALA.connection_succeeded", "IMPALA", "connect to Impala"),
            ("hadoop.IMPALA.read_succeeded", "IMPALA", "SELECT from Impala"),
        ], failed_step)
    else:
        failed_step = fill_missing_steps([
            ("hadoop.HQUERY.read_succeeded", "HQUERY", "SELECT from Hive"),
        ], failed_step)
    failed_step = fill_missing_steps([
        ("hadoop.HIVE.drop_table_succeeded", "HIVE", "DROP table in Hive Metastore"),
        ("hadoop.HBASE.drop_table_succeeded", "HBASE", "drop table in HBase"),
    ], failed_step)

    cdh_status_indicators = cdh.get_status_indicators()
    health_values.extend(cdh_status_indicators)

    # Fold all health events of one source into a single overall event.
    overall = {}
    for health_val in health_values:
        try:
            current = overall[health_val.source]
            current_val = to_status(current.value)
            current_causes = current.causes
        except KeyError:
            current_val = 'OK'
            current_causes = []

        update = to_status(health_val.value)

        # If current is ERROR, output is ERROR, regardless
        # If current is WARN, output is WARN if update is OK but ERROR if further WARN or ERROR
        # If update is OK, output is OK if OK, WARN if WARN and ERROR if ERROR
        out = 'ERROR'
        if current_val != "ERROR":
            if current_val == 'WARN':
                if update == 'OK':
                    out = 'WARN'
            if current_val == 'OK':
                out = update
        current_val = out
        current_causes.extend(health_val.causes)

        overall[health_val.source] = Event(
            health_val.timestamp, health_val.source,
            'hadoop.%s.health' % cdh.get_type(health_val.source),
            current_causes, current_val)

    values.extend(health_values)
    values.extend(overall.values())

    if display:
        self._do_display(values)

    return values
class Deploy:
    """Set up a CDH cluster through the Cloudera Manager API.

    The constructor reads hosts and role assignments from ``ParseConfig``,
    connects to Cloudera Manager on the first configured host, gets (or
    creates) the cluster and adds all hosts to it.  The ``setup_*`` and
    ``start_*`` methods then perform the individual deployment steps.

    Host attributes set by the allocation methods:
        nn_host, snn_host, hm_host, jt_host, hms_host, hs2_host -- single
            hosts for NameNode, SecondaryNameNode, HBase master, JobTracker,
            Hive metastore and HiveServer2
        dn_hosts, rs_hosts, tt_hosts, zk_hosts -- host lists for DataNodes,
            RegionServers, TaskTrackers and ZooKeeper servers
        ap_host, es_host, ho_host, sm_host -- hosts of the management roles
            AlertPublisher, EventServer, HostMonitor and ServiceMonitor
    """

    def __init__(self, cm_port='7180', cm_user='******', cm_passwd='admin', cluster_name='cluster1'):
        """Connect to Cloudera Manager and make sure the cluster has its hosts.

        Args:
            cm_port: Cloudera Manager port.
            cm_user: Cloudera Manager user name.
            cm_passwd: Cloudera Manager password.
            cluster_name: name of the cluster to fetch or create.
        """
        self.cluster_name = cluster_name
        self.cdh_version = "CDH5"

        self.cfg = ParseConfig()
        self.host_list = self.cfg.get_hosts()
        self._get_host_allocate()

        # Cloudera Manager is expected on the first configured host.
        self.cm_host = self.host_list[0]

        self.api = ApiResource(self.cm_host, cm_port, cm_user, cm_passwd, version=7)
        self.cm = self.api.get_cloudera_manager()

        try:
            self.cluster = self.api.get_cluster(self.cluster_name)
        except Exception:
            # The cluster could not be fetched: try to create it.
            try:
                self.cluster = self.api.create_cluster(self.cluster_name, self.cdh_version)
            except Exception:
                err('Cannot connect to cloudera manager on %s' % self.cm_host)

        # add all our hosts to the cluster
        try:
            self.cluster.add_hosts(self.host_list)
            info('Add hosts successfully')
        except Exception as e:
            code = getattr(e, 'code', None)
            if code is None:
                # Not an API error carrying a status code: do not hide it
                # behind an AttributeError on ``e.code``.
                raise
            if code == 400:
                info('Already Added hosts')
            elif code == 404:
                err(e.message)

    def _auto_allocate(self, hosts):
        """Assign roles to ``hosts`` automatically (no role config given).

        With six or more hosts the first host carries all master and
        management roles and the remaining hosts are workers.  With fewer
        hosts every host is a worker; with a single host it also carries all
        other roles, otherwise the other roles are placed randomly, keeping
        NameNode and SecondaryNameNode on different hosts.
        ZooKeeper uses the last host (up to 3 hosts) or the last 3 hosts.
        """
        # enable mgmt node if node count is larger than mgmt_th
        mgmt_th = 6

        if not isinstance(hosts, list):
            err('hosts parameter should be a list')

        host_num = len(hosts)
        # node<=3, ZK=1 ,node>3, ZK=3
        zk_num = 1 if host_num <= 3 else 3

        # with mgmt node
        if host_num >= mgmt_th:
            self.ap_host = self.es_host = self.ho_host = self.sm_host = \
                self.nn_host = self.hm_host = self.jt_host = hosts[0]
            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts[1:]
            self.snn_host = hosts[1]
            self.hms_host = hosts[2]
            self.hs2_host = hosts[3]
        # without mgmt node
        else:
            if host_num == 1:
                self.ap_host = self.es_host = self.ho_host = self.sm_host = self.jt_host = \
                    self.nn_host = self.hm_host = self.snn_host = self.hms_host = \
                    self.hs2_host = hosts[0]
            elif host_num > 1:
                # nn, snn not on same node
                tmp_hosts = hosts[:]
                self.nn_host = choice(tmp_hosts)
                tmp_hosts.remove(self.nn_host)
                self.snn_host = choice(tmp_hosts)

                self.hm_host = choice(tmp_hosts)
                self.jt_host = choice(hosts)
                self.hms_host = choice(hosts)
                self.hs2_host = choice(hosts)
                # cm
                self.ap_host = choice(hosts)
                self.es_host = choice(hosts)
                self.ho_host = choice(hosts)
                self.sm_host = choice(hosts)

            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts

        self.zk_hosts = hosts[-zk_num:]

    def _get_host_allocate(self):
        """Set the host attributes from the role config, or auto-allocate.

        Each item of ``self.cfg.get_roles()`` is read as
        ``(host, [role, ...])``.  An unknown role name is reported through
        ``err``.  Single-host roles use the first host configured for them.
        """
        roles = self.cfg.get_roles()
        # auto set if no role config found
        if not roles:
            self._auto_allocate(self.host_list)
            return

        valid_roles = [
            'DN', 'RS', 'ZK', 'HM', 'NN', 'SNN', 'AP', 'ES', 'SM', 'HO', 'TT',
            'JT', 'HMS', 'HS2'
        ]
        role_host = defaultdict(list)

        for item in roles:
            for role in item[1]:
                role = role.strip()
                if role not in valid_roles:
                    err('Incorrect role config')
                role_host[role].append(item[0])

        # cdh
        self.nn_host = role_host['NN'][0]
        self.snn_host = role_host['SNN'][0]
        self.hm_host = role_host['HM'][0]
        self.jt_host = role_host['JT'][0]
        self.hms_host = role_host['HMS'][0]
        self.hs2_host = role_host['HS2'][0]
        self.tt_hosts = role_host['TT']
        self.zk_hosts = role_host['ZK']
        self.dn_hosts = role_host['DN']
        self.rs_hosts = role_host['RS']
        # cm
        self.ap_host = role_host['AP'][0]
        self.es_host = role_host['ES'][0]
        self.ho_host = role_host['HO'][0]
        self.sm_host = role_host['SM'][0]

    def setup_cms(self):
        """(Re)create the Cloudera management service and its four roles."""
        # Best effort: there may be no management service to delete yet.
        try:
            self.cm.delete_mgmt_service()
        except Exception:
            pass

        # create the management service
        try:
            mgmt = self.cm.create_mgmt_service(ApiServiceSetupInfo())
            mgmt.create_role('AlertPublisher', "ALERTPUBLISHER", self.ap_host)
            mgmt.create_role('EventServer', "EVENTSERVER", self.es_host)
            # The HostMonitor goes on the host allocated for it (ho_host),
            # not on the HBase master host (hm_host).
            mgmt.create_role('HostMonitor', "HOSTMONITOR", self.ho_host)
            mgmt.create_role('ServiceMonitor', "SERVICEMONITOR", self.sm_host)
            ok('Cloudera management service created successfully.')
        except ApiException:
            info('Cloudera management service had already been created.')

    def setup_parcel(self):
        """Distribute and activate a parcel on the cluster.

        Parcels that are only available remotely are ignored.  Nothing is
        done when a parcel is already activated.  When several candidate
        parcels exist the user is asked to pick one by its listed number.
        """
        parcels_list = []
        i = 1
        for p in self.cluster.get_all_parcels():
            if p.stage == 'AVAILABLE_REMOTELY':
                continue
            elif p.stage == 'ACTIVATED':
                info('Parcel [%s] has already been activated' % p.version)
                return
            else:
                print('\t' + str(i) + ': ' + p.product + ' ' + p.version)
                i += 1
                parcels_list.append(p)

        if len(parcels_list) == 0:
            err('No downloaded ' + self.cdh_version + ' parcel found!')
            return
        elif len(parcels_list) > 1:
            index = raw_input('Input parcel number:')
            # isdigit must be *called*; the bare method object is always true.
            if not index.isdigit():
                err('Error index, must be a number')
                return
            if not 1 <= int(index) <= len(parcels_list):
                # 0 would silently select the last parcel via index -1.
                err('Error index, out of range')
                return
            cdh_parcel = parcels_list[int(index) - 1]
        else:
            cdh_parcel = parcels_list[0]

        # # download the parcel
        # print "Starting parcel download. This might take a while."
        # cmd = cdh_parcel.start_download()
        # if cmd.success != True:
        #     print "Parcel download failed!"
        #     exit(0)

        # # make sure the download finishes
        # while cdh_parcel.stage != 'DOWNLOADED':
        #     sleep(5)
        #     cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version)

        # print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded"

        # distribute the parcel
        info('Starting parcel distribution. This might take a while.')
        cmd = cdh_parcel.start_distribution()
        i = 0
        while cmd.success is None:
            i += 1
            sleep(5)
            cmd = cmd.fetch()
            # Progress indicator: redraw a growing line of dots in place.
            sys.stdout.write('\r%s' % ('.' * i))
            sys.stdout.flush()
        if cmd.success != True:
            err('Parcel distribution failed!')

        # make sure the distribution finishes
        while cdh_parcel.stage != "DISTRIBUTED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' distributed')

        # activate the parcel
        cmd = cdh_parcel.activate()
        if cmd.success != True:
            err('Parcel activation failed!')

        # make sure the activation finishes
        while cdh_parcel.stage != "ACTIVATED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' activated')

    def _create_service(self, sdata):
        """Create the service described by ``sdata`` together with its roles.

        ``sdata`` has the keys ``sname`` (service name), ``stype`` (service
        type) and ``roles``; a role has ``rname``, ``rtype`` and either
        ``rhost`` (one host) or ``rhosts`` (one numbered role per host).
        A service that already exists is left untouched.
        """
        try:
            self.cluster.get_service(sdata['sname'])
        except ApiException:
            # The service does not exist yet: it is created below.
            pass
        else:
            info('Service %s had already been configured' % sdata['sname'])
            return

        service = self.cluster.create_service(sdata['sname'], sdata['stype'])
        ok('Service %s had been created successfully' % sdata['sname'])

        for role in sdata['roles']:
            if 'rhost' in role:
                service.create_role(role['rname'], role['rtype'], role['rhost'])
            elif 'rhosts' in role:
                for rid, host in enumerate(role['rhosts'], 1):
                    service.create_role(role['rname'] + '-' + str(rid), role['rtype'], host)

    def setup_cdh(self):
        """Create the CDH services, configure Hive and auto-configure."""
        service_data = [{
            'sname': 'hdfs',
            'stype': 'HDFS',
            'roles': [
                {'rname': 'hdfs-namenode', 'rtype': 'NAMENODE', 'rhost': self.nn_host},
                {'rname': 'hdfs-secondarynamenode', 'rtype': 'SECONDARYNAMENODE',
                 'rhost': self.snn_host},
                {'rname': 'hdfs-datanode', 'rtype': 'DATANODE', 'rhosts': self.dn_hosts},
            ]
        }, {
            'sname': 'zookeeper',
            'stype': 'ZOOKEEPER',
            'roles': [
                {'rname': 'zookeeper', 'rtype': 'SERVER', 'rhosts': self.zk_hosts},
            ]
        }, {
            'sname': 'hbase',
            'stype': 'HBASE',
            'roles': [
                {'rname': 'hbase-master', 'rtype': 'MASTER', 'rhost': self.hm_host},
                {'rname': 'hdfs-regionserver', 'rtype': 'REGIONSERVER',
                 'rhosts': self.rs_hosts},
            ]
        }, {
            'sname': 'hive',
            'stype': 'HIVE',
            'roles': [
                {'rname': 'hive-metastore', 'rtype': 'HIVEMETASTORE', 'rhost': self.hms_host},
                {'rname': 'hive-server2', 'rtype': 'HIVESERVER2', 'rhost': self.hs2_host},
                {'rname': 'hive-gateway', 'rtype': 'GATEWAY', 'rhosts': self.dn_hosts},
            ]
        }, {
            'sname': 'mapreduce',
            'stype': 'MAPREDUCE',
            'roles': [
                {'rname': 'mapreduce-jobtracker', 'rtype': 'JOBTRACKER',
                 'rhost': self.jt_host},
                {'rname': 'mapreduce-tasktracker', 'rtype': 'TASKTRACKER',
                 'rhosts': self.tt_hosts},
            ]
        }]

        for sdata in service_data:
            self._create_service(sdata)

        # additional config for hive
        try:
            hive_service = self.cluster.get_service('hive')
            hive_metastore_host = self.cm_host  # should be same as cm's host, FQDN
            hive_metastore_name = 'hive'
            hive_metastore_password = '******'
            hive_metastore_database_port = '7432'
            hive_metastore_database_type = 'postgresql'
            hive_config = {
                'hive_metastore_database_host': hive_metastore_host,
                'hive_metastore_database_name': hive_metastore_name,
                'hive_metastore_database_password': hive_metastore_password,
                'hive_metastore_database_port': hive_metastore_database_port,
                'hive_metastore_database_type': hive_metastore_database_type,
            }
            hive_service.update_config(hive_config)
            ok('Additional hive configs had been updated')
        except ApiException as e:
            err(e.message)

        # use auto configure for *-site.xml configs
        try:
            self.cluster.auto_configure()
        except ApiException as e:
            err(e.message)

    def start_cms(self):
        """Start the Cloudera management service and wait for the command."""
        # start the management service
        info('Starting cloudera management service...')
        cms = self.cm.get_service()
        cms.start().wait()
        ok('Cloudera management service started successfully')

    def start_cdh(self):
        """Run the cluster's first-run command and poll until it finishes."""
        info('Excuting first run command. This might take a while.')
        cmd = self.cluster.first_run()

        while cmd.success is None:
            cmd = cmd.fetch()
            sleep(1)

        if cmd.success != True:
            err('The first run command failed: ' + cmd.resultMessage)

        ok('First run successfully executed. Your cluster has been set up!')
def main(passed_username, passed_password, passed_database):
    """Replicate Hive tables of ``passed_database`` from the peer cluster.

    The function first lists all clusters known to Cloudera Manager (via
    ``cm_client``) and prints the state and health checks of their Hive
    services.  It then uses the ``cm_api`` client to create, for every table
    in the table list, a Hive replication schedule on the target cluster,
    triggers it, waits for it, prints the result and removes the schedule
    again.  The process exits with status 0 afterwards, so the HDFS
    replication section at the end is currently not executed.

    Args:
        passed_username: Cloudera Manager user; also used as the HDFS user
            of the replication.
        passed_password: password of that user.
        passed_database: Hive database whose tables are replicated.
    """
    # Previously defined at Experian
    PEER_NAME = 'PRODUCTION'
    SOURCE_HDFS_NAME = 'hdfs'
    TARGET_HDFS_NAME = 'hdfs'
    SOURCE_HIVE_NAME = 'hive'
    TARGET_HIVE_NAME = 'hive'
    SOURCE_CLUSTER_NAME = 'cluster'
    TARGET_CLUSTER_NAME = 'DEV'
    TARGET_YARN_SERVICE = 'yarn'

    cm_host = 'br1andvhmn11.passporthealth.com'

    cm_client.configuration.username = passed_username
    # Ensure that password is quoted
    cm_client.configuration.password = "******" + passed_password + "'"
    sourceDatabase = passed_database

    # Setup authentication for SSL
    cm_client.configuration.verify_ssl = True
    cm_client.configuration.ssl_ca_cert = '/opt/cloudera/security/pki/x509/truststore.pem'

    # Create an instance of the API class
    api_host = 'https://br1andvhmn11.passporthealth.com'
    port = '7183'
    api_version = 'v30'
    impala_host = 'br1anprhsn02.passporthealth.com'

    # Construct base URL for API
    # http://cmhost:7180/api/v30
    api_url = api_host + ':' + port + '/api/' + api_version
    api_client = cm_client.ApiClient(api_url)
    cluster_api_instance = cm_client.ClustersResourceApi(api_client)
    services_api_instance = cm_client.ServicesResourceApi(api_client)

    # Lists all known clusters.
    api_response = cluster_api_instance.read_clusters(view='SUMMARY')
    for cluster in api_response.items:
        print("%s - %s" % (cluster.name, cluster.full_version))

        services = services_api_instance.read_services(cluster.name, view='FULL')
        for service in services.items:
            # print service.display_name, "-", service.type
            if service.type == 'HIVE':
                targetHive = service
                targetCluster = cluster
                print("%s %s %s" % (targetHive.name, targetHive.service_state,
                                    targetHive.health_summary))
                for health_check in targetHive.health_checks:
                    print("%s --- %s" % (health_check.name, health_check.summary))

    # print "Source database = " + sourceDatabase

    # Disabled: discovery of the table names through impala-shell.
    ###show_statement = "'show tables in " + sourceDatabase +"'"
    ###streamOperand = "impala-shell -i " + impala_host + " -d default -k --ssl --ca_cert=/opt/cloudera/security/pki/x509/truststore.pem -q " + show_statement
    ###stream=os.popen(streamOperand)
    ###
    ###output=stream.readlines()
    ###lineno =0
    ###numtables = 0
    ###tablenames = []
    ###for line in output:
    ###  if lineno <= 2:     # skip heading lines
    ###    pass
    ###  elif line[0:3] == "+--":                    # skip last line
    ###    pass
    ###  else:                                       # strip out tablename
    ###    name = line[2:]
    ###    blank = name.index(' ')
    ###    tablenames.append(name[0:blank])
    ###    numtables +=1
    ###  lineno +=1
    ###print str(numtables) + " tables in database " + sourceDatabase
    ###for table in tablenames:
    ###  print table

    # Hard-coded table list used while the discovery above is disabled.
    tablenames = ["test", "test2"]

    api_root = ApiResource(cm_host, username=passed_username, password=passed_password,
                           use_tls=True)

    # Setup for Hive replication
    hive = api_root.get_cluster(TARGET_CLUSTER_NAME).get_service(TARGET_HIVE_NAME)
    hive_args = ApiHiveReplicationArguments(None)
    hdfs_args = ApiHdfsReplicationArguments(None)  # Needed for replicating table data stored in HDFS
    hive_args.sourceService = ApiServiceRef(None, peerName=PEER_NAME,
                                            clusterName=SOURCE_CLUSTER_NAME,
                                            serviceName=SOURCE_HIVE_NAME)

    # Settings shared by every per-table replication.
    hive_args.force = True          # Overwrite existing tables
    hive_args.replicateData = True  # Replicate table data stored in HDFS
    hdfs_args.skipChecksumChecks = True
    hdfs_args.skipListingChecksumChecks = True
    hdfs_args.preserveBlockSize = True
    hdfs_args.preserveReplicationCount = True
    hdfs_args.preservePermissions = True

    # Define HDFS portion of the Hive replication as needed
    hdfs_args.destinationPath = '/user/bob.marshall/repltest'  # Argument? Path relative to servicename?
    hdfs_args.mapreduceServiceName = TARGET_YARN_SERVICE
    hdfs_args.userName = passed_username
    hdfs_args.sourceUser = passed_username
    hive_args.hdfsArguments = hdfs_args

    # One replication schedule per table.
    for tab in tablenames:
        # Define tables to replicate
        table = ApiHiveTable(None)
        table.database = passed_database
        table.tableName = tab
        print("Replicating " + passed_database + "." + tab)
        hive_args.tableFilters = [table]

        start = datetime.datetime.now()
        end = start + datetime.timedelta(days=1)
        interval = "DAY"
        numinterval = 1
        pause = True

        print("Creating Hive Replication Schedule")
        schedule = hive.create_replication_schedule(start, end, interval, numinterval,
                                                    pause, hive_args)
        try:
            print("Starting Hive Replication")
            cmd = hive.trigger_replication_schedule(schedule.id)
            print("Waiting for completion")
            cmd = cmd.wait()
            print("Getting result")
            result = hive.get_replication_schedule(schedule.id).history[0].hiveResult
            print(result)
        finally:
            # Remove the schedule even when the replication failed, so no
            # stale schedule is left behind in Cloudera Manager.
            print("Cleanup... Remove Hive replication schedule")
            sch = hive.delete_replication_schedule(schedule.id)
            print(sch)

    # NOTE: the process ends here; everything below is not executed.
    exit(0)

    #scheds = hive.get_replication_schedules()
    #sch = hive.delete_replication_schedule(162)

    # Setup for HDFS replication
    hdfs = api_root.get_cluster(TARGET_CLUSTER_NAME).get_service(TARGET_HDFS_NAME)

    hdfs_args = ApiHdfsReplicationArguments(None)
    hdfs_args.sourceService = ApiServiceRef(None, peerName=PEER_NAME,
                                            clusterName=SOURCE_CLUSTER_NAME,
                                            serviceName=SOURCE_HDFS_NAME)
    hdfs_args.sourcePath = '/user/bob.marshall/repltest'
    hdfs_args.destinationPath = '/user/bob.marshall/repltest'
    hdfs_args.mapreduceServiceName = TARGET_YARN_SERVICE
    # Use the function argument, not the module-level ``args`` namespace,
    # which only exists when the file is run as a script.
    hdfs_args.userName = passed_username
    hdfs_args.sourceUser = passed_username
    hdfs_args.preserveBlockSize = True
    hdfs_args.preserveReplicationCount = True
    hdfs_args.preservePermissions = True
    hdfs_args.skipChecksumChecks = True
    hdfs_args.skipListingChecksumChecks = True

    start = datetime.datetime.now()
    end = start + datetime.timedelta(days=1)
    interval = "DAY"
    numinterval = 1
    pause = True

    #schedule = hdfs.create_replication_schedule(start, end, interval, interval, pause, hdfs_args)
    print("Creating HDFS Replication Schedule")
    schedule = hdfs.create_replication_schedule(start, end, "DAY", 1, True, hdfs_args)
    try:
        print("Starting HDFS Replication")
        cmd = hdfs.trigger_replication_schedule(schedule.id)
        print("Waiting for completion")
        cmd = cmd.wait()
        print("Getting result")
        result = hdfs.get_replication_schedule(schedule.id).history[0].hdfsResult
        print(result)
    finally:
        print("Cleanup... Remove HDFS replication schedule")
        sch = hdfs.delete_replication_schedule(schedule.id)
        print(sch)

    #scheds = hdfs.get_replication_schedules()
    #sch = hdfs.delete_replication_schedule(27)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Perform BDR jobs while getting around BDR limitations.')
    parser.add_argument("username")
    parser.add_argument("password")
    parser.add_argument("database")
    args = parser.parse_args()

    main(args.username, args.password, args.database)
class ClouderaManagerDeployment(object):
    """Handle to a Cloudera Manager server and the cluster it manages.

    The constructor only records connection details. Call
    ``setup_api_resources`` before any method that uses ``self.api``,
    ``self.cm`` or ``self.cluster``.
    """

    def __init__(self, cm_server_address, cm_server_port=DEFAULT_CM_PORT,
                 username=DEFAULT_CM_USERNAME, password=DEFAULT_CM_PASSWORD):
        """Store the CM server address, port and credentials.

        No network access happens here.
        """
        self.cm_server_address = cm_server_address
        self.cm_server_port = cm_server_port
        self.username = username
        self.password = password

    def setup_api_resources(self, cluster_name='Cluster 1 (clusterdock)'):
        """Create the API client and look up the CM and cluster handles.

        Sets ``self.api``, ``self.cm`` and ``self.cluster``. The API version
        handed to ``ApiResource`` is discovered from the server itself via
        ``_get_api_version``.

        Args:
            cluster_name: Name of the cluster to fetch; defaults to the name
                that was previously hard-coded here.
        """
        self.api = ApiResource(server_host=self.cm_server_address,
                               server_port=self.cm_server_port,
                               username=self.username,
                               password=self.password,
                               version=self._get_api_version())
        self.cm = self.api.get_cloudera_manager()
        self.cluster = self.api.get_cluster(cluster_name)

    def prep_for_start(self):
        """Hook run before starting; intentionally a no-op in this class."""
        pass

    def validate_services_started(self, timeout_min=10, healthy_time_threshold_sec=30):
        """Block until every service has stayed healthy for a sustained period.

        Polls all cluster services plus the CM management service every
        3 seconds. The healthy streak restarts whenever any service is found
        at fault, and the method returns once the streak has lasted
        ``healthy_time_threshold_sec`` seconds.

        Args:
            timeout_min: Overall time budget, in minutes.
            healthy_time_threshold_sec: Required length of the uninterrupted
                healthy streak, in seconds.

        Raises:
            Exception: If the time budget is exhausted first; the message
                lists the services found at fault on the last poll.
        """
        start_validating_time = time()
        healthy_time = None
        # Bound before the loop so the timeout message can be built even when
        # the budget is already spent before the first poll (e.g. timeout_min=0).
        at_fault_services = []

        logger.info('Beginning service health validation...')
        while healthy_time is None or (time() - healthy_time < healthy_time_threshold_sec):
            if time() - start_validating_time >= timeout_min * 60:
                raise Exception(("Timed out after waiting {0} minutes for services to start "
                                 "(at fault: {1}).").format(timeout_min, at_fault_services))

            at_fault_services = self._find_at_fault_services()
            if at_fault_services:
                # Any failure resets the healthy streak.
                healthy_time = None
            elif healthy_time is None:
                # First clean poll: the healthy streak starts now.
                healthy_time = time()
            sleep(3)

        logger.info("Validated that all services started (time: %.2f s).",
                    time() - start_validating_time)

    def _find_at_fault_services(self):
        """Return ``[name, reason]`` pairs for services that are not healthy.

        Services whose ``serviceState`` is "NA" are skipped entirely. A
        service that is not "STARTED" is reported as "NOT STARTED"; a started
        service whose ``healthSummary`` is not "GOOD" is reported with the
        names of its health checks that are neither "GOOD" nor "DISABLED".
        """
        all_services = list(self.cluster.get_all_services()) + [self.cm.get_service()]
        at_fault_services = []
        for service in all_services:
            if service.serviceState == "NA":
                continue
            if service.serviceState != "STARTED":
                at_fault_services.append([service.name, "NOT STARTED"])
            elif service.healthSummary != "GOOD":
                checks = [check["name"] for check in service.healthChecks
                          if check["summary"] not in ("GOOD", "DISABLED")]
                at_fault_services.append([service.name,
                                          "Failed health checks: {0}".format(checks)])
        return at_fault_services

    def add_hosts_to_cluster(self, secondary_node_fqdn, all_fqdns):
        """Delegate to ``cm_utils.add_hosts_to_cluster`` for this deployment."""
        cm_utils.add_hosts_to_cluster(api=self.api, cluster=self.cluster,
                                      secondary_node_fqdn=secondary_node_fqdn,
                                      all_fqdns=all_fqdns)

    def update_hive_metastore_namenodes(self):
        """Run the update-metastore-namenodes command on every Hive service.

        Waits for each command to finish. A failed command is logged as a
        warning rather than raised, so the remaining services are still
        processed.
        """
        for service in self.cluster.get_all_services():
            if service.type == 'HIVE':
                logger.info('Updating NameNode references in Hive metastore...')
                update_metastore_namenodes_cmd = service.update_metastore_namenodes().wait()
                if not update_metastore_namenodes_cmd.success:
                    logger.warning(("Failed to update NameNode references in Hive metastore "
                                    "(command returned %s)."), update_metastore_namenodes_cmd)

    def update_database_configs(self):
        """Delegate to ``cm_utils.update_database_configs`` for this deployment."""
        cm_utils.update_database_configs(api=self.api, cluster=self.cluster)

    def _get_api_version(self):
        """Ask the CM server which API version it speaks.

        Issues an authenticated GET to ``/api/version`` over plain HTTP.

        Returns:
            The version with the surrounding ``v`` removed (e.g. "10" for a
            response body of "v10").

        Raises:
            requests.HTTPError: If the server answers with an error status.
            Exception: If the response body contains no ``v``.
        """
        api_version_response = requests.get(
            "http://{0}:{1}/api/version".format(self.cm_server_address, self.cm_server_port),
            auth=(self.username, self.password))
        api_version_response.raise_for_status()

        # Use the decoded text rather than the raw body so the substring test
        # and strip('v') below operate on a string under Python 3 as well;
        # also drop any whitespace around the version token.
        api_version = api_version_response.text.strip()
        if 'v' not in api_version:
            # Interpolate explicitly: Exception does not apply logging-style
            # arguments to its message.
            raise Exception("/api/version returned unexpected result (%s)." % api_version)

        logger.info("Detected CM API %s.", api_version)
        return api_version.strip('v')