def main(): parser = cm_args_parser() args = parser.parse_args() print "conencting to host:",args.cm_host+"..." api = ApiResource(args.cm_host, username=args.cm_user, password=args.cm_password) cm = ClouderaManager(api) cm.update_config({"dssd_enabled":"true"})
def install_cds(cm_host, host_list):
    """Install and activate the CDS (Spark2) parcel and service on the cluster.

    Steps, in strict order: add the CDS parcel repo to CM, download /
    distribute / activate the parcel, create the SPARK2 service with roles
    assigned by hostname convention, restart CM + cluster, then clean up a
    spurious HDFS gateway role config group.

    :param cm_host: Cloudera Manager host; also receives the master roles.
    :param host_list: all cluster hostnames; hosts containing 'slave'/'edge'
        in their name get the corresponding gateway roles.

    NOTE(review): relies on module-level globals (cm_port, cm_username,
    cm_password, cluster_name, cds_* constants, wait_for_parcel) defined
    elsewhere in the file.
    """
    print "Installing CDS for Spark2"
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)
    cm = ClouderaManager(api)
    config = cm.get_config(view='full')
    # Add parcel repository: append the CDS repo URL to whatever is already
    # configured (falling back to the default list when nothing is set).
    repolist = config['REMOTE_PARCEL_REPO_URLS']
    value = repolist.value or repolist.default
    value += ',' + cds_parcel_repo
    cm.update_config({'REMOTE_PARCEL_REPO_URLS': value})
    # Give CM time to refresh its parcel list from the new repo before asking
    # for the parcel below.
    sleep(10)
    # Install CDS parcel. Each stage check makes the function resumable: if a
    # previous run already reached a later stage, the earlier steps are skipped.
    cluster = api.get_cluster(cluster_name)
    parcel = cluster.get_parcel(cds_parcel_name, cds_parcel_version)
    print "Downloading CDS parcel. This might take a while."
    if parcel.stage == "AVAILABLE_REMOTELY":
        parcel = wait_for_parcel(parcel.start_download(), api, parcel, cluster_name, 'DOWNLOADED')
    print "Distributing CDS parcel. This might take a while."
    if parcel.stage == "DOWNLOADED":
        parcel = wait_for_parcel(parcel.start_distribution(), api, parcel, cluster_name, 'DISTRIBUTED')
    print "Activating CDS parcel. This might take a while."
    if parcel.stage == "DISTRIBUTED":
        parcel = wait_for_parcel(parcel.activate(), api, parcel, cluster_name, 'ACTIVATED')
    # Create the Spark2 service and assign roles: history server and master
    # gateway on the CM host, one gateway per edge/slave node.
    service = cluster.create_service(cds_service_name, cds_service_type)
    slaves = [host for host in host_list if 'slave' in host]
    edges = [host for host in host_list if 'edge' in host]
    service.create_role('SPARK2_YARN_HISTORY_SERVER-1', 'SPARK2_YARN_HISTORY_SERVER', cm_host)
    service.create_role('SPARK2_ON_YARN-GW_MASTER1', 'GATEWAY', cm_host)
    for (i, edge) in enumerate(edges):
        service.create_role('SPARK2_ON_YARN-GW_EDGE%s' % i, 'GATEWAY', edge)
    for (i, slave) in enumerate(slaves):
        service.create_role('SPARK2_ON_YARN-GW_SLAVE%s' % i, 'GATEWAY', slave)
    cluster.auto_configure()
    # Restart Cloudera Management Service and cluster so the new service and
    # client configs take effect.
    cm_service = cm.get_service()
    cm_service.restart().wait()
    cluster.restart(restart_only_stale_services=True,
                    redeploy_client_configuration=True).wait()
    # Due to (presumably) CM bug, auto_configure() after Kafka installation
    # creates additional role config group for HDFS gateway, which breaks
    # further use of auto_configure(). Below we remove it if it exists.
    try:
        hdfs_service = cluster.get_service("HDFS-1")
        hdfs_service.delete_role_config_group("HDFS-1-GATEWAY-1")
    except cm_api.api_client.ApiException:
        # Group was never created — nothing to clean up.
        print(
            "Not removing HDFS Gateway role config group as it doesn't exist")
    else:
        print("Removed additional HDFS Gateway role config group")
    print "CDS is now installed."
def main():
    """Ansible module entry point: list, set, or clear a Cloudera Manager
    configuration entry.

    States:
      * ``list``    -- return all CM settings (name/default/value) unchanged.
      * ``present`` -- set `name` to `value` if it differs from the current value.
      * ``absent``  -- reset `name` to its default by writing None.

    Exits via module.exit_json / module.fail_json in all paths.
    """
    module = AnsibleModule(
        argument_spec=dict(
            cm_host=dict(required=True, type='str'),
            cm_port=dict(required=False, type='int', default=7180),
            cm_username=dict(required=True, type='str'),
            cm_password=dict(required=True, type='str', no_log=True),
            cm_tls=dict(required=False, type='bool', default=False),
            cm_version=dict(required=False, type='int', default=10),
            name=dict(required=False, type='str'),
            value=dict(required=False, type='str'),
            state=dict(default='present', choices=['present', 'absent', 'list'])
        )
    )
    cm_host = module.params.get('cm_host')
    cm_port = module.params.get('cm_port')
    cm_username = module.params.get('cm_username')
    cm_password = module.params.get('cm_password')
    cm_tls = module.params.get('cm_tls')
    cm_version = module.params.get('cm_version')
    cm_config_key = module.params.get('name')
    cm_config_value = module.params.get('value')
    state = module.params.get('state')
    changed = False

    if not HAS_CM_API:
        module.fail_json(changed=changed, msg='cm_api required for this module')

    try:
        cm_conn = ApiResource(cm_host, server_port=cm_port,
                              username=cm_username, password=cm_password,
                              use_tls=cm_tls, version=cm_version)
        cms = ClouderaManager(cm_conn)
    except ApiException as e:
        module.fail_json(changed=changed,
                         msg="Can't connect to API: {}".format(e))

    # BUG FIX: get_config('full') was previously called a second time AFTER
    # this try/except, which both doubled the API round-trip and let an
    # ApiException escape unhandled, defeating the error handling above.
    try:
        _settings = cms.get_config('full')
    except ApiException as e:
        module.fail_json(changed=changed, msg="{}".format(e))

    # Flatten the API config objects into a plain dict for JSON output.
    # `.value` is already None for unset entries, so no extra dance needed.
    settings = dict()
    for key in _settings:
        settings[key] = dict(
            name=_settings[key].name,
            default=_settings[key].default,
            value=_settings[key].value,
        )

    if state == 'list':
        module.exit_json(changed=changed, settings=settings)
    if cm_config_key is None:
        module.fail_json(changed=changed, msg='Missing `name` option.')
    if cm_config_key not in settings:
        module.fail_json(changed=changed,
                         msg='{} is not a valid configuration entry'.format(cm_config_key))

    if state == "absent":
        # Writing None resets the entry to its default.
        cm_config_value = None
    elif cm_config_value is None:
        module.fail_json(changed=changed, msg='Missing `value` option.')

    if cm_config_value != settings[cm_config_key]['value']:
        try:
            update = dict()
            update[cm_config_key] = cm_config_value
            rc = cms.update_config(update)
            module.exit_json(changed=True, settings=rc)
        except Exception as e:
            module.fail_json(changed=False, msg="{}".format(e))

    # Value already matches: report no change.
    module.exit_json(changed=False, settings=cms.get_config('summary'))
def set_up_cluster(cm_host, host_list): print "Setting up CDH cluster..." api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7) cm = ClouderaManager(api) print "Creating mgmg service." try: service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT") cm.create_mgmt_service(service_setup) except ApiException as exc: if exc.code != 400: print "create MGMT service failed: " + exc exit(1) print "Installing hosts. This might take a while." cmd = cm.host_install(host_username, host_list, password=host_password).wait() if cmd.success != True: print "cm_host_install failed: " + cmd.resultMessage exit(2) print "Auto-assign roles and auto-configure the CM service" if not is_cluster_installed(api): cm.auto_assign_roles() cm.auto_configure() print "Creating cluster." if not is_cluster_installed(api): cluster = create_cluster(api, cluster_name, cdh_version) cluster.add_hosts(host_list) cluster = api.get_cluster(cluster_name) cdh_parcel = get_cdh_parcel(cluster) print "Downloading CDH parcel. This might take a while." if cdh_parcel.stage == "AVAILABLE_REMOTELY": cdh_parcel = wait_for_parcel(cdh_parcel.start_download(), api, cdh_parcel, cluster_name, 'DOWNLOADED') print "Distributing CDH parcel. This might take a while." if cdh_parcel.stage == "DOWNLOADED": cdh_parcel = wait_for_parcel(cdh_parcel.start_distribution(), api, cdh_parcel, cluster_name, 'DISTRIBUTED') print "Activating CDH parcel. This might take a while." if cdh_parcel.stage == "DISTRIBUTED": cdh_parcel = wait_for_parcel(cdh_parcel.activate(), api, cdh_parcel, cluster_name, 'ACTIVATED') # if cdh_parcel.stage != "ACTIVATED": # print "CDH parcel activation failed. Parcel in stage: " + cdh_parcel.stage # exit(14) print "Inspecting hosts. This might take a few minutes." cmd = cm.inspect_hosts() while cmd.success == None: cmd = cmd.fetch() if cmd.success != True: print "Host inpsection failed!" 
exit(8) print "Hosts successfully inspected: \n" + cmd.resultMessage print "Creating specified services." for s in service_types_and_names.keys(): try: cluster.get_service(service_types_and_names[s]) except: print "Creating service: " + service_types_and_names[s] service = cluster.create_service(service_types_and_names[s], s) slaves = [host for host in host_list if 'slave' in host] edges = [host for host in host_list if 'edge' in host] #assign master roles to master node for service in cluster.get_all_services(): if service.name == 'HDFS-1': service.create_role('NAMENODE-1', 'NAMENODE', cm_host) service.create_role('SECONDARYNAMENODE', 'SECONDARYNAMENODE', cm_host) service.create_role('BALANCER-1', 'BALANCER', cm_host) service.create_role('HTTPFS-1', 'HTTPFS', cm_host) service.create_role('HDFS-GW_MASTER1', 'GATEWAY', cm_host) for (i, edge) in enumerate(edges): service.create_role('HDFS-GW_EDGE%s' % i, 'GATEWAY', edge) for (i, slave) in enumerate(slaves): service.create_role('DATANODE-%s' % i, 'DATANODE', slave) if service.name == 'ZOOKEEPER-1': service.create_role('ZOOKEEPERSERVER-1', 'SERVER', cm_host) if service.name == 'HBASE-1': service.create_role('MASTER-1', 'MASTER', cm_host) service.create_role('HBASETHRIFTSERVER-1', 'HBASETHRIFTSERVER', cm_host) for (i, slave) in enumerate(slaves): service.create_role('HBASE-RS-%s' % i, 'REGIONSERVER', slave) if service.name == 'HUE-1': service.create_role('HUE-MASTER1', 'HUE_SERVER', cm_host) service.create_role('HUE-LB_MASTER1', 'HUE_LOAD_BALANCER', cm_host) for (i, edge) in enumerate(edges): service.create_role('HUE-EDGE%s' % i, 'HUE_SERVER', edge) if service.name == 'HIVE-1': service.create_role('HIVEMETASTORE-1', 'HIVEMETASTORE', cm_host) service.create_role('HIVESERVER-1', 'HIVESERVER2', cm_host) service.create_role('HIVE-GW_MASTER1', 'GATEWAY', cm_host) for (i, edge) in enumerate(edges): service.create_role('HIVE-GW_EDGE%s' % i, 'GATEWAY', edge) for (i, slave) in enumerate(slaves): 
service.create_role('HIVE-GW_SLAVE%s' % i, 'GATEWAY', slave) if service.name == 'IMPALA-1': service.create_role('STATESTORE-1', 'STATESTORE', cm_host) service.create_role('CATALOGSERVER-1', 'CATALOGSERVER', cm_host) for (i, slave) in enumerate(slaves): service.create_role('IMPALAD-%s' % i, 'IMPALAD', slave) if service.name == 'OOZIE-1': service.create_role('OOZIE_SERVER-1', 'OOZIE_SERVER', cm_host) if service.name == 'SPARK_ON_YARN-1': service.create_role('SPARK_YARN_HISTORY_SERVER-1', 'SPARK_YARN_HISTORY_SERVER', cm_host) service.create_role('SPARK_ON_YARN-GW_MASTER%s' % i, 'GATEWAY', cm_host) for (i, edge) in enumerate(edges): service.create_role('SPARK_ON_YARN-GW_EDGE%s' % i, 'GATEWAY', edge) for (i, slave) in enumerate(slaves): service.create_role('SPARK_ON_YARN-GW_SLAVE%s' % i, 'GATEWAY', slave) if service.name == 'SQOOP-1': service.create_role('SQOOP_SERVER-1', 'SQOOP_SERVER', cm_host) if service.name == 'YARN-1': service.create_role('RESOURCEMANAGER-1', 'RESOURCEMANAGER', cm_host) service.create_role('JOBHISTORY-1', 'JOBHISTORY', cm_host) for (i, slave) in enumerate(slaves): service.create_role('NODEMANAGER-%s' % i, 'NODEMANAGER', slave) #print "Auto assigning roles." #cluster.auto_assign_roles() cluster.auto_configure() print "Updating Hive config." hive_metastore_host = cm_host # let's assume that hive = cluster.get_service(service_types_and_names["HIVE"]) hive_config = { "hive_metastore_database_host" : hive_metastore_host, \ "hive_metastore_database_name" : hive_metastore_name, \ "hive_metastore_database_user" : hive_metastore_user, \ "hive_metastore_database_password" : hive_metastore_password, \ "hive_metastore_database_port" : hive_metastore_database_port, \ "hive_metastore_database_type" : hive_metastore_database_type } hive.update_config(hive_config) print "Updating Hue config." 
hue_db_host = cm_host # let's assume that hue = cluster.get_service(service_types_and_names["HUE"]) hue_config = { "database_host" : hue_db_host, \ "database_name" : hue_db_name, \ "database_user" : hue_db_user, \ "database_password" : hue_db_password, \ "database_port" : hue_db_port, \ "database_type" : hue_db_type } hue.update_config(hue_config) # Set Java version to OpenJDK cm.update_all_hosts_config({'java_home': '/usr/lib/jvm/java-openjdk'}) print "Starting management service." cm_service = cm.get_service() cm_service.start().wait() print "Excuting first run command. This might take a while." cmd = cluster.first_run().wait() if cmd.success != True: print "The first run command failed: " + cmd.resultMessage exit(11) print "First run successfully executed. Your cluster has been set up!" config = cm.get_config(view='full') repolist = config['REMOTE_PARCEL_REPO_URLS'] value = repolist.value or repolist.default value += ',' + anaconda_repo cm.update_config({'REMOTE_PARCEL_REPO_URLS': value}) sleep(10) cluster = api.get_cluster(cluster_name) parcel = cluster.get_parcel('Anaconda', anaconda_parcel_version) print "Downloading Anaconda parcel. This might take a while." if parcel.stage == "AVAILABLE_REMOTELY": parcel = wait_for_parcel(parcel.start_download(), api, parcel, cluster_name, 'DOWNLOADED') print "Distributing Anaconda parcel. This might take a while." if parcel.stage == "DOWNLOADED": parcel = wait_for_parcel(parcel.start_distribution(), api, parcel, cluster_name, 'DISTRIBUTED') print "Activating Anaconda parcel. This might take a while." if parcel.stage == "DISTRIBUTED": parcel = wait_for_parcel(parcel.activate(), api, parcel, cluster_name, 'ACTIVATED') print "Anaconda is now installed."