def enable_rmha(conf):
    ha_hosts_map = {}
    standby_rm_key = "StandbyResourceManager"
    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")

        host_id_map = util.host_id_map(config, api)

        # TODO: check if HA is enabled already
        yarn_svc = config.get_cluster_service_by_type(conf, 'YARN')
        yarn = cl.get_service(yarn_svc['name'])

        standby_rm = yarn_svc['haconfig']['standby_rm']
        ha_hosts_map[standby_rm_key] = standby_rm

        LOG.info("host_id_map: " + str(host_id_map))
        LOG.info("ha_hosts_map: " + str(ha_hosts_map))

        for ha_host in ha_hosts_map:
            LOG.info("ha_hosts_map[ha_host]: " + ha_hosts_map[ha_host])
            try:
                LOG.info("Host ID for " + ha_host + ": " + host_id_map[ha_hosts_map[ha_host]])
            except KeyError:
                LOG.error("Cannot find host " + ha_hosts_map[ha_host] + " for role \"" + ha_host +
                          "\" in list of hosts (" + str(list(host_id_map)) + ")")
                return

        zk_svc = config.get_cluster_service_by_type(conf, 'ZOOKEEPER')
        zk_svc_name = zk_svc['name']

        LOG.info("Running YARN HA configuration now...")
        # new_rm_host_id - ID of the host where the standby ResourceManager will be created.
        # zk_service_name - Name of the ZooKeeper service to use for auto-failover.
        cmd = yarn.enable_rm_ha(host_id_map[standby_rm], zk_service_name=zk_svc_name)
        util.wait_for_command(cmd, True)

        # TODO: in some cases time.sleep(x) is needed to avoid race conditions
        try:
            # Probe for a Cloudera Management Service; restart it if present
            mgmt_svc = cm.get_service()
            LOG.info("Restarting management services")
            mgmt.restart(cm)
        except ApiException:
            pass

        LOG.info("Restarting cluster")
        cluster.restart(cl)
    except ApiException as e:
        raise ProvisionatorException(e)
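# Illustrative usage sketch only (not part of the provisioning flow): the host
# name, service names, and the 'services' layout below are hypothetical; only the
# keys actually dereferenced by enable_rmha() above are taken from the code. The
# real lookup goes through config.get_cluster_service_by_type().
_example_rmha_conf = {
    'cluster': {'name': 'cluster1'},
    'cm': {},  # CM connection settings consumed by util.get_api_handle()
    'services': [
        {'type': 'YARN', 'name': 'yarn1',
         'haconfig': {'standby_rm': 'worker-2.example.com'}},
        {'type': 'ZOOKEEPER', 'name': 'zookeeper1'},
    ],
}
# enable_rmha(_example_rmha_conf)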
def enable_kerberos(conf):
    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")

        # Check the HDFS service to see if Kerberos is already enabled
        hdfs_svc = config.get_cluster_service_by_type(conf, 'HDFS')
        hdfs = cl.get_service(hdfs_svc['name'])
        hdfs_cfg, hdfs_roletype_cfg = hdfs.get_config(view='full')

        if hdfs_cfg['hadoop_security_authentication'].value != 'kerberos':
            # Kerberos has not been enabled - add the KDC creds
            if 'kdc_user' in conf['cm'] and 'kdc_pass' in conf['cm']:
                check_creds(conf['cm']['kdc_user'], conf['cm']['kdc_pass'])
                LOG.info("Importing credentials for administrative account")
                cmd = cm.import_admin_credentials(conf['cm']['kdc_user'], conf['cm']['kdc_pass'])
                util.wait_for_command(cmd, True)

            # OK, let's do this
            datanode_transceiver_port = DATANODE_TRANSCEIVER_PORT
            datanode_web_port = DATANODE_WEB_PORT
            cm_cfg = cm.get_config()
            if 'SINGLE_USER_ENABLED' in cm_cfg:
                # TODO: don't hardcode these single-user mode ports
                datanode_transceiver_port = 4004
                datanode_web_port = 4006

            LOG.info("Running Kerberos configuration now...")
            cmd = cl.configure_for_kerberos(
                datanode_transceiver_port=datanode_transceiver_port,
                datanode_web_port=datanode_web_port)
            util.wait_for_command(cmd, True)

            # TODO: in some cases time.sleep(x) is needed to avoid race conditions
            try:
                # Probe for a Cloudera Management Service; restart it if present
                mgmt_svc = cm.get_service()
                LOG.info("Restarting management services")
                mgmt.restart(cm)
            except ApiException:
                pass

            LOG.info("Restarting cluster")
            cluster.restart(cl)
        else:
            LOG.info("Kerberos already enabled")
    except ApiException as e:
        raise ProvisionatorException(e)
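# Illustrative only: the KDC credential fields enable_kerberos() looks for under
# conf['cm']. The principal and password values are hypothetical examples; the
# key names match those read by the function above.
_example_kerberos_cm_conf = {
    'cm': {
        'kdc_user': 'cloudera-scm/admin@EXAMPLE.COM',  # admin principal imported via cm.import_admin_credentials()
        'kdc_pass': 'changeme',
    },
}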
def enable_hdfsha(conf):
    ha_hosts_map = {}
    active_namenode_key = "ActiveNamenode"
    standby_namenode_key = "StandbyNamenode"
    journalnode1_key = "JournalNode1"
    journalnode2_key = "JournalNode2"
    journalnode3_key = "JournalNode3"
    try:
        # Open a connection to CM and get a CM object
        api = util.get_api_handle(conf)
        cm = api.get_cloudera_manager()
        cl = None
        if 'cluster' in conf and 'name' in conf['cluster']:
            cl = api.get_cluster(conf['cluster']['name'])
        else:
            raise ProvisionatorException("No cluster specified")

        # TODO: check if HA is enabled already
        hdfs_svc = config.get_cluster_service_by_type(conf, 'HDFS')
        hdfs = cl.get_service(hdfs_svc['name'])
        hdfs_cfg, hdfs_roletype_cfg = hdfs.get_config(view='full')

        host_id_map = util.host_id_map(config, api)

        active_namenode = hdfs_svc['haconfig']['active_namenode']
        standby_namenode = hdfs_svc['haconfig']['standby_namenode']
        journalnode1 = hdfs_svc['haconfig']['journalnode1']
        journalnode2 = hdfs_svc['haconfig']['journalnode2']
        journalnode3 = hdfs_svc['haconfig']['journalnode3']
        ns = hdfs_svc['haconfig']['ns']
        jnEditsDir = hdfs_svc['haconfig']['jnEditsDir']

        ha_hosts_map[active_namenode_key] = active_namenode
        ha_hosts_map[standby_namenode_key] = standby_namenode
        ha_hosts_map[journalnode1_key] = journalnode1
        ha_hosts_map[journalnode2_key] = journalnode2
        ha_hosts_map[journalnode3_key] = journalnode3

        LOG.info("host_id_map: " + str(host_id_map))
        LOG.info("ha_hosts_map: " + str(ha_hosts_map))

        for ha_host in ha_hosts_map:
            LOG.info("ha_hosts_map[ha_host]: " + ha_hosts_map[ha_host])
            try:
                LOG.info("Host ID for " + ha_host + ": " + host_id_map[ha_hosts_map[ha_host]])
            except KeyError:
                LOG.error("Cannot find host " + ha_hosts_map[ha_host] + " for role \"" + ha_host +
                          "\" in list of hosts (" + str(list(host_id_map)) + ")")
                return

        jns = []
        jns.append({'jnHostId': host_id_map[journalnode1], 'jnName': journalnode1_key, 'jnEditsDir': jnEditsDir})
        jns.append({'jnHostId': host_id_map[journalnode2], 'jnName': journalnode2_key, 'jnEditsDir': jnEditsDir})
        jns.append({'jnHostId': host_id_map[journalnode3], 'jnName': journalnode3_key, 'jnEditsDir': jnEditsDir})

        zk_svc = config.get_cluster_service_by_type(conf, 'ZOOKEEPER')
        zk_svc_name = zk_svc['name']

        nn = hdfs.get_roles_by_type("NAMENODE")
        if not nn:
            raise ProvisionatorException("Could not find the active NameNode for current configuration")
        if len([instance.name for instance in nn]) > 1:
            LOG.warn("Found more than one NameNode in current configuration")
        nn_name = [instance.name for instance in nn][0]
        LOG.info("Active NameNode name in current config: " + nn_name)

        LOG.info("Running HDFS HA configuration now...")
        # standby_host_id - ID of the host where the standby NameNode will be created.
        # nameservice - Nameservice to be used while enabling HA.
        # jns - List of JournalNodes. Each element must be a dict containing the following keys:
        #   jnHostId: ID of the host where the new JournalNode will be created.
        #   jnName: Name of the JournalNode role (optional).
        #   jnEditsDir: Edits dir of the JournalNode. Can be omitted if the config is already set at RCG level.
        # zk_service_name - Name of the ZooKeeper service to use for auto-failover.
        cmd = hdfs.enable_nn_ha(
            nn_name,
            host_id_map[standby_namenode],
            ns,
            jns,
            standby_name_dir_list=None,
            qj_name=None,
            standby_name=None,
            active_fc_name=None,
            standby_fc_name=None,
            zk_service_name=zk_svc_name,
            force_init_znode=True,
            clear_existing_standby_name_dirs=True,
            clear_existing_jn_edits_dir=True)
        util.wait_for_command(cmd, True)

        # TODO: in some cases time.sleep(x) is needed to avoid race conditions
        try:
            # Probe for a Cloudera Management Service; restart it if present
            mgmt_svc = cm.get_service()
            LOG.info("Restarting management services")
            mgmt.restart(cm)
        except ApiException:
            pass

        LOG.info("Restarting cluster")
        cluster.restart(cl)
    except ApiException as e:
        raise ProvisionatorException(e)
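# Illustrative only: a minimal sketch of the 'haconfig' block enable_hdfsha()
# expects on the HDFS service entry. Host names and the edits dir are hypothetical
# examples; the key names match those dereferenced above.
_example_hdfs_haconfig = {
    'active_namenode': 'master-1.example.com',
    'standby_namenode': 'master-2.example.com',
    'journalnode1': 'master-1.example.com',
    'journalnode2': 'master-2.example.com',
    'journalnode3': 'worker-1.example.com',
    'ns': 'nameservice1',      # nameservice used while enabling HA
    'jnEditsDir': '/dfs/jn',   # edits dir for each JournalNode
}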
def restart(cluster, exception_on_fail=True):
    cmd = cluster.restart(restart_only_stale_services=True, redeploy_client_configuration=True)
    util.wait_for_command(cmd, exception_on_fail)
def restart(cm, exception_on_fail=True):
    LOG.debug("Restarting management services")
    cmd = cm.get_service().restart()
    util.wait_for_command(cmd, exception_on_fail)
def restart(svc_config, cluster, exception_on_fail=False):
    service = cluster.get_service(svc_config['name'])
    cmd = service.restart()
    return util.wait_for_command(cmd, exception_on_fail)
    _conf = role['config']
    if to_interpolate:
        _conf = _interpolate_roles(_conf, cluster, host_id_map)
    role_config.update_config(_conf)

    # Get updated service object
    service = cluster.get_service(svc_config['name'])
    LOG.debug("Service %s stale: %s, %s" % (service.name, service.configStale, service.clientConfigStalenessStatus))

    if new:
        # First run - this might fail for valid reasons for some services, so add an additional start
        LOG.info("Performing first run actions for service %s" % svc_config['name'])
        cmd = service._cmd('firstRun', None, api_version=7)
        util.wait_for_command(cmd, True)
        if start_service:
            start(svc_config, cluster, True)
    elif start_service and service.configStale == 'true':
        LOG.info("Configuration stale for %s, restarting" % svc_config['name'])
        restart(svc_config, cluster, True)

    if service.clientConfigStalenessStatus == 'STALE':
        LOG.info("Client config stale for %s, redeploying" % svc_config['name'])
        cmd = service.deploy_client_config()
        util.wait_for_command(cmd, True)


def start(svc_config, cluster, exception_on_fail=False):
    service = cluster.get_service(svc_config['name'])
    cmd = service.start()
    return util.wait_for_command(cmd, exception_on_fail)