def _add_hosts_and_components(
        self, cluster_spec, servers, ambari_info, name):
    """Register each server with the Ambari cluster and attach components.

    For every entry in ``servers`` a host resource is POSTed to cluster
    ``name``.  Then every component of the host's node group (looked up in
    ``cluster_spec.node_groups``) is POSTed as a host component, skipping
    components whose name starts with ``AMBARI``.

    :raises ex.HadoopProvisionError: when a POST answers with a status
        code other than 201.
    """
    host_url_tmpl = 'http://{0}/api/v1/clusters/{1}/hosts/{2}'
    component_url_tmpl = (
        'http://{0}/api/v1/clusters/{1}/hosts/{2}/host_components/{3}')

    for server in servers:
        fqdn = server.instance.fqdn().lower()
        response = self._post(
            host_url_tmpl.format(ambari_info.get_address(), name, fqdn),
            ambari_info)
        if response.status_code != 201:
            LOG.error(
                _LE('Create host command failed. {0}').format(response.text))
            raise ex.HadoopProvisionError(
                _('Failed to add host: %s') % response.text)

        # TODO(jspeidel): ensure that node group exists
        group = cluster_spec.node_groups[server.node_group.name]
        for component in group.components:
            # don't add any AMBARI components
            if component.startswith('AMBARI'):
                continue
            response = self._post(
                component_url_tmpl.format(
                    ambari_info.get_address(), name, fqdn, component),
                ambari_info)
            if response.status_code != 201:
                LOG.error(
                    _LE('Create host_component command failed. %s'),
                    response.text)
                raise ex.HadoopProvisionError(
                    _('Failed to add host component: %s') % response.text)
def _install_services(self, cluster_name, ambari_info):
    """Ask Ambari to move all services in state INIT to INSTALLED.

    A 202 reply carries a request id; the method then waits for that
    asynchronous request and, on success, finalizes the Ambari state.
    A 200 reply needs no further handling.

    :raises ex.HadoopProvisionError: when the PUT answers with any other
        status, or the asynchronous request does not succeed.
    """
    LOG.info(_LI('Installing required Hadoop services ...'))

    url = ('http://{0}/api/v1/clusters/{1}/services'
           '?ServiceInfo/state=INIT'.format(
               ambari_info.get_address(), cluster_name))
    payload = ('{"RequestInfo" : { "context" : "Install all services" },'
               '"Body" : {"ServiceInfo": {"state" : "INSTALLED"}}}')
    response = self._put(url, ambari_info, data=payload)

    if response.status_code == 200:
        return

    if response.status_code != 202:
        LOG.error(
            _LE('Install command failed. {0}').format(response.text))
        raise ex.HadoopProvisionError(
            _('Installation of Hadoop stack failed.'))

    request_id = json.loads(response.text)['Requests']['id']
    request_uri = self._get_async_request_uri(
        ambari_info, cluster_name, request_id)
    if not self._wait_for_async_request(request_uri, ambari_info):
        LOG.critical(_LC('Install command failed.'))
        raise ex.HadoopProvisionError(
            _('Installation of Hadoop stack failed.'))

    LOG.info(_LI("Install of Hadoop stack successful."))
    self._finalize_ambari_state(ambari_info)
def _await_cldb(self, cluster_context, instances=None, timeout=600):
    """Poll the CLDB node until its node list command reports status ok.

    NOTE(review): the source was collapsed onto one line; the nesting of
    ``break`` and the two ``else`` clauses below is a reconstruction --
    confirm against the original file.

    :param cluster_context: provides ``get_instances()`` and
        ``get_instance(mfs.CLDB)``.
    :param instances: instances expected in the node list; defaults to
        ``cluster_context.get_instances()`` when falsy.
    :param timeout: compared against the elapsed seconds since the start
        of polling.
    :raises ex.HadoopProvisionError: when the loop runs out of time, or an
        instance IP is missing from the node list after more than
        ``DEFAULT_RETRY_COUNT`` ok responses.
    """
    instances = instances or cluster_context.get_instances()
    cldb_node = cluster_context.get_instance(mfs.CLDB)
    start_time = timeutils.utcnow()
    # Counts responses with status ok, not the total number of polls.
    retry_count = 0
    with cldb_node.remote() as r:
        LOG.debug("Waiting {count} seconds for CLDB initialization".format(
            count=timeout))
        while timeutils.delta_seconds(start_time,
                                      timeutils.utcnow()) < timeout:
            # Errors are not raised here; only the parsed output is used.
            ec, out = r.execute_command(NODE_LIST_CMD,
                                        raise_when_error=False)
            resp = json.loads(out)
            status = resp['status']
            if str(status).lower() == 'ok':
                ips = [n['ip'] for n in resp['data']]
                retry_count += 1
                for i in instances:
                    if (i.internal_ip not in ips
                            and (retry_count > DEFAULT_RETRY_COUNT)):
                        msg = _("Node failed to connect to CLDB: %s"
                                ) % i.internal_ip
                        raise ex.HadoopProvisionError(msg)
                # NOTE(review): with this nesting the loop exits on the
                # first ok response, so retry_count never exceeds 1 --
                # verify that this is intended.
                break
            else:
                context.sleep(DELAY)
        else:
            # while/else: reached only when the loop ends without break.
            raise ex.HadoopProvisionError(_("CLDB failed to start"))
def _exec_ambari_command(self, ambari_info, body, cmd_uri):
    """PUT ``body`` to ``cmd_uri`` and wait for the resulting async request.

    The reply must have status 202; its ``href`` field, extended with a
    task-status query, is handed to ``_wait_for_async_request``.

    :raises ex.HadoopProvisionError: when the PUT is not answered with 202
        or the asynchronous request does not succeed.
    """
    LOG.debug('PUT URI: {0}'.format(cmd_uri))
    response = self._put(cmd_uri, ambari_info, data=body)

    if response.status_code != 202:
        LOG.error(
            _LE('Command failed. Status: %(status)s, response: '
                '%(response)s'),
            {'status': response.status_code, 'response': response.text})
        raise ex.HadoopProvisionError(_('Hadoop/Ambari command failed.'))

    LOG.debug('PUT response: {0}'.format(response.text))
    tasks_href = (
        json.loads(response.text)['href'] + '/tasks?fields=Tasks/status')
    if not self._wait_for_async_request(tasks_href, ambari_info):
        LOG.critical(_LC('Failed to change state of Hadoop '
                         'components'))
        raise ex.HadoopProvisionError(
            _('Failed to change state of Hadoop components'))

    LOG.info(_LI("Successfully changed state of Hadoop components "))
def start_services(self, cluster_name, cluster_spec, ambari_info):
    """Ask Ambari to move all INSTALLED services to STARTED.

    Service start notifications are fired before the PUT is sent.  A 202
    reply is followed by waiting on the returned request id; a 200 reply
    needs no further handling.

    :raises ex.HadoopProvisionError: when the PUT answers with any other
        status, or the asynchronous request does not succeed.
    """
    url = ('http://{0}/api/v1/clusters/{1}/services?ServiceInfo/'
           'state=INSTALLED'.format(ambari_info.get_address(),
                                    cluster_name))
    payload = ('{"RequestInfo" : { "context" : "Start all services" },'
               '"Body" : {"ServiceInfo": {"state" : "STARTED"}}}')

    self._fire_service_start_notifications(
        cluster_name, cluster_spec, ambari_info)
    response = self._put(url, ambari_info, data=payload)

    if response.status_code == 200:
        return

    if response.status_code != 202:
        LOG.error(
            _LE('Start command failed. Status: {status}, '
                'response: {response}').format(
                    status=response.status_code,
                    response=response.text))
        raise ex.HadoopProvisionError(
            _('Start of Hadoop services failed.'))

    request_id = json.loads(response.text)['Requests']['id']
    request_uri = self._get_async_request_uri(
        ambari_info, cluster_name, request_id)
    if not self._wait_for_async_request(request_uri, ambari_info):
        LOG.error(_LE('Failed to start Hadoop cluster.'))
        raise ex.HadoopProvisionError(
            _('Start of Hadoop services failed.'))

    LOG.info(_LI("Successfully started Hadoop cluster."))
    LOG.info(
        _LI('Ambari server address: {server_address}').format(
            server_address=ambari_info.get_address()))
def req_id(response):
    """Return ``Requests.id`` from the JSON body of ``response``.

    :raises p_exc.HadoopProvisionError: when ``response.text`` is empty or
        the decoded body has no ``Requests``/``id`` entry.
    """
    raw = response.text
    if not raw:
        raise p_exc.HadoopProvisionError("Cannot find request id. "
                                         "No response body")

    payload = jsonutils.loads(raw)
    has_id = "Requests" in payload and "id" in payload["Requests"]
    if not has_id:
        raise p_exc.HadoopProvisionError("Cannot find request id. "
                                         "Unexpected response format")
    return payload["Requests"]["id"]
def wrapper(*args, **kwargs):
    """Wait on every command produced by ``f`` and raise on failure.

    ``f(*args, **kwargs)`` is iterated; each item's ``wait()`` result is
    checked.  For an unsuccessful result the message of the first
    unsuccessful child is reported; if there are no children, or none of
    them is marked unsuccessful, the result's own message is reported so
    that a failed command is never silently ignored.

    :raises ex.HadoopProvisionError: for the first command that fails.
    """
    for cmd in f(*args, **kwargs):
        result = cmd.wait()
        if result.success:
            continue

        children = result.children if result.children is not None else ()
        for child in children:
            if not child.success:
                raise ex.HadoopProvisionError(child.resultMessage)

        # No child explains the failure: report the command itself.
        raise ex.HadoopProvisionError(result.resultMessage)
def _start_cloudera_manager(cluster):
    """Start the Cloudera database and manager, then wait for the API port.

    After issuing the start commands on the manager node, a Telnet
    connection to ``CM_API_PORT`` is attempted every 2 seconds until it
    succeeds or 300 seconds have elapsed.

    :raises ex.HadoopProvisionError: when no connection succeeds in time.
    """
    manager = pu.get_manager(cluster)
    with manager.remote() as remote:
        cmd.start_cloudera_db(remote)
        cmd.start_manager(remote)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : "
              % {'timeout': timeout})

    started_at = timeutils.utcnow()
    while timeutils.delta_seconds(started_at, timeutils.utcnow()) < timeout:
        try:
            telnetlib.Telnet(manager.management_ip, CM_API_PORT).close()
        except IOError:
            context.sleep(2)
        else:
            break
    else:
        # Loop ended without a successful connection.
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name
                    }
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
def start_cluster(cluster):
    """Create the cluster in Ambari and poll its request to completion.

    The template sent to ``create_cluster`` has one host group per
    instance (named after the instance, containing its FQDN).  The
    returned request id is polled every 5 seconds while the request is
    ``IN_PROGRESS`` or ``PENDING``.

    :raises p_exc.HadoopProvisionError: when the request reaches a state
        other than COMPLETED, IN_PROGRESS or PENDING.
    """
    template = {
        "blueprint": cluster.name,
        "default_password": uuidutils.generate_uuid(),
        "host_groups": []
    }
    template["host_groups"].extend(
        {"name": inst.instance_name, "hosts": [{"fqdn": inst.fqdn()}]}
        for group in cluster.node_groups
        for inst in group.instances)

    server = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    secret = cluster.extra["ambari_password"]
    with ambari_client.AmbariClient(server, password=secret) as client:
        request_id = client.create_cluster(cluster.name, template)["id"]
        while True:
            status = client.check_request_status(cluster.name, request_id)
            LOG.debug("Task %s in %s state. Completed %.1f%%" % (
                status["request_context"],
                status["request_status"],
                status["progress_percent"]))
            state = status["request_status"]
            if state == "COMPLETED":
                return
            if state not in ["IN_PROGRESS", "PENDING"]:
                raise p_exc.HadoopProvisionError(
                    _("Ambari request in %s state") % state)
            context.sleep(5)
def _create_config_obj(self, item, target='general', scope='cluster',
                       high_priority=False):
    """Build a ``p.Config`` from the dictionary ``item``.

    String name and value are stripped and lower-cased before use.  The
    priority is 1 when ``high_priority`` is set or the item's
    ``priority`` equals 1, otherwise 2.

    :raises ex.HadoopProvisionError: when the item has no usable name.
    :raises ex.PluginInvalidDataException: when the item has no value.
    """
    def _normalize(raw):
        return raw.strip().lower() if isinstance(raw, str) else raw

    name = _normalize(item.get('name', None))
    value = _normalize(item.get('value', None))

    if not name:
        raise ex.HadoopProvisionError(_("Config missing 'name'"))
    if value is None:
        raise ex.PluginInvalidDataException(
            _("Config '%s' missing 'value'") % name)

    wants_high = high_priority or item.get('priority', 2) == 1
    priority = 1 if wants_high else 2

    return p.Config(
        name=name,
        applicable_target=target,
        scope=scope,
        config_type=item.get('config_type', "string"),
        config_values=item.get('config_values', None),
        default_value=value,
        is_optional=item.get('is_optional', True),
        description=item.get('description', None),
        priority=priority)
def _set_ambari_credentials(self, cluster_spec, ambari_info, version):
    """Apply the users declared for the AMBARI service of the cluster spec.

    For the first service named ``AMBARI``:

    * a user named ``admin`` has its password updated, and
      ``ambari_info`` is switched to that account and password so the
      following calls authenticate with the new credentials;
    * every other user is created; the last one listing ``admin`` in its
      groups is remembered as the replacement administrator.

    When the spec defines no ``admin`` user, ``ambari_info`` is switched
    to the remembered administrator and the ``admin`` account is deleted.

    :raises ex.HadoopProvisionError: when no ``admin`` user is defined and
        the administrator user name to fall back to is ``None``.
    """
    ambari_client = (self.version_factory.get_version_handler(
        version).get_ambari_client())

    for service in cluster_spec.services:
        if service.name != 'AMBARI':
            continue

        is_admin_provided = False
        admin_user = ambari_info.user
        admin_password = ambari_info.password

        for user in service.users:
            if user.name == 'admin':
                ambari_client.update_ambari_admin_user(
                    user.password, ambari_info)
                is_admin_provided = True
                # Keep authenticating as the real admin account; a masked
                # placeholder here would break every later request.
                ambari_info.user = user.name
                ambari_info.password = user.password
            else:
                ambari_client.add_ambari_user(user, ambari_info)
                if 'admin' in user.groups:
                    admin_user = user.name
                    admin_password = user.password

        if not is_admin_provided:
            if admin_user is None:
                raise ex.HadoopProvisionError(
                    _("An Ambari user in the"
                      " admin group must be "
                      "configured."))
            ambari_info.user = admin_user
            ambari_info.password = admin_password
            ambari_client.delete_ambari_user('admin', ambari_info)

        # Only the first AMBARI service is processed.
        break
def wait_ambari_requests(self, requests, cluster_name):
    """Wait until every Ambari request in ``requests`` is COMPLETED.

    After an initial 20 second pause, each outstanding request is queried
    through ``get_request_info``.  Requests that are ``IN_PROGRESS`` or
    ``PENDING`` are polled again after 5 seconds; polling stops as soon as
    nothing is outstanding, without a trailing sleep.

    :param requests: iterable of request ids (duplicates are collapsed).
    :param cluster_name: passed through to ``get_request_info``.
    :raises p_exc.HadoopProvisionError: when any request reports a status
        other than COMPLETED, IN_PROGRESS or PENDING.
    """
    pending = set(requests)
    failed = []
    context.sleep(20)

    while pending:
        still_running = set()
        for req_id in pending:
            request = self.get_request_info(cluster_name, req_id)
            status = request.get("request_status")
            if status == 'COMPLETED':
                continue
            if status in ['IN_PROGRESS', 'PENDING']:
                still_running.add(req_id)
            else:
                failed.append(request)

        if failed:
            msg = _("Some Ambari request(s) "
                    "not in COMPLETED state: %(description)s.")
            descr = _(
                "request %(id)d: %(name)s - in status %(status)s")
            descrs = [descr % {'id': req.get("id"),
                               'name': req.get("request_context"),
                               'status': req.get("request_status")}
                      for req in failed]
            raise p_exc.HadoopProvisionError(msg % {'description': descrs})

        pending = still_running
        if pending:
            # Only wait when something is actually left to poll.
            LOG.debug("Waiting for %d ambari request(s) to be completed",
                      len(pending))
            context.sleep(5)

    LOG.debug("All ambari requests have been completed")
def _check_ops(self, instance):
    """Verify that file ``test_data`` on ``instance`` has the expected text.

    The file is read as root through the instance's remote; surrounding
    whitespace is ignored in the comparison.

    :raises pex.HadoopProvisionError: when the content differs.
    """
    expected_data = "sp@m and pony"
    with instance.remote() as remote:
        actual_data = remote.read_file_from('test_data', run_as_root=True)

    if actual_data.strip() == expected_data.strip():
        return

    raise pex.HadoopProvisionError(
        "ACTUAL:\n%s\nEXPECTED:\n%s" % (actual_data, expected_data))
def delete_ambari_user(self, user_name, ambari_info):
    """Send a DELETE for the Ambari user ``user_name``.

    :raises ex.HadoopProvisionError: when the reply status is not 200.
    """
    url = 'http://{0}/api/v1/users/{1}'.format(
        ambari_info.get_address(), user_name)
    response = self._delete(url, ambari_info)
    if response.status_code == 200:
        return

    details = {'user_name': user_name, 'text': response.text}
    raise ex.HadoopProvisionError(
        _('Unable to delete Ambari user: %(user_name)s'
          ' : %(text)s') % details)
def _add_cluster(self, ambari_info, name):
    """POST a new cluster resource ``name`` with the handler's HDP version.

    :raises ex.HadoopProvisionError: when the reply status is not 201.
    """
    url = 'http://{0}/api/v1/clusters/{1}'.format(
        ambari_info.get_address(), name)
    payload = ('{"Clusters": {"version" : "HDP-'
               + self.handler.get_version()
               + '"}}')

    response = self._post(url, ambari_info, data=payload)
    if response.status_code == 201:
        return

    LOG.error(_LE('Create cluster command failed. %s') % response.text)
    raise ex.HadoopProvisionError(
        _('Failed to add cluster: %s') % response.text)
def install_packages(remote, packages, timeout=1800):
    """Install ``packages`` on the remote host with the OS package manager.

    The distribution reported by ``_get_os_distrib`` selects the command:
    ``apt-get`` for ``ubuntu`` and ``yum`` for ``centos``.  Both are run
    with ``-y`` so that no confirmation prompt can stall the
    non-interactive session.

    :param remote: remote handle passed to ``_get_os_distrib``/``_root``.
    :param packages: iterable of package names, joined with spaces.
    :param timeout: forwarded to ``_root``.
    :raises ex.HadoopProvisionError: for any other distribution.
    """
    distrib = _get_os_distrib(remote)
    if distrib == 'ubuntu':
        cmd = 'RUNLEVEL=1 apt-get install -y %s'
    elif distrib == 'centos':
        # -y answers yum's confirmation prompt, matching the apt branch.
        cmd = 'yum install -y %s'
    else:
        raise ex.HadoopProvisionError(
            _("OS on image is not supported by CDH plugin"))

    _root(remote, cmd % ' '.join(packages), timeout=timeout)
def update_ambari_admin_user(self, password, ambari_info):
    """Change the password of the Ambari ``admin`` user.

    The request body carries the new password together with the current
    one taken from ``ambari_info.password``.  The body is serialized with
    ``json.dumps`` so both values are actually transmitted and any quotes
    or backslashes in them are escaped.

    :param password: the new admin password.
    :param ambari_info: provides the server address and current password.
    :raises ex.HadoopProvisionError: when the reply status is not 200.
    """
    # Local import keeps this method self-contained.
    import json

    user_url = 'http://{0}/api/v1/users/admin'.format(
        ambari_info.get_address())
    update_body = json.dumps({
        'Users': {
            'roles': 'admin',
            'password': password,
            'old_password': ambari_info.password,
        }
    })

    result = self._put(user_url, ambari_info, data=update_body)
    if result.status_code != 200:
        raise ex.HadoopProvisionError(
            _('Unable to update Ambari admin '
              'user credentials: {0}').format(result.text))
def wait_ambari_request(self, request_id, cluster_name):
    """Poll one Ambari request until it reports COMPLETED.

    The status is logged on every poll; while it is ``IN_PROGRESS`` or
    ``PENDING`` the method sleeps 5 seconds and polls again.

    :raises p_exc.HadoopProvisionError: for any other request status.
    """
    still_running = ["IN_PROGRESS", "PENDING"]
    while True:
        status = self.check_request_status(cluster_name, request_id)
        LOG.debug("Task %s in %s state. Completed %.1f%%" % (
            status["request_context"],
            status["request_status"],
            status["progress_percent"]))

        state = status["request_status"]
        if state == "COMPLETED":
            return
        if state not in still_running:
            raise p_exc.HadoopProvisionError(
                _("Ambari request in %s state") % state)
        context.sleep(5)
def _start_processes(instance, processes):
    """Start each named daemon on ``instance`` as the ``hadoop`` user.

    ``namenode``/``datanode`` are started through ``hadoop-daemon.sh`` and
    ``resourcemanager``/``nodemanager`` through ``yarn-daemon.sh``, in the
    order given.

    :raises ex.HadoopProvisionError: on the first unsupported process
        name; processes listed before it have already been started.
    """
    hadoop_start = 'sudo su - -c "hadoop-daemon.sh start %s" hadoop'
    yarn_start = 'sudo su - -c "yarn-daemon.sh start %s" hadoop'

    with instance.remote() as remote:
        for process in processes:
            if process in ('namenode', 'datanode'):
                command = hadoop_start
            elif process in ('resourcemanager', 'nodemanager'):
                command = yarn_start
            else:
                raise ex.HadoopProvisionError(
                    _("Process %s is not supported") % process)
            remote.execute_command(command % process)
def add_ambari_user(self, user, ambari_info):
    """Create an Ambari user from ``user``.

    The request body carries ``user.password`` and the user's groups
    joined with commas as ``roles``.  It is serialized with ``json.dumps``
    so the password is actually transmitted and special characters are
    escaped.

    :param user: object exposing ``name``, ``password`` and ``groups``.
    :param ambari_info: provides the server address.
    :raises ex.HadoopProvisionError: when the reply status is not 201.
    """
    # Local import keeps this method self-contained.
    import json

    user_url = 'http://{0}/api/v1/users/{1}'.format(
        ambari_info.get_address(), user.name)
    create_body = json.dumps({
        'Users': {
            'password': user.password,
            'roles': ','.join(map(str, user.groups)),
        }
    })

    result = self._post(user_url, ambari_info, data=create_body)
    if result.status_code != 201:
        raise ex.HadoopProvisionError(
            _('Unable to create Ambari user: {0}').format(result.text))
def wait_host_registration(cluster):
    """Wait for the cluster's hosts to register with Ambari and verify them.

    ``_check_host_registration`` is polled for up to 600 seconds; then
    the host names Ambari reports are compared, order-insensitively,
    with the FQDNs of the cluster's instances.

    :raises p_exc.HadoopProvisionError: when the two name sets differ.
    """
    server = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    instances = plugin_utils.get_instances(cluster)
    secret = cluster.extra["ambari_password"]

    with ambari_client.AmbariClient(server, password=secret) as client:
        poll_utils.poll(
            _check_host_registration,
            kwargs={"client": client, "num_hosts": len(instances)},
            timeout=600)
        registered = client.get_registered_hosts()

    registered_names = sorted(h["Hosts"]["host_name"] for h in registered)
    expected_names = sorted(inst.fqdn() for inst in instances)
    if registered_names != expected_names:
        raise p_exc.HadoopProvisionError(
            _("Host registration fails in Ambari"))
def _add_configurations_to_cluster(
        self, cluster_spec, ambari_info, name):
    """PUT the deployed configurations of the spec onto cluster ``name``.

    The cluster's current ``desired_configs`` are fetched first.  The
    ``ambari`` entry is removed from the deployed configurations, and if
    the remaining count equals the count of existing configs nothing is
    sent.  Otherwise each configuration is PUT individually; ones that
    already exist are skipped, except ``core-site`` and ``global`` which
    are re-sent with the next tag number.

    :raises ex.HadoopProvisionError: when a PUT reply status is not 200.
    """
    desired_url = ('http://{0}/api/v1/clusters/{1}?fields='
                   'Clusters/desired_configs'.format(
                       ambari_info.get_address(), name))
    response = self._get(desired_url, ambari_info)
    current = json.loads(response.text)['Clusters']['desired_configs']

    configs = cluster_spec.get_deployed_configurations()
    if 'ambari' in configs:
        configs.remove('ambari')
    if len(configs) == len(current):
        # nothing to do
        return

    cluster_url = 'http://{0}/api/v1/clusters/{1}'.format(
        ambari_info.get_address(), name)

    # The tag number is shared across iterations: a bump made for one
    # config also applies to the configs processed after it.
    version = 1
    for config_name in configs:
        if config_name in current:
            if config_name not in ('core-site', 'global'):
                continue
            previous = current[config_name]['tag'].lstrip('v')
            version = int(previous) + 1

        payload = {
            'Clusters': {
                'desired_config': {
                    'type': config_name,
                    'tag': 'v%s' % version,
                    'properties': cluster_spec.configurations[config_name],
                },
            },
        }
        response = self._put(
            cluster_url, ambari_info, data=json.dumps(payload))
        if response.status_code != 200:
            LOG.error(
                _LE('Set configuration command failed. {0}').format(
                    response.text))
            raise ex.HadoopProvisionError(
                _('Failed to set configurations on cluster: %s')
                % response.text)
def _add_services_to_cluster(self, cluster_spec, ambari_info, name):
    """POST every deployed, non-AMBARI service of the spec to the cluster.

    Reply statuses 201 and 409 are both accepted.

    :raises ex.HadoopProvisionError: for any other reply status.
    """
    url_tmpl = 'http://{0}/api/v1/clusters/{1}/services/{2}'

    for service in cluster_spec.services:
        if not service.deployed or service.name == 'AMBARI':
            continue

        response = self._post(
            url_tmpl.format(ambari_info.get_address(), name, service.name),
            ambari_info)
        if response.status_code in [201, 409]:
            continue

        LOG.error(
            _LE('Create service command failed. {0}').format(
                response.text))
        raise ex.HadoopProvisionError(
            _('Failed to add services to cluster: %s')
            % response.text)
def _finalize_ambari_state(self, ambari_info):
    """POST the ``CLUSTER_STARTED_5`` marker to Ambari's persist resource.

    :raises ex.HadoopProvisionError: when the reply status is neither 201
        nor 202.
    """
    LOG.info(_LI('Finalizing Ambari cluster state.'))

    url = 'http://{0}/api/v1/persist'.format(ambari_info.get_address())
    # this post data has non-standard format because persist
    # resource doesn't comply with Ambari API standards
    payload = ('{ "CLUSTER_CURRENT_STATUS":'
               '"{\\"clusterState\\":\\"CLUSTER_STARTED_5\\"}" }')

    response = self._post(url, ambari_info, data=payload)
    if response.status_code not in (201, 202):
        LOG.warning(_LW('Finalizing of Ambari cluster state failed. {0}').
                    format(response.text))
        raise ex.HadoopProvisionError(_('Unable to finalize Ambari '
                                        'state.'))
def _add_components_to_services(self, cluster_spec, ambari_info, name):
    """POST each component of every deployed, non-AMBARI service.

    Reply statuses 201 and 409 are both accepted.

    :raises ex.HadoopProvisionError: for any other reply status.
    """
    url_tmpl = ('http://{0}/api/v1/clusters/{1}/services/{2}'
                '/components/{3}')

    for service in cluster_spec.services:
        if not service.deployed or service.name == 'AMBARI':
            continue

        for component in service.components:
            response = self._post(
                url_tmpl.format(ambari_info.get_address(), name,
                                service.name, component.name),
                ambari_info)
            if response.status_code in [201, 409]:
                continue

            LOG.error(
                _LE('Create component command failed. {0}').format(
                    response.text))
            raise ex.HadoopProvisionError(
                _('Failed to add components to services: %s')
                % response.text)
def install_rpms(self, r):
    """Fetch the Ambari yum repo file and install the EPEL release package.

    The repo file is downloaded with curl from ``self.ambari_rpm``.  When
    the download fails, ``self.rpms_installed()`` is consulted instead.

    :param r: remote used to execute the commands as root.
    :raises ex.HadoopProvisionError: when the download fails and
        ``rpms_installed()`` is false.
    """
    LOG.debug("Installing rpm's")

    # TODO(jspeidel): based on image type, use correct command
    fetch_repo = ('curl -f -s -o /etc/yum.repos.d/ambari.repo %s' %
                  self.ambari_rpm)
    exit_code, _stdout = r.execute_command(
        fetch_repo, run_as_root=True, raise_when_error=False)

    if exit_code == 0:
        r.execute_command(
            'yum -y install %s' % EPEL_RELEASE_PACKAGE_NAME,
            run_as_root=True)
        return

    LOG.debug("Unable to install rpm's from repo, "
              "checking for local install.")
    if not self.rpms_installed():
        raise ex.HadoopProvisionError(
            _('Failed to install Hortonworks Ambari'))
def install_swift_integration(self, r):
    """Install the Hadoop Swift rpm, falling back to a local copy.

    ``HADOOP_SWIFT_RPM`` is tried first.  If that fails, the presence of
    ``HADOOP_SWIFT_LOCAL_RPM`` is checked with ``ls`` and, when found, it
    is installed instead.

    :param r: remote used to execute the commands as root.
    :raises ex.HadoopProvisionError: when neither source is usable.
    """
    LOG.debug("Installing swift integration")
    rpm_prefix = 'rpm -U --quiet '

    status, _out = r.execute_command(
        rpm_prefix + HADOOP_SWIFT_RPM,
        run_as_root=True, raise_when_error=False)
    if status == 0:
        return

    LOG.debug("Unable to install swift integration from "
              "source, checking for local rpm.")
    status, _out = r.execute_command(
        'ls ' + HADOOP_SWIFT_LOCAL_RPM,
        run_as_root=True, raise_when_error=False)
    if status != 0:
        raise ex.HadoopProvisionError(
            _('Failed to install Hadoop Swift integration'))

    r.execute_command(rpm_prefix + HADOOP_SWIFT_LOCAL_RPM, run_as_root=True)
def _start_components(self, ambari_info, auth, cluster_name, servers,
                      cluster_spec):
    """Start all installed non-client components on the given servers.

    Host components in state INSTALLED on ``servers`` are queried first;
    those whose component name is not of type ``CLIENT`` according to
    ``cluster_spec`` are then moved to STARTED with a single command.

    :param auth: not used by this method.
    :raises ex.HadoopProvisionError: when the query is not answered with
        status 200 (the message carries the returned status code).
    """
    # query for all the host components in the INSTALLED state,
    # then get a list of the client services in the list
    installed_uri = ('http://{0}/api/v1/clusters/{1}'
                     '/host_components?HostRoles/state=INSTALLED&'
                     'HostRoles/host_name.in({2})'.format(
                         ambari_info.get_address(), cluster_name,
                         self._get_host_list(servers)))
    result = self._get(installed_uri, ambari_info)

    if result.status_code != 200:
        # Use status_code, the attribute checked above, so the error
        # report itself cannot fail on a missing attribute.
        raise ex.HadoopProvisionError(
            _('Unable to determine installed service '
              'components in scaled instances. status'
              ' code returned = {0}').format(result.status_code))

    LOG.debug('GET response: {0}'.format(result.text))
    items = json.loads(result.text)['items']

    client_set = cluster_spec.get_components_for_type('CLIENT')
    # De-duplicate; sorted only to make the generated URI deterministic.
    inclusion_list = sorted({
        item['HostRoles']['component_name']
        for item in items
        if item['HostRoles']['component_name'] not in client_set})

    # query and start all non-client components on the given set of
    # hosts
    # TODO(jspeidel): Provide request context
    body = '{"HostRoles": {"state" : "STARTED"}}'
    start_uri = ('http://{0}/api/v1/clusters/{1}'
                 '/host_components?HostRoles/state=INSTALLED&'
                 'HostRoles/host_name.in({2})'
                 '&HostRoles/component_name.in({3})'.format(
                     ambari_info.get_address(), cluster_name,
                     self._get_host_list(servers),
                     ",".join(inclusion_list)))
    self._exec_ambari_command(ambari_info, body, start_uri)
def await_agents(self, instances):
    """Wait until every instance's FQDN is among the manager's hosts.

    The API client's host list is re-read every 5 seconds for up to 300
    seconds.

    :raises ex.HadoopProvisionError: when some FQDN is still missing once
        the time is up.
    """
    api = self.get_api_client(instances[0].cluster)
    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for agent connected to manager"
              % {'timeout': timeout})

    started_at = timeutils.utcnow()
    while timeutils.delta_seconds(started_at, timeutils.utcnow()) < timeout:
        expected = [inst.fqdn() for inst in instances]
        known = [host.hostname for host in api.get_all_hosts('full')]
        if all(name in known for name in expected):
            break
        context.sleep(5)
    else:
        # Loop ended without all hosts showing up.
        raise ex.HadoopProvisionError(_("Cloudera agents failed to connect"
                                        " to Cloudera Manager"))
def validate_cm_api_libs(self):
    """Fail unless ``self.have_cm_api_libs()`` reports the libs present.

    :raises ex.HadoopProvisionError: when the check returns a false value.
    """
    if self.have_cm_api_libs():
        return

    LOG.error(_LE("For provisioning cluster with CDH plugin install"
                  " 'cm_api' package version 6.0.2 or later."))
    raise ex.HadoopProvisionError(_("'cm_api' is not installed."))