def perform_grafana_post_call(url, payload, server):
  response = None
  data = None
  userAndPass = b64encode('{0}:{1}'.format(server.user, server.password))
  Logger.debug('POST payload: %s' % payload)
  headers = {"Content-Type": "application/json",
             "Content-Length": len(payload),
             'Authorization': 'Basic %s' % userAndPass}

  grafana_https_enabled = server.protocol.lower() == 'https'
  ca_certs = None
  if grafana_https_enabled:
    import params
    ca_certs = params.ams_grafana_cert_file

  for i in xrange(0, GRAFANA_CONNECT_TRIES):
    try:
      Logger.info("Connecting (POST) to %s:%s%s" % (server.host, server.port, url))
      conn = network.get_http_connection(server.host, int(server.port),
                                         grafana_https_enabled, ca_certs)
      conn.request("POST", url, payload, headers)
      response = conn.getresponse()
      Logger.info("Http response: %s %s" % (response.status, response.reason))
      if response.status == 401:  # Intermittent error thrown from Grafana
        if i < GRAFANA_CONNECT_TRIES - 1:
          time.sleep(GRAFANA_CONNECT_TIMEOUT)
          Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                      % (GRAFANA_CONNECT_TIMEOUT))
          continue
      data = response.read()
      Logger.info("Http data: %s" % data)
      conn.close()
      break
    except (httplib.HTTPException, socket.error) as ex:
      if i < GRAFANA_CONNECT_TRIES - 1:
        time.sleep(GRAFANA_CONNECT_TIMEOUT)
        Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                    % (GRAFANA_CONNECT_TIMEOUT))
        continue
      else:
        raise Fail("Ambari Metrics Grafana update failed due to: %s" % str(ex))
      pass

  return (response, data)
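# A hedged usage sketch, not taken from the source: the endpoint, host, and payload below
# are illustrative. It assumes the same Server wrapper used elsewhere in this module
# (see create_grafana_admin_pwd further down) and Grafana's /api/dashboards/db convention.
server = Server(protocol='http', host='grafana.example.com', port='3000',
                user='admin', password='admin')
dashboard_json = json.dumps({"dashboard": {"title": "AMS - Example"}, "overwrite": True})
(response, data) = perform_grafana_post_call('/api/dashboards/db', dashboard_json, server)
if response and response.status != 200:
  raise Fail("Dashboard POST failed: %s %s\n%s" % (response.status, response.reason, data))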
def doRetries(hdfs_site, security_enabled, run_user):
  doRetries.attempt += 1
  active_namenodes, standby_namenodes, unknown_namenodes = get_namenode_states_noretries(
    hdfs_site, security_enabled, run_user, doRetries.attempt == times)
  Logger.info(
    "NameNode HA states: active_namenodes = {0}, standby_namenodes = {1}, unknown_namenodes = {2}".format(
      active_namenodes, standby_namenodes, unknown_namenodes))
  if active_namenodes:
    return active_namenodes, standby_namenodes, unknown_namenodes
  elif doRetries.attempt == times:
    Logger.warning("No active NameNode was found after {0} retries. Will return current NameNode HA states".format(times))
    return active_namenodes, standby_namenodes, unknown_namenodes
  raise Fail('No active NameNode was found.')
def check_and_log(self, env, message):
  import params

  file_location = params.file_location
  datetime_format = params.datetime_format

  if not os.path.isfile(file_location) or not os.access(file_location, os.W_OK):
    raise Fail("File does not exist or is not writable: %s" % file_location)

  with open(file_location, "a") as logfile:
    logfile.write("Time: " + datetime.utcnow().strftime(datetime_format) + "\n")
    logfile.write("Log: " + message + "\n")
    logfile.write("---------------\n")
def _assert_download_valid(self):
  source = self.main_resource.resource.source
  type = self.main_resource.resource.type
  target = self.main_resource.resource.target

  if source:
    self.source_status = self._get_file_status(source)
    if self.source_status is None:
      raise Fail(format("Source {source} doesn't exist"))
    if type == "directory" and self.source_status['type'] == "FILE":
      raise Fail(format("Source {source} is file but type is {type}"))
    elif type == "file" and self.source_status['type'] == "DIRECTORY":
      raise Fail(format("Source {source} is directory but type is {type}"))
  else:
    raise Fail(format("No source provided"))

  if os.path.exists(target):
    if type == "directory" and os.path.isfile(target):
      raise Fail(format("Trying to download directory but file exists locally {target}"))
    elif type == "file" and os.path.isdir(target):
      raise Fail(format("Trying to download file but directory exists locally {target}"))
def run_command(self, target, operation, method='POST', assertable_result=True,
                file_to_put=None, ignore_status_codes=[], **kwargs):
  """
  assertable_result - some POST requests return '{"boolean":false}' or '{"boolean":true}',
  depending on whether the query was successful; we can assert this for them.
  """
  target = HdfsResourceProvider.parse_path(target)

  url = format("{address}/webhdfs/v1{target}?op={operation}&user.name={run_user}",
               address=self.address, run_user=self.run_user)
  for k, v in kwargs.iteritems():
    url = format("{url}&{k}={v}")

  if file_to_put and not os.path.exists(file_to_put):
    raise Fail(format("File {file_to_put} is not found."))

  cmd = ["curl", "-sS", "-L", "-w", "%{http_code}", "-X", method]

  if file_to_put:
    cmd += ["-T", file_to_put]
  if self.security_enabled:
    cmd += ["--negotiate", "-u", ":"]
  if self.is_https_enabled:
    cmd += ["-k"]

  cmd.append(url)
  _, out, err = get_user_call_output(cmd, user=self.run_user, logoutput=self.logoutput, quiet=False)
  status_code = out[-3:]
  out = out[:-3]  # remove last line from output which is status code

  try:
    result_dict = json.loads(out)
  except ValueError:
    result_dict = out

  if status_code not in WebHDFSUtil.valid_status_codes + ignore_status_codes or \
      assertable_result and result_dict and not result_dict['boolean']:
    formatted_output = json.dumps(result_dict, indent=2) if isinstance(result_dict, dict) else result_dict
    formatted_output = err + "\n" + formatted_output
    err_msg = "Execution of '%s' returned status_code=%s. %s" % (
      shell.string_cmd_from_args_list(cmd), status_code, formatted_output)
    raise Fail(err_msg)

  return result_dict
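# A hedged usage sketch, not taken from the source: `util` is assumed to be an instance of
# the provider class defining run_command above, and the paths are illustrative.
# WebHDFS MKDIRS answers with {"boolean": true} on success, which run_command asserts by default.
util.run_command('/tmp/example_dir', 'MKDIRS', method='PUT')

# SETPERMISSION returns an empty body, so the boolean assertion is skipped here;
# extra keyword arguments (permission=...) are appended to the query string.
util.run_command('/tmp/example_dir', 'SETPERMISSION', method='PUT',
                 assertable_result=False, permission='777')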
def create_grafana_admin_pwd():
  import params

  serverCall1 = Server(protocol=params.ams_grafana_protocol.strip(),
                       host=params.ams_grafana_host.strip(),
                       port=params.ams_grafana_port,
                       user=params.ams_grafana_admin_user,
                       password=params.ams_grafana_admin_pwd)

  response = perform_grafana_get_call(GRAFANA_USER_URL, serverCall1)

  if response and response.status != 200:
    serverCall2 = Server(protocol=params.ams_grafana_protocol.strip(),
                         host=params.ams_grafana_host.strip(),
                         port=params.ams_grafana_port,
                         user=params.ams_grafana_admin_user,
                         password='******')

    Logger.debug("Setting grafana admin password")

    pwd_data = {"oldPassword": "******",
                "newPassword": params.ams_grafana_admin_pwd,
                "confirmNew": params.ams_grafana_admin_pwd}
    password_json = json.dumps(pwd_data)

    (response, data) = perform_grafana_put_call(GRAFANA_USER_URL, 'password', password_json, serverCall2)

    if response.status == 200:
      Logger.info("Ambari Metrics Grafana password updated.")
    elif response.status == 500:
      Logger.info("Ambari Metrics Grafana password update failed. Not retrying.")
      raise Fail("Ambari Metrics Grafana password update failed. PUT request status: %s %s \n%s" %
                 (response.status, response.reason, data))
    else:
      raise Fail("Ambari Metrics Grafana password creation failed. "
                 "PUT request status: %s %s \n%s" % (response.status, response.reason, data))
  else:
    Logger.info("Grafana password update not required.")
  pass
def is_mounted(self):
  if not os.path.exists(self.resource.mount_point):
    return False

  if self.resource.device and not os.path.exists(self.resource.device):
    raise Fail("%s Device %s does not exist" % (self, self.resource.device))

  mounts = get_mounted()
  for m in mounts:
    if m['mount_point'] == self.resource.mount_point:
      return True

  return False
def service_check(self, env):
  from resource_management.libraries.functions.windows_service_utils import check_windows_service_exists
  from service_mapping import collector_win_service_name, monitor_win_service_name
  import params
  env.set_params(params)

  # Just check that the services were correctly installed

  # Check the monitor on all hosts
  Logger.info("Metrics Monitor service check was started.")
  if not check_windows_service_exists(monitor_win_service_name):
    raise Fail("Metrics Monitor service was not properly installed. Check the logs and retry the installation.")

  # Check the collector only where installed
  if params.ams_collector_home_dir and os.path.isdir(params.ams_collector_home_dir):
    Logger.info("Metrics Collector service check was started.")
    if not check_windows_service_exists(collector_win_service_name):
      raise Fail("Metrics Collector service was not properly installed. Check the logs and retry the installation.")
def get_property_for_active_namenode(hdfs_site, name_service, property_name, security_enabled, run_user):
  """
  For dfs.namenode.rpc-address:
    - In non-HA mode it will return hdfs_site[dfs.namenode.rpc-address]
    - In HA mode it will return hdfs_site[dfs.namenode.rpc-address.nnha.nn2], where nnha is
      the name of the nameservice and nn2 is the id of the active NameNode
  """
  value = None
  rpc_key = None
  if _is_ha_config(hdfs_site):
    name_services = get_nameservices(hdfs_site)
    if name_service not in name_services:
      raise Fail('Trying to get property {0} for non-existing ns=\'{1}\'. Valid nameservices are {2}'
                 .format(property_name, name_service, ','.join(name_services)))
    active_namenodes = get_namenode_states(hdfs_site, security_enabled, run_user)[0]
    if not len(active_namenodes):
      raise Fail("There are no active namenodes.")
    active_namenode_id = active_namenodes[0][0]
    value = hdfs_site[format("{property_name}.{name_service}.{active_namenode_id}")]
    rpc_key = NAMENODE_RPC_FRAGMENT.format(name_service, active_namenode_id)
  else:
    value = hdfs_site[property_name]
    rpc_key = NAMENODE_RPC_NON_HA

  if INADDR_ANY in value and rpc_key in hdfs_site:
    rpc_value = str(hdfs_site[rpc_key])
    if INADDR_ANY not in rpc_value:
      rpc_host = rpc_value.split(":")[0]
      value = value.replace(INADDR_ANY, rpc_host)

  return value
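# A hedged usage sketch, not taken from the source: the nameservice 'nnha' and the run user
# are illustrative, and hdfs_site is assumed to be the parsed hdfs-site.xml dictionary.
active_rpc_address = get_property_for_active_namenode(hdfs_site, 'nnha', 'dfs.namenode.rpc-address',
                                                      security_enabled=True, run_user='hdfs')
# In non-HA mode the same call simply returns hdfs_site['dfs.namenode.rpc-address'],
# with INADDR_ANY (0.0.0.0) replaced by the real RPC host when that can be resolved.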
def _assert_valid(self):
  source = self.main_resource.resource.source
  type = self.main_resource.resource.type
  target = self.main_resource.resource.target

  if source:
    if not os.path.exists(source):
      raise Fail(format("Source {source} doesn't exist"))
    if type == "directory" and os.path.isfile(source):
      raise Fail(format("Source {source} is file but type is {type}"))
    elif type == "file" and os.path.isdir(source):
      raise Fail(format("Source {source} is directory but type is {type}"))

  self.target_status = self._get_file_status(target)

  if self.target_status and self.target_status['type'].lower() != type:
    raise Fail(format("Trying to create file/directory but directory/file exists in the DFS on {target}"))
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}
    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      code, _, _ = _call_command(self.resource.command,
                                 logoutput=self.resource.logoutput,
                                 cwd=self.resource.cwd,
                                 env=self.resource.environment,
                                 wait_for_finish=self.resource.wait_for_finish,
                                 timeout=self.resource.timeout,
                                 user=self.resource.user,
                                 pid_file_name=self.resource.pid_file,
                                 poll_after=self.resource.poll_after)
      if code != 0 and not self.resource.ignore_failures:
        raise Fail("Failed to execute " + self.resource.command)
      break
    except Fail as ex:
      Logger.info("Error raised: %s" % str(ex))
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due to timeout after %d seconds") % (
        self.resource.command, self.resource.timeout)
      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        _call_command(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def call_curl_krb_request(tmp_dir, user_keytab, user_princ, uri, kinit_path, user,
                          connection_timeout, method='GET', metric_json='', header='',
                          tries=1, current_time=0, random_value=0):
  if method == 'POST':
    Logger.info("Generated metrics for %s:\n%s" % (uri, metric_json))

  for i in xrange(0, tries):
    try:
      Logger.info("Connecting (%s) to %s" % (method, uri))

      response = None
      errmsg = None
      time_millis = 0

      response, errmsg, time_millis = curl_krb_request(tmp_dir, user_keytab, user_princ, uri,
                                                       'ams_service_check', kinit_path, False,
                                                       "AMS Service Check", user,
                                                       connection_timeout=connection_timeout,
                                                       kinit_timer_ms=0,
                                                       method=method,
                                                       body=metric_json,
                                                       header=header)
    except Exception, exception:
      if i < tries - 1:  # range/xrange returns items from start to end-1
        time.sleep(connection_timeout)
        Logger.info("Connection failed for %s. Next retry in %s seconds." % (uri, connection_timeout))
        continue
      else:
        raise Fail("Unable to {0} metrics on: {1}. Exception: {2}".format(method, uri, str(exception)))
    finally:
def action_create(self):
  path = self.resource.path

  if os.path.isdir(path):
    raise Fail("Applying %s failed, directory with name %s exists" % (self.resource, path))

  dirname = os.path.dirname(path)
  if not os.path.isdir(dirname):
    raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))

  write = False
  content = self._get_content()
  if not os.path.exists(path):
    write = True
    reason = "it doesn't exist"
  elif self.resource.replace:
    if content is not None:
      with open(path, "rb") as fp:
        old_content = fp.read()
      if content != old_content:
        write = True
        reason = "contents don't match"
        if self.resource.backup:
          self.resource.env.backup_file(path)

  if write:
    Logger.info("Writing %s because %s" % (self.resource, reason))
    with open(path, "wb") as fp:
      if content:
        fp.write(content)

  _ensure_metadata(self.resource.path, self.resource.owner, self.resource.group, mode=self.resource.mode)
def get_mounted(self):
  p = Popen("mount", stdout=PIPE, stderr=STDOUT, shell=True)
  out = p.communicate()[0]
  if p.wait() != 0:
    raise Fail("[%s] Getting list of mounts (calling mount) failed" % self)

  mounts = [x.split(' ') for x in out.strip().split('\n')]

  return [dict(
    device=m[0],
    mount_point=m[2],
    fstype=m[4],
    options=m[5][1:-1].split(','),
  ) for m in mounts if m[1] == "on" and m[3] == "type"]
def action_create(self):
  path = self.resource.path
  if not os.path.exists(path):
    Logger.info("Creating directory %s" % self.resource)
    if self.resource.recursive:
      os.makedirs(path, self.resource.mode or 0755)
    else:
      dirname = os.path.dirname(path)
      if not os.path.isdir(dirname):
        raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))
      os.mkdir(path, self.resource.mode or 0755)

  if not os.path.isdir(path):
    raise Fail("Applying %s failed, file %s already exists" % (self.resource, path))

  _ensure_metadata(path, self.resource.owner, self.resource.group, mode=self.resource.mode)
def action_create(self):
  path = self.resource.path

  if sudo.path_isdir(path):
    raise Fail("Applying %s failed, directory with name %s exists" % (self.resource, path))

  dirname = os.path.dirname(path)
  if not sudo.path_isdir(dirname):
    raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))

  write = False
  content = self._get_content()
  if not sudo.path_exists(path):
    write = True
    reason = "it doesn't exist"
  elif self.resource.replace:
    if content is not None:
      old_content = sudo.read_file(path, encoding=self.resource.encoding)
      if content != old_content:
        write = True
        reason = "contents don't match"
        if self.resource.backup:
          self.resource.env.backup_file(path)

  if write:
    Logger.info("Writing %s because %s" % (self.resource, reason))
    sudo.create_file(path, content, encoding=self.resource.encoding)

  _ensure_metadata(self.resource.path, self.resource.owner, self.resource.group,
                   mode=self.resource.mode, cd_access=self.resource.cd_access)
def get_property_for_active_namenode(hdfs_site, property_name, security_enabled, run_user):
  """
  For dfs.namenode.rpc-address:
    - In non-HA mode it will return hdfs_site[dfs.namenode.rpc-address]
    - In HA mode it will return hdfs_site[dfs.namenode.rpc-address.nnha.nn2], where nnha is
      the name of the nameservice and nn2 is the id of the active NameNode
    - In federated mode it fails, since there is more than one active NameNode
  """
  value = None
  rpc_key = None
  if _is_ha_config(hdfs_site):
    name_services = get_nameservices(hdfs_site)
    if len(name_services) > 1:
      raise Fail('Multiple name services are not supported by this function')
    name_service = name_services[0]
    active_namenodes = get_namenode_states(hdfs_site, security_enabled, run_user)[0]
    if not len(active_namenodes):
      raise Fail("There are no active namenodes.")
    active_namenode_id = active_namenodes[0][0]
    value = hdfs_site[format("{property_name}.{name_service}.{active_namenode_id}")]
    rpc_key = NAMENODE_RPC_FRAGMENT.format(name_service, active_namenode_id)
  else:
    value = hdfs_site[property_name]
    rpc_key = NAMENODE_RPC_NON_HA

  if INADDR_ANY in value and rpc_key in hdfs_site:
    rpc_value = str(hdfs_site[rpc_key])
    if INADDR_ANY not in rpc_value:
      rpc_host = rpc_value.split(":")[0]
      value = value.replace(INADDR_ANY, rpc_host)

  return value
def action_create(self):
  path = self.resource.path

  if not sudo.path_exists(path):
    Logger.info("Creating directory %s" % self.resource)

    # dead links should be followed, else we're going to have failures on trying to create directories on top of them.
    if self.resource.follow:
      followed_links = []
      while sudo.path_lexists(path):
        if path in followed_links:
          raise Fail("Applying %s failed, looped symbolic links found while resolving %s" % (self.resource, path))
        followed_links.append(path)
        path = sudo.readlink(path)

      if path != self.resource.path:
        Logger.info("Following the link {0} to {1} to create the directory".format(self.resource.path, path))

    if self.resource.recursive:
      if self.resource.recursive_permission:
        DirectoryProvider.makedirs_and_set_permission_recursively(path, self.resource.owner,
                                                                  self.resource.group, self.resource.mode)
      else:
        sudo.makedirs(path, self.resource.mode or 0755)
    else:
      dirname = os.path.dirname(path)
      if not sudo.path_isdir(dirname):
        raise Fail("Applying %s failed, parent directory %s doesn't exist" % (self.resource, dirname))

      sudo.makedir(path, self.resource.mode or 0755)

  if not sudo.path_isdir(path):
    raise Fail("Applying %s failed, file %s already exists" % (self.resource, path))

  _ensure_metadata(path, self.resource.owner, self.resource.group, mode=self.resource.mode,
                   cd_access=self.resource.cd_access)
def action_create(self):
  path = self.resource.path

  if os.path.lexists(path):
    oldpath = os.path.realpath(path)
    if oldpath == self.resource.to:
      return
    if not os.path.islink(path):
      raise Fail("%s trying to create a symlink with the same name as an existing file or directory" % self)
    self.log.info("%s replacing old symlink to %s" % (self.resource, oldpath))
    os.unlink(path)

  if self.resource.hard:
    if not os.path.exists(self.resource.to):
      raise Fail("Failed to apply %s, linking to nonexistent location %s" % (self.resource, self.resource.to))
    if os.path.isdir(self.resource.to):
      raise Fail("Failed to apply %s, cannot create hard link to a directory (%s)" % (self.resource, self.resource.to))
    self.log.info("Creating hard %s" % self.resource)
    os.link(self.resource.to, path)
    self.resource.updated()
  else:
    if not os.path.exists(self.resource.to):
      self.log.info("Warning: linking to nonexistent location %s", self.resource.to)
    self.log.info("Creating symbolic %s" % self.resource)
    os.symlink(self.resource.to, path)
    self.resource.updated()
def action_enable(self):
  if self.is_enabled():
    Logger.debug("%s already enabled" % self)
  else:
    if not self.resource.device:
      raise Fail("[%s] device not set but required for enable action" % self)
    if not self.resource.fstype:
      raise Fail("[%s] fstype not set but required for enable action" % self)

    with open("/etc/fstab", "a") as fp:
      fp.write("%s %s %s %s %d %d\n" % (
        self.resource.device,
        self.resource.mount_point,
        self.resource.fstype,
        ",".join(self.resource.options or ["defaults"]),
        self.resource.dump,
        self.resource.passno,
      ))

    Logger.info("%s enabled" % self)
def service_check(self, env):
  import params

  Logger.info("Ambari Metrics service check was started.")
  env.set_params(params)

  results = execute_in_parallel(self.service_check_for_single_host,
                                params.ams_collector_hosts.split(','), params)

  for host in str(params.ams_collector_hosts).split(","):
    if host in results:
      if results[host].status == SUCCESS:
        Logger.info("Ambari Metrics service check passed on host " + host)
        return
      else:
        Logger.warning(results[host].result)

  raise Fail("All metrics collectors are unavailable.")
def enable(self):
  hSCM = safe_open_scmanager()

  try:
    hSvc = safe_open_service(hSCM, self.resource.service_name)
    if win32service.QueryServiceConfig(hSvc)[1] == win32service.SERVICE_DISABLED:
      win32service.ChangeServiceConfig(hSvc,
                                       win32service.SERVICE_NO_CHANGE,
                                       win32service.SERVICE_DEMAND_START,
                                       win32service.SERVICE_NO_CHANGE,
                                       None, None, 0, None, None, None, None)
    win32service.CloseServiceHandle(hSvc)
  except win32api.error, details:
    raise Fail("Error enabling service {0}: {1}".format(self.resource.service_name, details.winerror))
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}
    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command,
                         logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd,
                         env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource),
                         user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout,
                         pid_file=self.resource.pid_file,
                         poll_after=self.resource.poll_after)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due to timeout after %d seconds") % (
        self.resource.command, self.resource.timeout)
      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def get_property_for_active_namenode(hdfs_site, property_name, security_enabled, run_user):
  """
  For dfs.namenode.rpc-address:
    - In non-HA mode it will return hdfs_site[dfs.namenode.rpc-address]
    - In HA mode it will return hdfs_site[dfs.namenode.rpc-address.nnha.nn2], where nnha is
      the name of the nameservice and nn2 is the id of the active NameNode
  """
  if is_ha_enabled(hdfs_site):
    name_service = hdfs_site['dfs.nameservices']
    active_namenodes = get_namenode_states(hdfs_site, security_enabled, run_user)[0]

    if not len(active_namenodes):
      raise Fail("There are no active namenodes.")

    active_namenode_id = active_namenodes[0][0]
    return hdfs_site[format("{property_name}.{name_service}.{active_namenode_id}")]
  else:
    return hdfs_site[property_name]
def perform_grafana_delete_call(url, server):
  import params

  grafana_https_enabled = server.protocol.lower() == 'https'
  response = None
  ca_certs = None
  if grafana_https_enabled:
    ca_certs = params.ams_grafana_ca_cert

  for i in xrange(0, params.grafana_connect_attempts):
    try:
      conn = network.get_http_connection(server.host,
                                         int(server.port),
                                         grafana_https_enabled, ca_certs,
                                         ssl_version=Script.get_force_https_protocol_value())

      userAndPass = b64encode('{0}:{1}'.format(server.user, server.password))
      headers = {'Authorization': 'Basic %s' % userAndPass}

      Logger.info("Connecting (DELETE) to %s:%s%s" % (server.host, server.port, url))

      conn.request("DELETE", url, headers=headers)
      response = conn.getresponse()
      Logger.info("Http response: %s %s" % (response.status, response.reason))
      break
    except (httplib.HTTPException, socket.error) as ex:
      if i < params.grafana_connect_attempts - 1:
        Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                    % (params.grafana_connect_retry_delay))
        time.sleep(params.grafana_connect_retry_delay)
        continue
      else:
        raise Fail("Ambari Metrics Grafana update failed due to: %s" % str(ex))
      pass

  return response
def action_change_user(self):
  hSCM = safe_open_scmanager()

  try:
    hSvc = safe_open_service(hSCM, self.resource.service_name)
    self._fix_user_name()
    try:
      win32service.ChangeServiceConfig(hSvc,
                                       win32service.SERVICE_NO_CHANGE,
                                       win32service.SERVICE_NO_CHANGE,
                                       win32service.SERVICE_NO_CHANGE,
                                       None, None, 0, None,
                                       self.resource.username,
                                       self.resource.password,
                                       None)
    except win32api.error, details:
      raise Fail("Error changing user for service {0}: {1}".format(
        self.resource.service_name, details.winerror))
  finally:
    win32service.CloseServiceHandle(hSvc)
def perform_grafana_put_call(url, id, payload, server):
  import params

  response = None
  data = None
  userAndPass = b64encode('{0}:{1}'.format(server.user, server.password))
  headers = {"Content-Type": "application/json",
             'Authorization': 'Basic %s' % userAndPass}
  grafana_https_enabled = server.protocol.lower() == 'https'

  ca_certs = None
  if grafana_https_enabled:
    ca_certs = params.ams_grafana_ca_cert

  for i in xrange(0, params.grafana_connect_attempts):
    try:
      conn = network.get_http_connection(server.host,
                                         int(server.port),
                                         grafana_https_enabled, ca_certs,
                                         ssl_version=Script.get_force_https_protocol_value())
      conn.request("PUT", url + "/" + str(id), payload, headers)
      response = conn.getresponse()
      data = response.read()
      Logger.info("Http data: %s" % data)
      conn.close()
      break
    except (httplib.HTTPException, socket.error) as ex:
      if i < params.grafana_connect_attempts - 1:
        Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                    % (params.grafana_connect_retry_delay))
        time.sleep(params.grafana_connect_retry_delay)
        continue
      else:
        raise Fail("Ambari Metrics Grafana update failed due to: %s" % str(ex))
      pass

  return (response, data)
def get_hdfs_cluster_id_from_jmx(hdfs_site, security_enabled, run_user):
  name_services = get_nameservices(hdfs_site)

  for name_service in name_services:
    for nn_unique_id, address, jmx_uri in all_jmx_namenode_addresses(hdfs_site, name_service):
      jmx_uri = jmx_uri.format(JMX_BEAN_NN_INFO)
      is_https_enabled = is_https_enabled_in_hdfs(hdfs_site['dfs.http.policy'],
                                                  hdfs_site['dfs.https.enable'])
      state = get_value_from_jmx(jmx_uri, 'ClusterId', security_enabled, run_user, is_https_enabled)

      if state:
        return state

      Logger.info("Cannot get clusterId from {0}".format(jmx_uri))

  raise Fail("Cannot get clusterId from jmx, since none of the namenodes is running/accessible via jmx.")
def action_create(self):
  path = self.resource.path
  if not os.path.exists(path):
    self.log.info("Creating directory %s" % self.resource)
    if self.resource.recursive:
      os.makedirs(path, self.resource.mode or 0755)
    else:
      os.mkdir(path, self.resource.mode or 0755)
    self.resource.updated()

  if not os.path.isdir(path):
    raise Fail("Applying %s failed, file %s already exists" % (self.resource, path))

  if _ensure_metadata(path, self.resource.owner, self.resource.group,
                      mode=self.resource.mode, log=self.log):
    self.resource.updated()
def action_run(self):
  if self.resource.creates:
    if sudo.path_exists(self.resource.creates):
      Logger.info("Skipping %s due to creates" % self.resource)
      return

  env = self.resource.environment

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command,
                         logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd,
                         env=env,
                         preexec_fn=_preexec_fn(self.resource),
                         user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout,
                         path=self.resource.path,
                         sudo=self.resource.sudo,
                         on_new_line=self.resource.on_new_line)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due to timeout after %d seconds") % (
        self.resource.command, self.resource.timeout)
      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)