def post_upgrade_restart(self, env, upgrade_type=None):
    """Verify the DataNode has come back up and rejoined the cluster after a stack-upgrade restart."""
    Logger.info("Executing DataNode Stack Upgrade post-restart")
    import params
    env.set_params(params)

    # ensure the DataNode has started and rejoined the cluster
    binary = self.get_hdfs_binary()
    datanode_upgrade.post_upgrade_check(binary)
def producer_service(action='start'):
    """
    Drive the producer process through supervisor's XML-RPC interface.

    :param action: 'start', 'stop' or 'restart'; start/stop are no-ops when the
                   process is already in / not in the RUNNING state.
    Logs the outcome instead of raising.
    """
    import params
    prog = params.program_name
    rpc = xmlrpclib.Server(params.producer_rpc)
    state = get_status(params.producer_rpc, prog)

    outcome = False
    try:
        if action == 'start' and state != 'RUNNING':
            outcome = rpc.supervisor.startProcess(prog)
        elif action == 'stop' and state == 'RUNNING':
            outcome = rpc.supervisor.stopProcess(prog)
        elif action == 'restart':
            outcome = rpc.supervisor.restart()
    except xmlrpclib.Fault:
        Logger.error("supervisor: process %s not found" % prog)
    except Exception as msg:
        Logger.error("supervisor: process %s: %s" % (prog, str(msg)))

    # a skipped no-op or any failure both report 'failed'
    outcome = 'success' if outcome else 'failed'
    Logger.info("Action '%s' completed: %s" % (action, outcome))
def pre_upgrade_restart(self, env, upgrade_type=None):
    """Point the stack-select symlinks at the target version before a rolling-upgrade restart."""
    Logger.info("Executing DataNode Stack Upgrade pre-restart")
    import params
    env.set_params(params)

    rolling_supported = params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version)
    if rolling_supported:
        stack_select.select_packages(params.version)
def service_check(self, env):
    """
    Probe the Atlas HTTP endpoint, retrying up to ATLAS_CONNECT_TRIES times
    with ATLAS_CONNECT_TIMEOUT seconds between attempts.

    :raises Fail: when every connection attempt fails
    :raises ComponentIsNotRunning: when the server answers with a non-200 status
    """
    import params
    env.set_params(params)

    for i in xrange(0, self.ATLAS_CONNECT_TRIES):
        try:
            conn = httplib.HTTPConnection(params.metadata_host, int(params.metadata_port))
            conn.request("GET", format("http://{params.metadata_host}:{params.metadata_port}/"))
        except (httplib.HTTPException, socket.error) as ex:
            if i < self.ATLAS_CONNECT_TRIES - 1:
                time.sleep(self.ATLAS_CONNECT_TIMEOUT)
                Logger.info("Connection failed. Next retry in %s seconds." % (self.ATLAS_CONNECT_TIMEOUT))
                continue
            else:
                raise Fail("Service check has failed.")
        else:
            # BUG FIX: stop retrying once the request succeeds; previously the
            # loop kept issuing the request for all ATLAS_CONNECT_TRIES
            # iterations even after a successful connection.
            break

    resp = conn.getresponse()
    if resp.status == 200:
        Logger.info('Atlas server up and running')
    else:
        Logger.debug('Atlas server not running')
        raise ComponentIsNotRunning()
def get_logsearch_meta_configs(configurations):
    """Return the subset of *configurations* whose keys end with 'logsearch-conf'."""
    meta = {}
    # walk keys and values together; only '*logsearch-conf' entries are kept
    for cfg_key, cfg_value in configurations.iteritems():
        if not str(cfg_key).endswith('logsearch-conf'):
            continue
        meta[cfg_key] = cfg_value
        Logger.info("Found logsearch config entry : " + cfg_key)
    return meta
def pre_upgrade_restart(self, env, upgrade_type=None):
    """Switch the hadoop conf and hadoop-hdfs-datanode binaries to the target version before restart."""
    Logger.info("Executing DataNode Stack Upgrade pre-restart")
    import params
    env.set_params(params)

    do_select = params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version)
    if do_select:
        conf_select.select(params.stack_name, "hadoop", params.version)
        stack_select.select("hadoop-hdfs-datanode", params.version)
def get_logfeeder_metadata(logsearch_meta_configs):
    """
    Get the logfeeder pattern metadata map.

    :param logsearch_meta_configs: dict mapping a service config name to its config dict
    :return: dict mapping config name -> that entry's 'content' value
             (the pattern JSON), for entries that define a 'content' key,
             e.g. {'service_config_name': 'pattern json content'}
    """
    logfeeder_contents = {}
    for key, value in logsearch_meta_configs.iteritems():
        # use the iterated value directly instead of re-indexing the dict twice
        if 'content' in value:
            logfeeder_contents[key] = value['content']
            Logger.info("Found logfeeder pattern content in " + key)
    return logfeeder_contents
def __parse_component_mappings(component_mappings):
    """
    Parse a 'component:logid1,logid2;component2:logid3' mapping string and
    return the flat list of all logids.
    """
    components = []
    # each ';'-separated entry is expected to be 'component:logid[,logid...]'
    pairs = [entry.split(':') for entry in component_mappings.split(';')]
    for pair in pairs:
        if len(pair) == 2:
            components.extend(pair[1].split(','))
            Logger.info("Found logids for logsearch component %s - (%s) " % (pair[0], pair[1]))
    return components
def service_check(self, env):
    """Run the Atlas smoke command as the metadata user (kinit first when secured)."""
    import params
    env.set_params(params)

    if params.security_enabled:
        kinit_cmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}")
        Execute(kinit_cmd, user=params.metadata_user)

    try:
        Execute(params.smoke_cmd, user=params.metadata_user, tries=5, try_sleep=10)
        Logger.info('Atlas server up and running')
    # NOTE(review): bare except swallows any smoke-test failure, so this check
    # can never fail — confirm this best-effort behavior is intended.
    except:
        Logger.debug('Atlas server not running')
def service_check(self, env):
    """Query supervisor over XML-RPC and log the producer process state."""
    import params
    env.set_params(params)

    rpc = xmlrpclib.Server(params.producer_rpc)
    proc_info = {}
    try:
        proc_info = rpc.supervisor.getProcessInfo(params.program_name)
    except xmlrpclib.Fault:
        Logger.error("supervisor: process %s not found" % params.program_name)
    except Exception as msg:
        Logger.error("supervisor: process %s: %s" % (params.program_name, str(msg)))

    state = proc_info.get('statename', None)
    # NOTE(review): a non-RUNNING state is only logged, never raised — a
    # disabled Fail() for that case existed here; confirm the check really
    # should always pass.
    Logger.info("Status of process %s is %s" % (params.program_name, state))
def service_check(self, env):
    """Run the Atlas smoke command as the smoke-test user (kinit first when secured)."""
    import params
    env.set_params(params)

    if params.security_enabled:
        Execute(
            format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"),
            user=params.smoke_test_user)

    try:
        Execute(params.smoke_cmd, user=params.smoke_test_user, tries=5, try_sleep=10)
        Logger.info('Atlas server up and running')
    # NOTE(review): the bare except means a failed smoke test is only
    # debug-logged and the service check still passes — confirm intended.
    except:
        Logger.debug('Atlas server not running')
def check_datanode_shutdown(self, hdfs_binary):
    """
    Checks that a DataNode is down by running "hdfs dfsadmin -getDatanodeInfo"
    several times, pausing in between runs. Once the DataNode stops responding
    this method will return, otherwise it will raise a Fail(...) and retry
    automatically.
    The stack defaults for retrying for HDFS are also way too slow for this
    command; they are set to wait about 45 seconds between client retries. As a
    result, a single execution of dfsadmin will take 45 seconds to retry and
    the DataNode may be marked as dead, causing problems with HBase.
    https://issues.apache.org/jira/browse/HDFS-8510 tracks reducing the times
    for ipc.client.connect.retry.interval. In the meantime, override them here,
    but only for RU.
    :param hdfs_binary: name/path of the HDFS binary to use
    :return: True once the DataNode no longer responds to the IPC probe
    """
    import params

    # override stock retry timeouts since after 30 seconds, the datanode is
    # marked as dead and can affect HBase during RU
    # (format() resolves {dfsadmin_base_command} from this local, so the name matters)
    dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
    command = format('{dfsadmin_base_command} -D ipc.client.connect.max.retries=5 -D ipc.client.connect.retry.interval=1000 -getDatanodeInfo {dfs_dn_ipc_address}')

    try:
        shell.checked_call(command, user=params.hdfs_user, tries=1)
    except:
        # the probe failing means the DataNode no longer answers IPC — it is down
        Logger.info("DataNode has successfully shutdown.")
        return True

    # probe still succeeds: DataNode is alive; raise so the caller's retry
    # decorator runs this check again
    Logger.info("DataNode has not yet deregistered from the NameNode...")
    raise Fail('DataNode has not yet deregistered from the NameNode...')
def reload_configs(self, env):
    """Trigger an online reconfiguration of the running DataNode."""
    import params
    env.set_params(params)
    Logger.info("RELOAD CONFIGS")
    ipc_address = params.dfs_dn_ipc_address
    reconfig("datanode", ipc_address)
def metadata():
    """
    Lay down the Atlas (metadata server) directory tree and configuration.

    Creates the pid/conf/log/data/expanded-war directories owned by the
    metadata user, copies the bundled metadata.war into the expanded-war
    directory, and renders application.properties, metadata-env.sh and
    log4j.xml into the conf directory.

    :raises Fail: if the bundled metadata.war does not exist
    """
    import params

    # runtime directories, created recursively and owned by the metadata user
    Directory([params.pid_dir],
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    Directory(params.log_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    # NOTE(review): mode 0644 on a directory omits the execute (search) bit,
    # which normally makes the directory untraversable — confirm intended.
    Directory(params.data_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    # NOTE(review): same 0644-on-a-directory concern as data_dir above.
    Directory(params.expanded_war_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    # stage the bundled web application into the expanded-war directory
    metadata_war_file = format('{params.metadata_home}/server/webapp/metadata.war')
    if not os.path.isfile(metadata_war_file):
        raise Fail("Unable to copy {0} because it does not exist".format(metadata_war_file))

    Logger.info("Copying {0} to {1}".format(metadata_war_file, params.expanded_war_dir))
    shutil.copy2(metadata_war_file, params.expanded_war_dir)

    # render the Atlas configuration files from the cluster parameters
    File(format('{conf_dir}/application.properties'),
         content=InlineTemplate(params.application_properties_content),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group
    )

    File(format("{conf_dir}/metadata-env.sh"),
         owner=params.metadata_user,
         group=params.user_group,
         mode=0755,
         content=InlineTemplate(params.metadata_env_content)
    )

    File(format("{conf_dir}/log4j.xml"),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group,
         content=StaticFile('log4j.xml')
    )