def cmd_target_names():
    """Return the agent names the current command should act on.

    Explicitly requested targets (``params.flume_command_targets``) take
    precedence whenever at least one is present; otherwise the result of
    ``find_expected_agent_names`` for ``params.flume_conf_dir`` is returned.
    """
    import params

    requested = params.flume_command_targets
    if len(requested) == 0:
        # Nothing was singled out: fall back to the agents found on disk.
        return find_expected_agent_names(params.flume_conf_dir)
    return requested
def cmd_target_names():
    """Return the agent names the current command should act on.

    Explicitly requested targets (``params.flume_command_targets``) take
    precedence whenever at least one is present; otherwise the result of
    ``find_expected_agent_names`` for ``params.flume_conf_dir`` is returned.
    """
    import params

    requested = params.flume_command_targets
    if len(requested) == 0:
        # Nothing was singled out: fall back to the agents found on disk.
        return find_expected_agent_names(params.flume_conf_dir)
    return requested
def execute(parameters=None, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    The label lists which agents are running and which are not; the result
    code is critical as soon as one agent is not running.

    Keyword arguments:
    parameters (dictionary): a mapping of parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    if parameters is None:
        return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])

    conf_dir = parameters[FLUME_CONF_DIR_KEY] if FLUME_CONF_DIR_KEY in parameters else None
    if conf_dir is None:
        return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])

    if host_name is None:
        host_name = socket.getfqdn()

    agent_processes = get_flume_status(conf_dir, FLUME_RUN_DIR)
    configured_agents = find_expected_agent_names(conf_dir)

    # Nothing configured and nothing reported: that is a healthy, empty host.
    if len(agent_processes) == 0 and len(configured_agents) == 0:
        return (RESULT_CODE_OK, ['No agents defined on {0}'.format(host_name)])

    # Split the reported agents into running / not running, keeping their order.
    down = []
    up = []
    for entry in agent_processes:
        if 'status' in entry and entry['status'] != 'NOT_RUNNING':
            up.append(entry['name'])
        else:
            down.append(entry['name'])

    fragments = []
    if len(down) > 0:
        verb = "is" if len(down) == 1 else "are"
        fragments.append("{0} {1} NOT running".format(", ".join(down), verb))
    if len(up) > 0:
        verb = "is" if len(up) == 1 else "are"
        fragments.append("{0} {1} running".format(", ".join(up), verb))

    suffix = "s" if (len(down) > 1 or len(up) > 1) else ""
    label = "Agent{0} {1} {2}".format(suffix, " and ".join(fragments), "on " + host_name)
    state = RESULT_CODE_CRITICAL if len(down) > 0 else RESULT_CODE_OK

    return (state, [label])
def flume(action=None):
    """Write the Flume agent configuration on a Windows host.

    Only ``action == 'config'`` has an effect: the Windows service is
    configured, the ``ambari-meta.json`` of every agent currently found under
    ``params.flume_conf_dir`` is removed, and one directory of configuration
    files is written per agent described by ``params.flume_conf_content``.
    Any other action returns without doing anything.
    """
    import params
    from service_mapping import flume_win_service_name

    if action != 'config':
        return

    ServiceConfig(flume_win_service_name,
                  action="configure",
                  start_type="manual")
    ServiceConfig(flume_win_service_name,
                  action="change_user",
                  username=params.flume_user,
                  password=Script.get_password(params.flume_user))

    # remove previously defined meta's
    for stale_agent in find_expected_agent_names(params.flume_conf_dir):
        stale_meta = os.path.join(params.flume_conf_dir, stale_agent, 'ambari-meta.json')
        os.unlink(stale_meta)

    topology = {}
    if params.flume_conf_content is not None:
        topology = build_flume_topology(params.flume_conf_content)

    for agent, agent_props in topology.items():
        agent_dir = os.path.join(params.flume_conf_dir, agent)

        Directory(agent_dir)

        PropertiesFile(os.path.join(agent_dir, 'flume.conf'),
                       properties=agent_props)

        # log4j settings come from the configured content, rendered per agent.
        File(os.path.join(agent_dir, 'log4j.properties'),
             content=InlineTemplate(params.flume_log4j_content, agent_name=agent))

        File(os.path.join(agent_dir, 'ambari-meta.json'),
             content=json.dumps(ambari_meta(agent, agent_props)))

        File(os.path.join(agent_dir, 'flume-env.ps1'),
             owner=params.flume_user,
             content=InlineTemplate(params.flume_env_sh_template))

        if params.has_metric_collector:
            File(os.path.join(agent_dir, "flume-metrics2.properties"),
                 owner=params.flume_user,
                 content=Template("flume-metrics2.properties.j2"))
def flume(action = None):
    """Write the Flume agent configuration on a Windows host.

    Only ``action == 'config'`` has an effect: the Windows service is
    configured, the ``ambari-meta.json`` of every agent currently found under
    ``params.flume_conf_dir`` is removed, and one directory of configuration
    files is written per agent described by ``params.flume_conf_content``.
    Any other action returns without doing anything.
    """
    import params
    from service_mapping import flume_win_service_name

    if action != 'config':
        return

    ServiceConfig(flume_win_service_name,
                  action="configure",
                  start_type="manual")
    ServiceConfig(flume_win_service_name,
                  action="change_user",
                  username=params.flume_user,
                  password=Script.get_password(params.flume_user))

    # remove previously defined meta's
    for stale_agent in find_expected_agent_names(params.flume_conf_dir):
        stale_meta = os.path.join(params.flume_conf_dir, stale_agent, 'ambari-meta.json')
        os.unlink(stale_meta)

    topology = {}
    if params.flume_conf_content is not None:
        topology = build_flume_topology(params.flume_conf_content)

    for agent, agent_props in topology.items():
        agent_dir = os.path.join(params.flume_conf_dir, agent)

        Directory(agent_dir)

        PropertiesFile(os.path.join(agent_dir, 'flume.conf'),
                       properties=agent_props)

        # log4j settings are rendered from the bundled template, per agent.
        File(os.path.join(agent_dir, 'log4j.properties'),
             content=Template('log4j.properties.j2', agent_name=agent))

        File(os.path.join(agent_dir, 'ambari-meta.json'),
             content=json.dumps(ambari_meta(agent, agent_props)))

        File(os.path.join(agent_dir, 'flume-env.ps1'),
             owner=params.flume_user,
             content=InlineTemplate(params.flume_env_sh_template))

        if params.has_metric_collector:
            File(os.path.join(agent_dir, "flume-metrics2.properties"),
                 owner=params.flume_user,
                 content=Template("flume-metrics2.properties.j2"))
def status(self, env):
    """Publish the Flume process list and raise if the component is not running.

    The process list is always written to the structured output first.
    ``ComponentIsNotRunning`` is raised when agents are defined and one of
    the reported processes has no status or is ``NOT_RUNNING``, or when no
    agents are defined and the desired state is ``INSTALLED``.
    """
    import params
    env.set_params(params)

    agent_processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    configured_agents = find_expected_agent_names(params.flume_conf_dir)

    self.put_structured_out({'processes': agent_processes})

    if len(configured_agents) == 0:
        # No agents at all: only the desired state decides the outcome.
        if 'INSTALLED' == get_desired_state():
            raise ComponentIsNotRunning()
        return

    for entry in agent_processes:
        if 'status' not in entry or entry['status'] == 'NOT_RUNNING':
            raise ComponentIsNotRunning()
def flume(action=None):
    """Write the per-agent Flume configuration files.

    Only ``action == 'config'`` has an effect: the ``ambari-meta.json`` of
    every agent currently found under ``params.flume_conf_dir`` is removed,
    then one directory of configuration files is written per agent described
    by ``params.flume_conf_content``. Any other action returns immediately.
    """
    import params

    if action != 'config':
        return

    # remove previously defined meta's
    for stale_agent in find_expected_agent_names(params.flume_conf_dir):
        stale_meta = os.path.join(params.flume_conf_dir, stale_agent, 'ambari-meta.json')
        os.unlink(stale_meta)

    topology = {}
    if params.flume_conf_content is not None:
        topology = build_flume_topology(params.flume_conf_content)

    for agent, agent_props in topology.items():
        agent_dir = os.path.join(params.flume_conf_dir, agent)

        Directory(agent_dir)

        PropertiesFile(os.path.join(agent_dir, 'flume.conf'),
                       properties=agent_props)

        File(os.path.join(agent_dir, 'log4j.properties'),
             content=Template('log4j.properties.j2', agent_name=agent))

        File(os.path.join(agent_dir, 'ambari-meta.json'),
             content=json.dumps(ambari_meta(agent, agent_props)))

        File(os.path.join(agent_dir, 'flume-env.ps1'),
             owner=params.flume_user,
             content=InlineTemplate(params.flume_env_sh_template))

        if params.has_metric_collector:
            File(os.path.join(agent_dir, "flume-metrics2.properties"),
                 owner=params.flume_user,
                 content=Template("flume-metrics2.properties.j2"))
def status(self, env):
    """Publish the Flume process list and raise if the component is not running.

    The process list is always written to the structured output first.
    """
    import params
    env.set_params(params)

    agent_processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    configured_agents = find_expected_agent_names(params.flume_conf_dir)

    self.put_structured_out({'processes': agent_processes})

    # only throw an exception if there are agents defined and there is a
    # problem with the processes; if there are no agents defined, then
    # the service should report STARTED (green) ONLY if the desired state
    # is started. otherwise, INSTALLED (red)
    if len(configured_agents) == 0:
        if 'INSTALLED' == get_desired_state():
            raise ComponentIsNotRunning()
        return

    for entry in agent_processes:
        if 'status' not in entry or entry['status'] == 'NOT_RUNNING':
            raise ComponentIsNotRunning()
def status(self, env):
    """Publish the Flume process list and raise if the component is not running.

    The process list is always written to the structured output first.
    """
    import params
    env.set_params(params)

    agent_processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    configured_agents = find_expected_agent_names(params.flume_conf_dir)

    self.put_structured_out({'processes': agent_processes})

    # only throw an exception if there are agents defined and there is a
    # problem with the processes; if there are no agents defined, then
    # the service should report STARTED (green) ONLY if the desired state
    # is started. otherwise, INSTALLED (red)
    if len(configured_agents) == 0:
        if 'INSTALLED' == get_desired_state():
            raise ComponentIsNotRunning()
        return

    for entry in agent_processes:
        if 'status' not in entry or entry['status'] == 'NOT_RUNNING':
            raise ComponentIsNotRunning()
def flume(action = None):
    """Write the per-agent Flume configuration files.

    Only ``action == 'config'`` has an effect: the ``ambari-meta.json`` of
    every agent currently found under ``params.flume_conf_dir`` is removed,
    then one directory of configuration files is written per agent described
    by ``params.flume_conf_content``. Any other action returns immediately.
    """
    import params

    if action != 'config':
        return

    # remove previously defined meta's
    for stale_agent in find_expected_agent_names(params.flume_conf_dir):
        stale_meta = os.path.join(params.flume_conf_dir, stale_agent, 'ambari-meta.json')
        os.unlink(stale_meta)

    topology = {}
    if params.flume_conf_content is not None:
        topology = build_flume_topology(params.flume_conf_content)

    for agent, agent_props in topology.items():
        agent_dir = os.path.join(params.flume_conf_dir, agent)

        Directory(agent_dir)

        PropertiesFile(os.path.join(agent_dir, 'flume.conf'),
                       properties=agent_props)

        File(os.path.join(agent_dir, 'log4j.properties'),
             content=Template('log4j.properties.j2', agent_name=agent))

        File(os.path.join(agent_dir, 'ambari-meta.json'),
             content=json.dumps(ambari_meta(agent, agent_props)))

        File(os.path.join(agent_dir, 'flume-env.ps1'),
             owner=params.flume_user,
             content=InlineTemplate(params.flume_env_sh_template))

        if params.has_metric_collector:
            File(os.path.join(agent_dir, "flume-metrics2.properties"),
                 owner=params.flume_user,
                 content=Template("flume-metrics2.properties.j2"))
def flume(action=None):
    """Configure, start or stop the Flume agents on this host.

    action == 'config': delete the existing ``ambari-meta.json`` files, declare
        the run/conf/log directories and write the configuration files of
        every agent described by ``params.flume_conf_content``.
    action == 'start':  launch each targeted agent that has a ``flume.conf``
        and is not already live, then record its pid in the run directory.
    action == 'stop':   send signal 15 to each targeted live agent, wait for
        it to terminate and delete its pid file.
    Any other value does nothing beyond importing ``params``.

    Raises ``Fail`` (stop only) when ``await_flume_process_termination``
    reports that an agent did not terminate.

    NOTE(review): the project ``format()`` helper is called with only a
    template string, so it appears to resolve ``{name}`` placeholders from
    ``params`` and from local variables of this function (``agent``) --
    confirm before renaming any local used inside a ``format()`` template.
    """
    import params

    if action == 'config':
        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            File(
                os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
                action="delete",
            )

        Directory(params.flume_run_dir, )

        Directory(
            params.flume_conf_dir,
            recursive=True,
            owner=params.flume_user,
        )

        Directory(params.flume_log_dir, owner=params.flume_user)

        # Stays empty (no agent files are written) when no configuration
        # content is supplied.
        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        # One sub-directory per agent, each holding its own set of files.
        for agent in flume_agents.keys():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

            Directory(
                flume_agent_conf_dir,
                owner=params.flume_user,
            )

            PropertiesFile(flume_agent_conf_file,
                           properties=flume_agents[agent],
                           owner=params.flume_user,
                           mode=0644)

            File(flume_agent_log4j_file,
                 content=Template('log4j.properties.j2', agent_name=agent),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_meta_file,
                 content=json.dumps(ambari_meta(agent, flume_agents[agent])),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template))

            # The metrics sink configuration is only written when a metric
            # collector is present.
            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2"))

    elif action == 'start':
        # desired state for service should be STARTED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('STARTED')

        # It is important to run this command as a background process.
        # The doubled braces ({{0}}..{{4}}) are presumably left as {0}..{4} by
        # format() so that flume_base.format(...) below can fill them per
        # agent -- TODO confirm against the format() helper.
        flume_base = as_user(format(
            "{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"
        ), params.flume_user, env={'JAVA_HOME': params.java_home}) + " &"

        for agent in cmd_target_names():
            flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
            flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
            flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

            # Targets without a flume.conf are silently skipped.
            if not os.path.isfile(flume_agent_conf_file):
                continue

            if not is_flume_process_live(flume_agent_pid_file):
                # TODO someday make the ganglia ports configurable
                extra_args = ''
                if params.ganglia_server_host is not None:
                    extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
                    extra_args = extra_args.format(params.ganglia_server_host, '8655')
                # When a metric collector is present its settings replace the
                # ganglia arguments assigned above.
                if params.has_metric_collector:
                    extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                                 '-Dflume.monitoring.node={0}:{1}'
                    extra_args = extra_args.format(
                        params.metric_collector_host, params.metric_collector_port)

                # The agent name is passed twice: as --name and as the stem of
                # the .out file.
                flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
                                              flume_agent_conf_file, extra_args, agent)

                Execute(flume_cmd,
                        wait_for_finish=False,
                        environment={'JAVA_HOME': params.java_home})

                # sometimes startup spawns a couple of threads - so only the first line may count
                # pgrep -o selects the oldest matching process; tee writes that
                # pid to the pid file. ${PIPESTATUS[0]} is pgrep's exit status,
                # so the command fails when pgrep matched nothing even though
                # tee itself succeeded.
                pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
                          " | " + as_sudo(('tee', flume_agent_pid_file)) + " && test ${PIPESTATUS[0]} -eq 0"
                # tries / try_sleep presumably retry the lookup while the
                # process comes up -- verify against Execute.
                Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)

        pass
    elif action == 'stop':
        # desired state for service should be INSTALLED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('INSTALLED')

        pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

        # No pid file at all in the run directory: nothing to stop.
        if 0 == len(pid_files):
            return

        agent_names = cmd_target_names()

        for agent in agent_names:
            pid_file = format("{flume_run_dir}/{agent}.pid")

            if is_flume_process_live(pid_file):
                # Element [1] of checked_call's result is used as the pid text
                # -- presumably the command output; confirm against shell.
                pid = shell.checked_call(("cat", pid_file), sudo=True)[1].strip()
                Execute(("kill", "-15", pid), sudo=True)  # kill command has to be a tuple

            if not await_flume_process_termination(pid_file):
                raise Fail("Can't stop flume agent: {0}".format(agent))

            File(pid_file, action='delete')
def status(self, env):
    """Publish Flume process details with a ``flume_agent`` alert, then check liveness.

    The structured output carries the process list and one alert whose text
    names the running / not running agents and whose state is ``CRITICAL``
    as soon as one agent is not running. ``ComponentIsNotRunning`` is raised
    afterwards under the conditions described in the comment below.
    """
    import params
    env.set_params(params)

    agent_processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    configured_agents = find_expected_agent_names(params.flume_conf_dir)

    alert = {'name': 'flume_agent', 'label': 'Flume Agent process'}

    if len(agent_processes) == 0 and len(configured_agents) == 0:
        alert['state'] = 'OK'
        if params.hostname is None:
            alert['text'] = 'No agents defined'
        else:
            alert['text'] = 'No agents defined on ' + params.hostname
    else:
        # Split the reported agents into running / not running.
        down = []
        up = []
        for entry in agent_processes:
            if 'status' in entry and entry['status'] != 'NOT_RUNNING':
                up.append(entry['name'])
            else:
                down.append(entry['name'])

        fragments = []
        if len(down) > 0:
            verb = "is" if len(down) == 1 else "are"
            fragments.append("{0} {1} NOT running".format(", ".join(down), verb))
        if len(up) > 0:
            verb = "is" if len(up) == 1 else "are"
            fragments.append("{0} {1} running".format(", ".join(up), verb))

        if params.hostname is None:
            location = ""
        else:
            location = "on " + str(params.hostname)

        suffix = "s" if (len(down) > 1 or len(up) > 1) else ""
        alert['text'] = "Agent{0} {1} {2}".format(suffix, " and ".join(fragments), location)
        alert['state'] = 'CRITICAL' if len(down) > 0 else 'OK'

    self.put_structured_out({'processes': agent_processes, 'alerts': [alert]})

    # only throw an exception if there are agents defined and there is a
    # problem with the processes; if there are no agents defined, then
    # the service should report STARTED (green) ONLY if the desired state
    # is started. otherwise, INSTALLED (red)
    if len(configured_agents) == 0:
        if 'INSTALLED' == get_desired_state():
            raise ComponentIsNotRunning()
        return

    for entry in agent_processes:
        if 'status' not in entry or entry['status'] == 'NOT_RUNNING':
            raise ComponentIsNotRunning()
def flume(action = None):
    """Configure, start or stop the Flume agents on this host.

    action == 'config': unlink the existing ``ambari-meta.json`` files, declare
        the conf/log directories and write the configuration files of every
        agent described by ``params.flume_conf_content``.
    action == 'start':  launch each targeted agent that has a ``flume.conf``
        and is not already live, then record its pid in the run directory.
    action == 'stop':   issue a ``kill`` for each targeted agent's recorded
        pid (failures ignored) and delete its pid file.
    Any other value does nothing beyond importing ``params``.

    NOTE(review): the project ``format()`` helper is called with only a
    template string, so it appears to resolve ``{name}`` placeholders from
    ``params`` and from local variables of this function (``agent``,
    ``pid_file``, ``pid``, ``flume_agent_pid_file``) -- confirm before
    renaming any of those locals.
    """
    import params

    if action == 'config':
        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            os.unlink(os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'))

        Directory(params.flume_conf_dir, recursive=True)
        Directory(params.flume_log_dir, owner=params.flume_user)

        # Stays empty (no agent files are written) when no configuration
        # content is supplied.
        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        # One sub-directory per agent, each holding its own set of files.
        for agent in flume_agents.keys():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

            Directory(flume_agent_conf_dir)

            PropertiesFile(flume_agent_conf_file,
                           properties=flume_agents[agent],
                           mode = 0644)

            File(flume_agent_log4j_file,
                 content=Template('log4j.properties.j2', agent_name = agent),
                 mode = 0644)

            File(flume_agent_meta_file,
                 content = json.dumps(ambari_meta(agent, flume_agents[agent])),
                 mode = 0644)

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template)
            )

            # The metrics sink configuration is only written when a metric
            # collector is present.
            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2")
                )

    elif action == 'start':
        # desired state for service should be STARTED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('STARTED')

        # It is important to run this command as a background process.
        # The doubled braces ({{0}}..{{4}}) are presumably left as {0}..{4} by
        # format() so that flume_base.format(...) below can fill them per
        # agent -- TODO confirm against the format() helper.
        flume_base = as_user(format("{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"), params.flume_user, env={'JAVA_HOME': params.java_home}) + " &"

        for agent in cmd_target_names():
            flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
            flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
            flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

            # Targets without a flume.conf are silently skipped.
            if not os.path.isfile(flume_agent_conf_file):
                continue

            if not is_flume_process_live(flume_agent_pid_file):
                # TODO someday make the ganglia ports configurable
                extra_args = ''
                if params.ganglia_server_host is not None:
                    extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
                    extra_args = extra_args.format(params.ganglia_server_host, '8655')
                # When a metric collector is present its settings replace the
                # ganglia arguments assigned above.
                if params.has_metric_collector:
                    extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                                 '-Dflume.monitoring.node={0}:{1}'
                    extra_args = extra_args.format(params.metric_collector_host, params.metric_collector_port)

                # The agent name is passed twice: as --name and as the stem of
                # the .out file.
                flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
                                              flume_agent_conf_file, extra_args, agent)

                Execute(flume_cmd,
                        wait_for_finish=False,
                        environment={'JAVA_HOME': params.java_home}
                )

                # sometimes startup spawns a couple of threads - so only the first line may count
                # pgrep -o selects the oldest matching process and its pid is
                # redirected into the pid file.
                # NOTE(review): the pattern is passed to the shell unquoted, so
                # it looks open to shell globbing -- confirm.
                pid_cmd = format('pgrep -o -u {flume_user} -f ^{java_home}.*{agent}.* > {flume_agent_pid_file}')
                # tries / try_sleep presumably retry the lookup while the
                # process comes up -- verify against Execute.
                Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)

        pass
    elif action == 'stop':
        # desired state for service should be INSTALLED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('INSTALLED')

        pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

        # No pid file at all in the run directory: nothing to stop.
        if 0 == len(pid_files):
            return

        agent_names = cmd_target_names()

        for agent in agent_names:
            pid_file = params.flume_run_dir + os.sep + agent + '.pid'
            # Not a pid value: a shell fragment that expands to the pid file's
            # content when the kill command runs, with output discarded.
            pid = format('`cat {pid_file}` > /dev/null 2>&1')
            # Best effort: a failing kill is ignored.
            # NOTE(review): nothing here waits for the process to exit before
            # the pid file is removed -- confirm this is intended.
            Execute(format('kill {pid}'), ignore_failures=True)
            File(pid_file, action = 'delete')
def flume(action = None):
    """Configure, start or stop the Flume agents on this host.

    action == 'config': delete the existing ``ambari-meta.json`` files, declare
        the run/conf/log directories and write the configuration files of
        every agent described by ``params.flume_conf_content``.
    action == 'start':  launch each targeted agent that has a ``flume.conf``
        and is not already live, then record its pid in the run directory.
    action == 'stop':   send signal 15 to each targeted live agent, wait for
        it to terminate and delete its pid file.
    Any other value does nothing beyond importing ``params``.

    Raises ``Fail`` (stop only) when ``await_flume_process_termination``
    reports that an agent did not terminate.

    NOTE(review): the project ``format()`` helper is called with only a
    template string, so it appears to resolve ``{name}`` placeholders from
    ``params`` and from local variables of this function (``agent``) --
    confirm before renaming any local used inside a ``format()`` template.
    """
    import params

    if action == 'config':
        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            File(os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
                 action = "delete",
            )

        Directory(params.flume_run_dir,
        )

        Directory(params.flume_conf_dir,
                  recursive=True,
                  owner=params.flume_user,
        )

        Directory(params.flume_log_dir,
                  owner=params.flume_user,
                  cd_access="a",
                  mode=0755,
        )

        # Stays empty (no agent files are written) when no configuration
        # content is supplied.
        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        # One sub-directory per agent, each holding its own set of files.
        for agent in flume_agents.keys():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

            Directory(flume_agent_conf_dir,
                      owner=params.flume_user,
            )

            PropertiesFile(flume_agent_conf_file,
                           properties=flume_agents[agent],
                           owner=params.flume_user,
                           mode = 0644)

            File(flume_agent_log4j_file,
                 content=Template('log4j.properties.j2', agent_name = agent),
                 owner=params.flume_user,
                 mode = 0644)

            File(flume_agent_meta_file,
                 content = json.dumps(ambari_meta(agent, flume_agents[agent])),
                 owner=params.flume_user,
                 mode = 0644)

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template)
            )

            # The metrics sink configuration is only written when a metric
            # collector is present.
            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2")
                )

    elif action == 'start':
        # desired state for service should be STARTED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('STARTED')

        # It is important to run this command as a background process.
        # The doubled braces ({{0}}..{{4}}) are presumably left as {0}..{4} by
        # format() so that flume_base.format(...) below can fill them per
        # agent -- TODO confirm against the format() helper.
        flume_base = as_user(format("{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"), params.flume_user, env={'JAVA_HOME': params.java_home}) + " &"

        for agent in cmd_target_names():
            flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
            flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
            flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

            # Targets without a flume.conf are silently skipped.
            if not os.path.isfile(flume_agent_conf_file):
                continue

            if not is_flume_process_live(flume_agent_pid_file):
                # TODO someday make the ganglia ports configurable
                extra_args = ''
                if params.ganglia_server_host is not None:
                    extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
                    extra_args = extra_args.format(params.ganglia_server_host, '8655')
                # When a metric collector is present its settings replace the
                # ganglia arguments assigned above.
                if params.has_metric_collector:
                    extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                                 '-Dflume.monitoring.node={0}:{1}'
                    extra_args = extra_args.format(params.metric_collector_host, params.metric_collector_port)

                # The agent name is passed twice: as --name and as the stem of
                # the .out file.
                flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
                                              flume_agent_conf_file, extra_args, agent)

                Execute(flume_cmd,
                        wait_for_finish=False,
                        environment={'JAVA_HOME': params.java_home}
                )

                # sometimes startup spawns a couple of threads - so only the first line may count
                # pgrep -o selects the oldest matching process; tee writes that
                # pid to the pid file. ${PIPESTATUS[0]} is pgrep's exit status,
                # so the command fails when pgrep matched nothing even though
                # tee itself succeeded.
                pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
                          " | " + as_sudo(('tee', flume_agent_pid_file)) + " && test ${PIPESTATUS[0]} -eq 0"
                # tries / try_sleep presumably retry the lookup while the
                # process comes up -- verify against Execute.
                Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)

        pass
    elif action == 'stop':
        # desired state for service should be INSTALLED
        # (only recorded when the command is not aimed at specific agents)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('INSTALLED')

        pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

        # No pid file at all in the run directory: nothing to stop.
        if 0 == len(pid_files):
            return

        agent_names = cmd_target_names()

        for agent in agent_names:
            pid_file = format("{flume_run_dir}/{agent}.pid")

            if is_flume_process_live(pid_file):
                # Element [1] of checked_call's result is used as the pid text
                # -- presumably the command output; confirm against shell.
                pid = shell.checked_call(("cat", pid_file), sudo=True)[1].strip()
                Execute(("kill", "-15", pid), sudo=True)  # kill command has to be a tuple

            if not await_flume_process_termination(pid_file):
                raise Fail("Can't stop flume agent: {0}".format(agent))

            File(pid_file, action = 'delete')