Exemple #1
0
def cmd_target_names():
  """
  Returns the agent names the current command applies to.

  The explicitly requested targets (params.flume_command_targets) win when
  there are any; otherwise the result of find_expected_agent_names() for
  params.flume_conf_dir is returned.
  """
  import params

  requested = params.flume_command_targets
  if len(requested) == 0:
    # nothing was requested explicitly, fall back to the configured agents
    return find_expected_agent_names(params.flume_conf_dir)

  return requested
Exemple #2
0
def cmd_target_names():
    """
    Picks the agent names a command should operate on.

    When params.flume_command_targets is non-empty it is returned as-is;
    otherwise the names come from find_expected_agent_names() applied to
    params.flume_conf_dir.
    """
    import params

    explicit_targets = params.flume_command_targets
    return (explicit_targets if len(explicit_targets) > 0
            else find_expected_agent_names(params.flume_conf_dir))
def execute(parameters=None, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  The result is RESULT_CODE_UNKNOWN when no parameters are supplied or the
  Flume configuration directory parameter is missing, RESULT_CODE_CRITICAL
  when at least one reported agent process has no status or has the status
  'NOT_RUNNING', and RESULT_CODE_OK otherwise.

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running;
    socket.getfqdn() is used when it is not supplied
  """

  if parameters is None:
    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])

  # a missing key and an explicit None value are both treated as "not supplied"
  flume_conf_directory = parameters.get(FLUME_CONF_DIR_KEY)
  if flume_conf_directory is None:
    return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])

  if host_name is None:
    host_name = socket.getfqdn()

  processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
  expected_agents = find_expected_agent_names(flume_conf_directory)

  if len(processes) == 0 and len(expected_agents) == 0:
    return (RESULT_CODE_OK, ['No agents defined on {0}'.format(host_name)])

  ok = []
  critical = []

  for process in processes:
    # the 'in' operator is used instead of dict.has_key(), which was removed
    # in Python 3; it behaves the same on Python 2
    if 'status' not in process or process['status'] == 'NOT_RUNNING':
      critical.append(process['name'])
    else:
      ok.append(process['name'])

  text_arr = []

  if len(critical) > 0:
    text_arr.append("{0} {1} NOT running".format(", ".join(critical),
      "is" if len(critical) == 1 else "are"))

  if len(ok) > 0:
    text_arr.append("{0} {1} running".format(", ".join(ok),
      "is" if len(ok) == 1 else "are"))

  # "Agents" is used as soon as either group names more than one agent
  plural = len(critical) > 1 or len(ok) > 1
  alert_label = "Agent{0} {1} {2}".format(
    "s" if plural else "",
    " and ".join(text_arr),
    "on " + host_name)

  alert_state = RESULT_CODE_CRITICAL if len(critical) > 0 else RESULT_CODE_OK

  return (alert_state, [alert_label])
Exemple #4
0
def flume(action=None):
    """
    Configures the Flume service and its agents when ``action`` is 'config'.

    The service named by ``flume_win_service_name`` is set to manual start
    and switched to run as ``params.flume_user``.  The 'ambari-meta.json'
    file of every agent returned by find_expected_agent_names() is removed,
    and then, for each agent in the topology built from
    ``params.flume_conf_content``, the agent directory and its 'flume.conf',
    'log4j.properties', 'ambari-meta.json' and 'flume-env.ps1' files are
    declared ('flume-metrics2.properties' as well when
    ``params.has_metric_collector`` is set).

    Any other ``action`` value does nothing.
    """
    import params

    from service_mapping import flume_win_service_name

    if action == 'config':
        ServiceConfig(flume_win_service_name,
                      action="configure",
                      start_type="manual")

        ServiceConfig(flume_win_service_name,
                      action="change_user",
                      username=params.flume_user,
                      password=Script.get_password(params.flume_user))

        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            os.unlink(
                os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'))

        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        for agent, agent_properties in flume_agents.items():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir,
                                                 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir,
                                                 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir,
                                                  'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir,
                                                'flume-env.ps1')

            Directory(flume_agent_conf_dir)

            PropertiesFile(flume_agent_conf_file,
                           properties=agent_properties)

            # log4j settings are rendered from the configured content, with
            # the agent name made available to the template
            File(flume_agent_log4j_file,
                 content=InlineTemplate(params.flume_log4j_content,
                                        agent_name=agent))

            File(flume_agent_meta_file,
                 content=json.dumps(ambari_meta(agent, agent_properties)))

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template))

            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir,
                                  "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2"))
Exemple #5
0
def flume(action = None):
  """
  Handles the 'config' action for Flume; every other action is ignored.

  Sets the service named by flume_win_service_name to manual start and to
  run as params.flume_user, deletes the 'ambari-meta.json' of each agent
  reported by find_expected_agent_names(), then declares the directory and
  configuration files of every agent in the topology built from
  params.flume_conf_content.
  """
  import params

  from service_mapping import flume_win_service_name

  if action != 'config':
    return

  ServiceConfig(flume_win_service_name, action="configure", start_type="manual")

  ServiceConfig(flume_win_service_name,
                action="change_user",
                username=params.flume_user,
                password=Script.get_password(params.flume_user))

  # drop the metadata files of the agents that were defined before
  for stale_agent in find_expected_agent_names(params.flume_conf_dir):
    stale_meta = os.path.join(params.flume_conf_dir, stale_agent, 'ambari-meta.json')
    os.unlink(stale_meta)

  if params.flume_conf_content is None:
    topology = {}
  else:
    topology = build_flume_topology(params.flume_conf_content)

  for agent_name, agent_props in topology.items():
    agent_dir = os.path.join(params.flume_conf_dir, agent_name)

    Directory(agent_dir)

    PropertiesFile(os.path.join(agent_dir, 'flume.conf'), properties=agent_props)

    File(os.path.join(agent_dir, 'log4j.properties'),
         content=Template('log4j.properties.j2', agent_name=agent_name))

    File(os.path.join(agent_dir, 'ambari-meta.json'),
         content=json.dumps(ambari_meta(agent_name, agent_props)))

    File(os.path.join(agent_dir, 'flume-env.ps1'),
         owner=params.flume_user,
         content=InlineTemplate(params.flume_env_sh_template))

    # the metrics sink configuration is only written when a collector exists
    if params.has_metric_collector:
      File(os.path.join(agent_dir, "flume-metrics2.properties"),
           owner=params.flume_user,
           content=Template("flume-metrics2.properties.j2"))
Exemple #6
0
    def status(self, env):
        """
        Publishes the Flume process list as structured output and signals
        whether the component counts as running.

        Raises ComponentIsNotRunning when agents are defined and one of the
        reported processes has no status or the status 'NOT_RUNNING', or
        when no agents are defined and the desired state is 'INSTALLED'.
        """
        import params
        env.set_params(params)
        processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
        expected_agents = find_expected_agent_names(params.flume_conf_dir)

        # named structured_out so the local does not shadow the json module
        structured_out = {'processes': processes}
        self.put_structured_out(structured_out)

        if len(expected_agents) > 0:
            for proc in processes:
                # 'in' replaces dict.has_key(), which was removed in Python 3
                if 'status' not in proc or proc['status'] == 'NOT_RUNNING':
                    raise ComponentIsNotRunning()
        elif 'INSTALLED' == get_desired_state():
            # no agents defined: stopped only if that is the desired state
            raise ComponentIsNotRunning()
Exemple #7
0
def flume(action=None):
    """
    Writes the per-agent Flume configuration when ``action`` is 'config'.

    The 'ambari-meta.json' file of every agent returned by
    find_expected_agent_names() is unlinked first.  Then each agent of the
    topology built from ``params.flume_conf_content`` gets its directory,
    'flume.conf', 'log4j.properties', 'ambari-meta.json' and 'flume-env.ps1'
    declared, plus 'flume-metrics2.properties' when
    ``params.has_metric_collector`` is set.  Other actions do nothing.
    """
    import params

    if action != 'config':
        return

    conf_root = params.flume_conf_dir

    # remove the meta files of previously defined agents
    for old_agent in find_expected_agent_names(conf_root):
        os.unlink(os.path.join(conf_root, old_agent, 'ambari-meta.json'))

    content = params.flume_conf_content
    agents_by_name = build_flume_topology(content) if content is not None else {}

    for name in agents_by_name.keys():
        properties = agents_by_name[name]
        agent_dir = os.path.join(conf_root, name)

        def in_agent_dir(file_name):
            # absolute path of a file inside this agent's directory
            return os.path.join(agent_dir, file_name)

        Directory(agent_dir)

        PropertiesFile(in_agent_dir('flume.conf'), properties=properties)

        File(in_agent_dir('log4j.properties'),
             content=Template('log4j.properties.j2', agent_name=name))

        File(in_agent_dir('ambari-meta.json'),
             content=json.dumps(ambari_meta(name, properties)))

        File(in_agent_dir('flume-env.ps1'),
             owner=params.flume_user,
             content=InlineTemplate(params.flume_env_sh_template))

        if params.has_metric_collector:
            File(in_agent_dir("flume-metrics2.properties"),
                 owner=params.flume_user,
                 content=Template("flume-metrics2.properties.j2"))
Exemple #8
0
  def status(self, env):
    """
    Publishes the Flume process list as structured output and raises
    ComponentIsNotRunning when the component must be reported as stopped.
    """
    import params
    env.set_params(params)
    processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    expected_agents = find_expected_agent_names(params.flume_conf_dir)

    # named structured_out so the local does not shadow the json module
    structured_out = {'processes': processes}
    self.put_structured_out(structured_out)

    # only throw an exception if there are agents defined and there is a
    # problem with the processes; if there are no agents defined, then
    # the service should report STARTED (green) ONLY if the desired state is started.  otherwise, INSTALLED (red)
    if len(expected_agents) > 0:
      for proc in processes:
        # 'in' replaces dict.has_key(), which was removed in Python 3
        if 'status' not in proc or proc['status'] == 'NOT_RUNNING':
          raise ComponentIsNotRunning()
    elif 'INSTALLED' == get_desired_state():
      raise ComponentIsNotRunning()
  def status(self, env):
    """
    Reports the Flume processes through put_structured_out() and raises
    ComponentIsNotRunning when the component is considered stopped.
    """
    import params
    env.set_params(params)
    processes = get_flume_status(params.flume_conf_dir, params.flume_run_dir)
    expected_agents = find_expected_agent_names(params.flume_conf_dir)

    # the payload is built directly; the previous local was called 'json' and
    # shadowed the module of the same name
    self.put_structured_out({'processes': processes})

    # only throw an exception if there are agents defined and there is a
    # problem with the processes; if there are no agents defined, then
    # the service should report STARTED (green) ONLY if the desired state is started.  otherwise, INSTALLED (red)
    if len(expected_agents) > 0:
      for proc in processes:
        # membership test instead of dict.has_key() (removed in Python 3)
        missing_status = 'status' not in proc
        if missing_status or proc['status'] == 'NOT_RUNNING':
          raise ComponentIsNotRunning()
    elif 'INSTALLED' == get_desired_state():
      raise ComponentIsNotRunning()
Exemple #10
0
def flume(action = None):
  """
  Declares the configuration of every Flume agent for the 'config' action;
  all other actions are ignored.

  Existing 'ambari-meta.json' files (one per agent returned by
  find_expected_agent_names()) are unlinked before the agents found in
  params.flume_conf_content are written out again.
  """
  import params

  if action == 'config':
    # remove previously defined meta's
    previous_agents = find_expected_agent_names(params.flume_conf_dir)
    for previous in previous_agents:
      os.unlink(os.path.join(params.flume_conf_dir, previous, 'ambari-meta.json'))

    topology = {}
    if params.flume_conf_content is not None:
      topology = build_flume_topology(params.flume_conf_content)

    for agent_name, agent_conf in topology.items():
      base_dir = os.path.join(params.flume_conf_dir, agent_name)
      conf_path = os.path.join(base_dir, 'flume.conf')
      meta_path = os.path.join(base_dir, 'ambari-meta.json')
      log4j_path = os.path.join(base_dir, 'log4j.properties')
      env_path = os.path.join(base_dir, 'flume-env.ps1')

      Directory(base_dir)

      PropertiesFile(conf_path, properties=agent_conf)

      log4j_template = Template('log4j.properties.j2', agent_name=agent_name)
      File(log4j_path, content=log4j_template)

      meta_json = json.dumps(ambari_meta(agent_name, agent_conf))
      File(meta_path, content=meta_json)

      File(env_path,
           owner=params.flume_user,
           content=InlineTemplate(params.flume_env_sh_template))

      if params.has_metric_collector:
        metrics_path = os.path.join(base_dir, "flume-metrics2.properties")
        File(metrics_path,
             owner=params.flume_user,
             content=Template("flume-metrics2.properties.j2"))
Exemple #11
0
def flume(action=None):
    """
    Configure, start or stop the Flume agents, depending on ``action``.

    'config': deletes the 'ambari-meta.json' of every agent returned by
        find_expected_agent_names(), declares the run, conf and log
        directories, then writes 'flume.conf', 'log4j.properties',
        'ambari-meta.json' and 'flume-env.sh' (plus
        'flume-metrics2.properties' when params.has_metric_collector is set)
        for each agent of the topology built from params.flume_conf_content.
    'start': launches, in the background, every target agent that has a
        'flume.conf' and is not already live according to its pid file,
        then records the pid found by pgrep in that pid file.
    'stop': sends signal 15 to every live target agent, waits for it to
        terminate and deletes its pid file.

    Any other ``action`` value does nothing.

    Raises:
        Fail: on 'stop', when await_flume_process_termination() reports that
            an agent did not terminate.

    NOTE(review): format() is called below without substitution arguments,
    so it presumably resolves the {name} placeholders from this function's
    local variables and from params -- confirm before renaming any local
    (agent, pid_file, ...).
    """
    import params

    if action == 'config':
        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            File(
                os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
                action="delete",
            )

        Directory(params.flume_run_dir, )

        Directory(
            params.flume_conf_dir,
            recursive=True,
            owner=params.flume_user,
        )
        Directory(params.flume_log_dir, owner=params.flume_user)

        # an absent configuration simply means there are no agents to write
        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        # every agent gets its own sub-directory of the conf dir
        for agent in flume_agents.keys():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir,
                                                 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir,
                                                 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir,
                                                  'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir,
                                                'flume-env.sh')

            Directory(
                flume_agent_conf_dir,
                owner=params.flume_user,
            )

            PropertiesFile(flume_agent_conf_file,
                           properties=flume_agents[agent],
                           owner=params.flume_user,
                           mode=0644)

            File(flume_agent_log4j_file,
                 content=Template('log4j.properties.j2', agent_name=agent),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_meta_file,
                 content=json.dumps(ambari_meta(agent, flume_agents[agent])),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template))

            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir,
                                  "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2"))

    elif action == 'start':
        # desired state for service should be STARTED
        # (only recorded when the command is not limited to specific targets)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('STARTED')

        # It is important to run this command as a background process.

        # The doubled braces ({{0}}..{{4}}) are presumably left by format()
        # as {0}..{4}, to be filled per agent by flume_base.format(...) below.
        flume_base = as_user(format(
            "{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"
        ),
                             params.flume_user,
                             env={'JAVA_HOME': params.java_home}) + " &"

        for agent in cmd_target_names():
            flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
            flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
            flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

            # a target without a configuration file is silently skipped
            if not os.path.isfile(flume_agent_conf_file):
                continue

            # agents that are already live are left untouched
            if not is_flume_process_live(flume_agent_pid_file):
                # TODO someday make the ganglia ports configurable
                extra_args = ''
                if params.ganglia_server_host is not None:
                    extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
                    extra_args = extra_args.format(params.ganglia_server_host,
                                                   '8655')
                # when a metric collector exists its settings replace the
                # ganglia ones assigned just above
                if params.has_metric_collector:
                    extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                                 '-Dflume.monitoring.node={0}:{1}'
                    extra_args = extra_args.format(
                        params.metric_collector_host,
                        params.metric_collector_port)

                # the agent name is passed twice: as --name and as the base
                # name of the .out file
                flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
                                              flume_agent_conf_file,
                                              extra_args, agent)

                Execute(flume_cmd,
                        wait_for_finish=False,
                        environment={'JAVA_HOME': params.java_home})
                # sometimes startup spawns a couple of threads - so only the first line may count
                # pgrep -o selects the oldest match; tee writes it to the pid
                # file, and the PIPESTATUS test makes the command's result
                # follow pgrep's exit status rather than tee's
                pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
                " | " + as_sudo(('tee', flume_agent_pid_file)) + "  && test ${PIPESTATUS[0]} -eq 0"
                # tries / try_sleep presumably retry the lookup while the
                # process is still coming up
                Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)

        pass
    elif action == 'stop':
        # desired state for service should be INSTALLED
        # (only recorded when the command is not limited to specific targets)
        if len(params.flume_command_targets) == 0:
            _set_desired_state('INSTALLED')

        pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

        # no pid files at all: nothing to stop
        if 0 == len(pid_files):
            return

        agent_names = cmd_target_names()

        for agent in agent_names:
            pid_file = format("{flume_run_dir}/{agent}.pid")

            if is_flume_process_live(pid_file):
                # element [1] of the checked_call result is used as the pid
                # text (presumably the command output)
                pid = shell.checked_call(("cat", pid_file),
                                         sudo=True)[1].strip()
                Execute(("kill", "-15", pid),
                        sudo=True)  # kill command has to be a tuple

            # checked for every agent, whether or not a signal was sent
            if not await_flume_process_termination(pid_file):
                raise Fail("Can't stop flume agent: {0}".format(agent))

            File(pid_file, action='delete')
Exemple #12
0
    def status(self, env):
        """
        Publishes the Flume processes and a 'flume_agent' alert as structured
        output, then signals whether the component counts as running.

        The alert state is 'CRITICAL' when at least one reported process has
        no status or the status 'NOT_RUNNING', and 'OK' otherwise (including
        when no agents are defined).

        Raises ComponentIsNotRunning when agents are defined and one of the
        processes is not running, or when no agents are defined and the
        desired state is 'INSTALLED'.
        """
        import params
        env.set_params(params)
        processes = get_flume_status(params.flume_conf_dir,
                                     params.flume_run_dir)
        expected_agents = find_expected_agent_names(params.flume_conf_dir)

        alert = {}
        alert['name'] = 'flume_agent'
        alert['label'] = 'Flume Agent process'

        if len(processes) == 0 and len(expected_agents) == 0:
            alert['state'] = 'OK'

            if params.hostname is not None:
                alert['text'] = 'No agents defined on ' + params.hostname
            else:
                alert['text'] = 'No agents defined'

        else:
            crit = []
            ok = []

            for proc in processes:
                # 'in' replaces dict.has_key(), which was removed in Python 3
                if 'status' not in proc or proc['status'] == 'NOT_RUNNING':
                    crit.append(proc['name'])
                else:
                    ok.append(proc['name'])

            text_arr = []

            if len(crit) > 0:
                text_arr.append("{0} {1} NOT running".format(
                    ", ".join(crit), "is" if len(crit) == 1 else "are"))

            if len(ok) > 0:
                text_arr.append("{0} {1} running".format(
                    ", ".join(ok), "is" if len(ok) == 1 else "are"))

            # "Agents" is used as soon as either group names several agents
            plural = len(crit) > 1 or len(ok) > 1
            host_suffix = "" if params.hostname is None else "on " + str(params.hostname)
            alert['text'] = "Agent{0} {1} {2}".format(
                "s" if plural else "", " and ".join(text_arr), host_suffix)

            alert['state'] = 'CRITICAL' if len(crit) > 0 else 'OK'

        # named structured_out so the local does not shadow the json module
        structured_out = {}
        structured_out['processes'] = processes
        structured_out['alerts'] = [alert]
        self.put_structured_out(structured_out)

        # only throw an exception if there are agents defined and there is a
        # problem with the processes; if there are no agents defined, then
        # the service should report STARTED (green) ONLY if the desired state is started.  otherwise, INSTALLED (red)
        if len(expected_agents) > 0:
            for proc in processes:
                if 'status' not in proc or proc['status'] == 'NOT_RUNNING':
                    raise ComponentIsNotRunning()
        elif 'INSTALLED' == get_desired_state():
            raise ComponentIsNotRunning()
Exemple #13
0
def flume(action = None):
  """
  Configure, start or stop the Flume agents, depending on the action.

  'config': unlinks the 'ambari-meta.json' of every agent returned by
    find_expected_agent_names(), declares the conf and log directories and
    writes 'flume.conf', 'log4j.properties', 'ambari-meta.json' and
    'flume-env.sh' (plus 'flume-metrics2.properties' when
    params.has_metric_collector is set) for each agent of the topology built
    from params.flume_conf_content.
  'start': launches, in the background, every target agent that has a
    'flume.conf' and is not live according to its pid file, then writes the
    pid found by pgrep into that pid file.
  'stop': runs kill against the pid stored in each target agent's pid file
    (failures ignored) and deletes the pid file.

  Any other action does nothing.

  NOTE(review): format() is called below without substitution arguments, so
  it presumably resolves the {name} placeholders from this function's local
  variables and from params -- confirm before renaming any local (agent,
  pid, pid_file, flume_agent_pid_file, ...).
  """
  import params

  if action == 'config':
    # remove previously defined meta's
    for n in find_expected_agent_names(params.flume_conf_dir):
      os.unlink(os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'))

    Directory(params.flume_conf_dir, recursive=True)
    Directory(params.flume_log_dir, owner=params.flume_user)

    # an absent configuration simply means there are no agents to write
    flume_agents = {}
    if params.flume_conf_content is not None:
      flume_agents = build_flume_topology(params.flume_conf_content)

    # every agent gets its own sub-directory of the conf dir
    for agent in flume_agents.keys():
      flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
      flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
      flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
      flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
      flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

      Directory(flume_agent_conf_dir)

      PropertiesFile(flume_agent_conf_file,
        properties=flume_agents[agent],
        mode = 0644)

      File(flume_agent_log4j_file,
        content=Template('log4j.properties.j2', agent_name = agent),
        mode = 0644)

      File(flume_agent_meta_file,
        content = json.dumps(ambari_meta(agent, flume_agents[agent])),
        mode = 0644)

      File(flume_agent_env_file,
           owner=params.flume_user,
           content=InlineTemplate(params.flume_env_sh_template)
      )

      if params.has_metric_collector:
        File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
             owner=params.flume_user,
             content=Template("flume-metrics2.properties.j2")
        )

  elif action == 'start':
    # desired state for service should be STARTED
    # (only recorded when the command is not limited to specific targets)
    if len(params.flume_command_targets) == 0:
      _set_desired_state('STARTED')

    # It is important to run this command as a background process.

    # The doubled braces ({{0}}..{{4}}) are presumably left by format() as
    # {0}..{4}, to be filled per agent by flume_base.format(...) below.
    flume_base = as_user(format("{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"), params.flume_user, env={'JAVA_HOME': params.java_home}) + " &"

    for agent in cmd_target_names():
      flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
      flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
      flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

      # a target without a configuration file is silently skipped
      if not os.path.isfile(flume_agent_conf_file):
        continue

      # agents that are already live are left untouched
      if not is_flume_process_live(flume_agent_pid_file):
        # TODO someday make the ganglia ports configurable
        extra_args = ''
        if params.ganglia_server_host is not None:
          extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
          extra_args = extra_args.format(params.ganglia_server_host, '8655')
        # when a metric collector exists its settings replace the ganglia
        # ones assigned just above
        if params.has_metric_collector:
          extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                       '-Dflume.monitoring.node={0}:{1}'
          extra_args = extra_args.format(params.metric_collector_host, params.metric_collector_port)

        # the agent name is passed twice: as --name and as the base name of
        # the .out file
        flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
           flume_agent_conf_file, extra_args, agent)

        Execute(flume_cmd, 
          wait_for_finish=False,
          environment={'JAVA_HOME': params.java_home}
        )

        # sometimes startup spawns a couple of threads - so only the first line may count

        # pgrep -o selects the oldest match; its output is redirected into
        # the agent's pid file
        pid_cmd = format('pgrep -o -u {flume_user} -f ^{java_home}.*{agent}.* > {flume_agent_pid_file}')
        # tries / try_sleep presumably retry the lookup while the process is
        # still coming up
        Execute(pid_cmd,
                logoutput=True,
                tries=20,
                try_sleep=10)

    pass
  elif action == 'stop':
    # desired state for service should be INSTALLED
    # (only recorded when the command is not limited to specific targets)
    if len(params.flume_command_targets) == 0:
      _set_desired_state('INSTALLED')

    pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

    # no pid files at all: nothing to stop
    if 0 == len(pid_files):
      return

    agent_names = cmd_target_names()


    for agent in agent_names:
      pid_file = params.flume_run_dir + os.sep + agent + '.pid'
      # 'pid' is not a number but a shell fragment: a command substitution
      # that reads the pid file, followed by an output redirection; it is
      # spliced into the kill command on the next line
      pid = format('`cat {pid_file}` > /dev/null 2>&1')
      Execute(format('kill {pid}'), ignore_failures=True)
      File(pid_file, action = 'delete')
Exemple #14
0
def flume(action = None):
  """
  Configure, start or stop the Flume agents, depending on the action.

  'config': deletes the 'ambari-meta.json' of every agent returned by
    find_expected_agent_names(), declares the run, conf and log directories,
    then writes 'flume.conf', 'log4j.properties', 'ambari-meta.json' and
    'flume-env.sh' (plus 'flume-metrics2.properties' when
    params.has_metric_collector is set) for each agent of the topology built
    from params.flume_conf_content.
  'start': launches, in the background, every target agent that has a
    'flume.conf' and is not already live according to its pid file, then
    records the pid found by pgrep in that pid file.
  'stop': sends signal 15 to every live target agent, waits for it to
    terminate and deletes its pid file.

  Any other action does nothing.

  Raises Fail on 'stop' when await_flume_process_termination() reports that
  an agent did not terminate.

  NOTE(review): format() is called below without substitution arguments, so
  it presumably resolves the {name} placeholders from this function's local
  variables and from params -- confirm before renaming any local (agent,
  pid_file, ...).
  """
  import params

  if action == 'config':
    # remove previously defined meta's
    for n in find_expected_agent_names(params.flume_conf_dir):
      File(os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
        action = "delete",
      )
      
    Directory(params.flume_run_dir,
    )

    Directory(params.flume_conf_dir,
              recursive=True,
              owner=params.flume_user,
              )
    Directory(params.flume_log_dir, 
              owner=params.flume_user,
              cd_access="a",
              mode=0755,
    )

    # an absent configuration simply means there are no agents to write
    flume_agents = {}
    if params.flume_conf_content is not None:
      flume_agents = build_flume_topology(params.flume_conf_content)

    # every agent gets its own sub-directory of the conf dir
    for agent in flume_agents.keys():
      flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
      flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
      flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
      flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
      flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

      Directory(flume_agent_conf_dir,
                owner=params.flume_user,
                )

      PropertiesFile(flume_agent_conf_file,
        properties=flume_agents[agent],
        owner=params.flume_user,
        mode = 0644)

      File(flume_agent_log4j_file,
        content=Template('log4j.properties.j2', agent_name = agent),
        owner=params.flume_user,
        mode = 0644)

      File(flume_agent_meta_file,
        content = json.dumps(ambari_meta(agent, flume_agents[agent])),
        owner=params.flume_user,
        mode = 0644)

      File(flume_agent_env_file,
           owner=params.flume_user,
           content=InlineTemplate(params.flume_env_sh_template)
      )

      if params.has_metric_collector:
        File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
             owner=params.flume_user,
             content=Template("flume-metrics2.properties.j2")
        )

  elif action == 'start':
    # desired state for service should be STARTED
    # (only recorded when the command is not limited to specific targets)
    if len(params.flume_command_targets) == 0:
      _set_desired_state('STARTED')

    # It is important to run this command as a background process.

    # The doubled braces ({{0}}..{{4}}) are presumably left by format() as
    # {0}..{4}, to be filled per agent by flume_base.format(...) below.
    flume_base = as_user(format("{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"), params.flume_user, env={'JAVA_HOME': params.java_home}) + " &"

    for agent in cmd_target_names():
      flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
      flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
      flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

      # a target without a configuration file is silently skipped
      if not os.path.isfile(flume_agent_conf_file):
        continue

      # agents that are already live are left untouched
      if not is_flume_process_live(flume_agent_pid_file):
        # TODO someday make the ganglia ports configurable
        extra_args = ''
        if params.ganglia_server_host is not None:
          extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
          extra_args = extra_args.format(params.ganglia_server_host, '8655')
        # when a metric collector exists its settings replace the ganglia
        # ones assigned just above
        if params.has_metric_collector:
          extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                       '-Dflume.monitoring.node={0}:{1}'
          extra_args = extra_args.format(params.metric_collector_host, params.metric_collector_port)

        # the agent name is passed twice: as --name and as the base name of
        # the .out file
        flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
           flume_agent_conf_file, extra_args, agent)

        Execute(flume_cmd, 
          wait_for_finish=False,
          environment={'JAVA_HOME': params.java_home}
        )
        # sometimes startup spawns a couple of threads - so only the first line may count
        # pgrep -o selects the oldest match; tee writes it to the pid file,
        # and the PIPESTATUS test makes the command's result follow pgrep's
        # exit status rather than tee's
        pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
        " | " + as_sudo(('tee', flume_agent_pid_file)) + "  && test ${PIPESTATUS[0]} -eq 0"
        # tries / try_sleep presumably retry the lookup while the process is
        # still coming up
        Execute(pid_cmd,
                logoutput=True,
                tries=20,
                try_sleep=10)

    pass
  elif action == 'stop':
    # desired state for service should be INSTALLED
    # (only recorded when the command is not limited to specific targets)
    if len(params.flume_command_targets) == 0:
      _set_desired_state('INSTALLED')

    pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

    # no pid files at all: nothing to stop
    if 0 == len(pid_files):
      return

    agent_names = cmd_target_names()


    for agent in agent_names:
      pid_file = format("{flume_run_dir}/{agent}.pid")
      
      if is_flume_process_live(pid_file):
        # element [1] of the checked_call result is used as the pid text
        # (presumably the command output)
        pid = shell.checked_call(("cat", pid_file), sudo=True)[1].strip()
        Execute(("kill", "-15", pid), sudo=True)    # kill command has to be a tuple
      
      # checked for every agent, whether or not a signal was sent
      if not await_flume_process_termination(pid_file):
        raise Fail("Can't stop flume agent: {0}".format(agent))
        
      File(pid_file, action = 'delete')