Ejemplo n.º 1
0
def get_system_stats():
    """Collect static facts about the host: architecture, OS version, CPU count.

    Returns:
        dict: always contains 'machine', 'platform', 'processor' and
        'pythonV'. 'cpuCores' is added on Linux, macOS and FreeBSD, and one
        OS-specific version entry ('nixV', 'macV', 'fbsdV' or 'winV') is
        added when the platform is recognised.
    """
    systemStats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform

    if Platform.is_linux(platf):
        # Equivalent of: grep 'model name' /proc/cpuinfo | wc -l
        grep = subprocess.Popen(['grep', 'model name', '/proc/cpuinfo'], stdout=subprocess.PIPE, close_fds=True)
        try:
            wc = subprocess.Popen(['wc', '-l'], stdin=grep.stdout, stdout=subprocess.PIPE, close_fds=True)
            systemStats['cpuCores'] = int(wc.communicate()[0])
        finally:
            # Release our copy of the pipe and reap grep so that it does not
            # linger as a zombie for the lifetime of this process.
            grep.stdout.close()
            grep.wait()

    if Platform.is_darwin(platf) or Platform.is_freebsd(platf):
        # `sysctl hw.ncpu` prints "hw.ncpu: <count>" on both systems.
        sysctl = subprocess.Popen(['sysctl', 'hw.ncpu'], stdout=subprocess.PIPE, close_fds=True)
        systemStats['cpuCores'] = int(sysctl.communicate()[0].split(': ')[1])

    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()

    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()

    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats['fbsdV'] = ('freebsd', version, '')  # no codename for FreeBSD

    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
Ejemplo n.º 2
0
    def check(self, agentConfig):
        """Snapshot the process table by parsing the output of ``ps``.

        Args:
            agentConfig: mapping providing 'api_key' and, optionally,
                'exclude_process_args'.

        Returns:
            dict with 'processes' (one list of stripped fields per process,
            at most 11 fields each), 'apiKey' and 'host'; or False when
            ``ps`` could not be run.
        """
        # 'ww' asks ps for wide output; it is left out when the
        # 'exclude_process_args' option is set.
        if agentConfig.get('exclude_process_args', False):
            ps_arg = 'aux'
        else:
            ps_arg = 'auxww'

        try:
            child = sp.Popen(['ps', ps_arg], stdout=sp.PIPE, close_fds=True)
            ps = child.communicate()[0]
        except StandardError:
            self.logger.exception('getProcesses')
            return False

        rows = ps.split('\n')
        rows.pop(0)  # header row
        rows.pop()  # empty string left after the final newline

        # Split on whitespace into at most 11 columns, so the last column
        # keeps any embedded spaces.
        processes = [
            [field.strip() for field in row.split(None, 10)]
            for row in rows
        ]

        return {
            'processes': processes,
            'apiKey': agentConfig['api_key'],
            'host': get_hostname(agentConfig)
        }
Ejemplo n.º 3
0
    def execute(self, process_args, redirect_std_streams=None, env=None):
        """Run a child process to completion and return its exit code.

        Args:
            process_args: argument list handed to ``subprocess.Popen``.
            redirect_std_streams: when truthy, the child's stdout/stderr are
                captured in temporary files and replayed onto ``sys.stdout``
                and ``sys.stderr`` once the child has exited.
            env: environment passed through to ``subprocess.Popen``.

        Returns:
            The child's return code.

        Raises:
            Any exception raised while launching or waiting; it is logged
            and re-raised.
        """
        try:
            with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
                if redirect_std_streams:
                    child_stdout, child_stderr = stdout_f, stderr_f
                else:
                    child_stdout = child_stderr = None

                self._process = subprocess.Popen(
                    process_args,
                    # only True when the streams are not redirected, for WIN compatibility
                    close_fds=not redirect_std_streams,
                    stdout=child_stdout,
                    stderr=child_stderr,
                    env=env
                )
                self._running = True

                # SIGINT / SIGTERM handlers are installed once the child exists
                self.register_signal_handlers()

                # Block until the child exits
                self._process.wait()
                self._running = False

                if redirect_std_streams:
                    # Rewind the capture files and replay what the child wrote
                    stderr_f.seek(0)
                    captured_err = stderr_f.read()
                    stdout_f.seek(0)
                    captured_out = stdout_f.read()
                    sys.stdout.write(captured_out)
                    sys.stderr.write(captured_err)

            return self._process.returncode
        except Exception:
            log.exception("Could not launch process")
            raise
Ejemplo n.º 4
0
 def _get_hostname_unix():
     try:
         # try fqdn
         p = subprocess.Popen(['/bin/hostname', '-f'], stdout=subprocess.PIPE)
         out, err = p.communicate()
         if p.returncode == 0:
             return out.strip()
     except Exception:
         return None
Ejemplo n.º 5
0
    def check(self, agentConfig):
        """Report the 1, 5 and 15 minute load averages.

        The values are read from /proc/loadavg on Linux and from the output
        of ``uptime`` on darwin, sunos5 and FreeBSD.

        Args:
            agentConfig: mapping; when agentConfig['system_stats']['cpuCores']
                holds a core count >= 1, per-core normalized loads are added.

        Returns:
            dict with 'system.load.1', 'system.load.5', 'system.load.15' and,
            when the core count is known, the matching 'system.load.norm.*'
            keys. False when the load cannot be obtained (read/exec failure,
            unsupported platform, or fewer than three values parsed).
        """
        if Platform.is_linux():
            try:
                with open('/proc/loadavg', 'r') as load_avg:
                    uptime = load_avg.readline().strip()
            except Exception:
                self.logger.exception('Cannot extract load')
                return False

        elif sys.platform in ('darwin',
                              'sunos5') or sys.platform.startswith("freebsd"):
            # Get output from uptime
            try:
                uptime = sp.Popen(['uptime'], stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
            except Exception:
                self.logger.exception('Cannot extract load')
                return False

        else:
            # Without this branch `uptime` would be unbound below.
            self.logger.warn("Load: unsupported platform")
            return False

        # Split out the 3 load average values; a ',' decimal separator is
        # normalized to '.' so float() can parse it.
        load = [
            res.replace(',', '.')
            for res in re.findall(r'([0-9]+[\.,]\d+)', uptime)
        ]
        if len(load) < 3:
            self.logger.warn("Cannot extract load from: %s" % uptime)
            return False

        metrics = {
            'system.load.1': float(load[0]),
            'system.load.5': float(load[1]),
            'system.load.15': float(load[2]),
        }

        # Normalize load by number of cores, when that number is available
        try:
            cores = int(agentConfig.get('system_stats').get('cpuCores'))
        except (AttributeError, TypeError, ValueError):
            cores = 0

        if cores >= 1:
            # Named .load.norm to make it easy to find next to .load
            metrics['system.load.norm.1'] = metrics['system.load.1'] / cores
            metrics['system.load.norm.5'] = metrics['system.load.5'] / cores
            metrics['system.load.norm.15'] = metrics['system.load.15'] / cores

        return metrics
Ejemplo n.º 6
0
    def __init__(self, logger):
        """Initialise platform-dependent parsing parameters.

        Sets ``self.topIndex`` (6 on macOS 10.6 and later, 5 otherwise) and
        ``self.pagesize`` (output of the ``pagesize`` command on sunos5, 0
        elsewhere or when the command fails).

        Args:
            logger: forwarded to ``Check.__init__``.
        """
        Check.__init__(self, logger)

        # Output from top is slightly modified on OS X 10.6 (case #28239) and greater
        self.topIndex = 5
        if sys.platform == 'darwin':
            # Parse "major[.minor]" generically: a pattern anchored on "10."
            # returns None for macOS 11+ and would crash on .group().
            version = re.match(r'(\d+)(?:\.(\d+))?', platform.mac_ver()[0])
            if version is not None:
                major = int(version.group(1))
                minor = int(version.group(2) or 0)
                if (major, minor) >= (10, 6):
                    self.topIndex = 6

        self.pagesize = 0
        if sys.platform == 'sunos5':
            try:
                pgsz = sp.Popen(['pagesize'], stdout=sp.PIPE,
                                close_fds=True).communicate()[0]
                self.pagesize = int(pgsz.strip())
            except Exception:
                # No page size available: deliberately keep the default of 0
                pass
Ejemplo n.º 7
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command,
               reporter, tools_jar_path, custom_jar_paths,
               redirect_std_streams):
        """Build the JMXFetch command line, run it and wait for it to exit.

        Args:
            path_to_java: java binary to run; falls back to "java".
            java_run_opts: whitespace-separated JVM options (may be empty or
                None). Default heap options are appended when no max/initial
                heap option is present.
            jmx_checks: iterable of check names passed after '--check'.
            command: JMXFetch command name; always the last argument.
            reporter: reporter spec; defaults to "statsd:<dogstatsd_port>".
            tools_jar_path: optional jar prepended to the classpath.
            custom_jar_paths: optional list of jars prepended to the classpath.
            redirect_std_streams: when truthy, the child's stdout/stderr are
                captured in temporary files and replayed onto ``sys.stdout``
                and ``sys.stderr`` after it exits.

        Returns:
            The return code of the JMXFetch process.

        Raises:
            OSError: the process could not be launched; a status file marking
                every check as invalid is written before re-raising.
            Exception: any other failure is logged and re-raised.
        """
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        if reporter is None:
            reporter = "statsd:%s" % str(statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # NOTE(review): ':' is hard-coded as the classpath separator even
            # though a Windows branch exists below - confirm whether
            # os.pathsep is wanted here.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s:%s" % (tools_jar_path, classpath)
            if custom_jar_paths:
                classpath = r"%s:%s" % (':'.join(custom_jar_paths), classpath)

            subprocess_args = [
                path_to_java,  # Path to the java bin
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period',
                str(self.check_frequency *
                    1000),  # Period of the main loop of jmxfetch in ms
                '--conf_directory',
                r"%s" % self.
                confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
                '--log_level',
                JAVA_LOGGING_LEVEL.get(
                    self.logging_config.get("log_level"), "INFO"
                ),  # Log Level: Mapping from Python log level to log4j log levels
                '--log_location',
                r"%s" % self.logging_config.get(
                    'jmxfetch_log_file'),  # Path of the log file
                '--reporter',
                reporter,  # Reporter to use
                '--status_location',
                r"%s" %
                path_to_status_file,  # Path to the status file to write
                command,  # Name of the command
            ]

            if Platform.is_windows():
                # Signal handlers are not supported on Windows:
                # use a file to trigger JMXFetch exit instead
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                # Both inserts go just before `command`, which stays last
                subprocess_args.insert(
                    len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(
                    len(subprocess_args) - 1, path_to_exit_file)

            # '--check <names...>' goes right after the main class; repeated
            # insertion at a fixed index puts the names in reverse order.
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            # Specify a maximum memory allocation pool for the JVM
            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            # Specify the initial memory allocation pool for the JVM
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go between the java binary and '-classpath'
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case
            with nested(tempfile.TemporaryFile(),
                        tempfile.TemporaryFile()) as (stdout_f, stderr_f):
                jmx_process = subprocess.Popen(
                    subprocess_args,
                    close_fds=
                    not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                    stdout=stdout_f if redirect_std_streams else None,
                    stderr=stderr_f if redirect_std_streams else None)
                self.jmx_process = jmx_process

                # Register SIGINT and SIGTERM signal handlers
                self.register_signal_handlers()

                # Wait for JMXFetch to return
                jmx_process.wait()

                if redirect_std_streams:
                    # The child wrote into the temporary files, not into
                    # pipes, so communicate() would return (None, None).
                    # Rewind the files and replay their content instead.
                    stdout_f.seek(0)
                    out = stdout_f.read()
                    stderr_f.seek(0)
                    err = stderr_f.read()
                    sys.stdout.write(out)
                    sys.stderr.write(err)

            return jmx_process.returncode

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            # Mark every requested check as invalid in the status file
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise
Ejemplo n.º 8
0
    def check(self, agentConfig):
        """Return an aggregate of CPU stats across all CPUs.

        The figures come from ``mpstat`` on Linux and sunos5 and from
        ``iostat`` on darwin and FreeBSD.

        Args:
            agentConfig: not used by this method.

        Returns:
            dict with the keys 'cpuUser', 'cpuSystem', 'cpuWait', 'cpuIdle',
            'cpuStolen' and 'cpuGuest' (entries whose value is None are
            omitted). When figures are not available, False is sent back.
        """
        def format_results(us, sy, wa, idle, st, guest=None):
            """Build the result dict, leaving out values that are None."""
            data = {
                'cpuUser': us,
                'cpuSystem': sy,
                'cpuWait': wa,
                'cpuIdle': idle,
                'cpuStolen': st,
                'cpuGuest': guest
            }
            return dict((k, v) for k, v in data.iteritems() if v is not None)

        def get_value(legend, data, name, filter_value=None):
            """Using the legend and a metric name, get the value from the data line.

            Returns None when the value exceeds filter_value, and 0.0 when
            the column is not present in the legend.
            """
            if name in legend:
                value = to_float(data[legend.index(name)])
                if filter_value is not None:
                    if value > filter_value:
                        return None
                return value

            else:
                # FIXME return a float or False, would trigger type error if not python
                self.logger.debug("Cannot extract cpu value %s from %s (%s)" %
                                  (name, data, legend))
                return 0.0

        try:
            if Platform.is_linux():
                mpstat = sp.Popen(['mpstat', '1', '3'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
                # topdog@ip:~$ mpstat 1 3
                # Linux 2.6.32-341-ec2 (ip)   01/19/2012  _x86_64_  (2 CPU)
                #
                # 04:22:41 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest   %idle
                # 04:22:42 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # 04:22:43 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # 04:22:44 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # Average:     all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                #
                # OR
                #
                # Thanks to Mart Visser to spotting this one.
                # blah:/etc/dd-agent# mpstat
                # Linux 2.6.26-2-xen-amd64 (atira)  02/17/2012  _x86_64_
                #
                # 05:27:03 PM  CPU    %user   %nice   %sys %iowait    %irq   %soft  %steal  %idle   intr/s
                # 05:27:03 PM  all    3.59    0.00    0.68    0.69    0.00   0.00    0.01   95.03    43.65
                #
                lines = mpstat.split("\n")
                legend = [l for l in lines if "%usr" in l or "%user" in l]
                avg = [l for l in lines if "Average" in l]
                if len(legend) == 1 and len(avg) == 1:
                    # Drop the AM/PM token so header and data columns line up
                    headers = [
                        h for h in legend[0].split() if h not in ("AM", "PM")
                    ]
                    data = avg[0].split()

                    # Userland
                    # Debian lenny says %user so we look for both
                    # One of them will be 0
                    cpu_metrics = {
                        "%usr": None,
                        "%user": None,
                        "%nice": None,
                        "%iowait": None,
                        "%idle": None,
                        "%sys": None,
                        "%irq": None,
                        "%soft": None,
                        "%steal": None,
                        "%guest": None
                    }

                    for cpu_m in cpu_metrics:
                        cpu_metrics[cpu_m] = get_value(headers,
                                                       data,
                                                       cpu_m,
                                                       filter_value=110)

                    if any([v is None for v in cpu_metrics.values()]):
                        self.logger.warning("Invalid mpstat data: %s" % data)

                    cpu_user = cpu_metrics["%usr"] + cpu_metrics[
                        "%user"] + cpu_metrics["%nice"]
                    cpu_system = cpu_metrics["%sys"] + cpu_metrics[
                        "%irq"] + cpu_metrics["%soft"]
                    cpu_wait = cpu_metrics["%iowait"]
                    cpu_idle = cpu_metrics["%idle"]
                    cpu_stolen = cpu_metrics["%steal"]
                    cpu_guest = cpu_metrics["%guest"]

                    return format_results(cpu_user, cpu_system, cpu_wait,
                                          cpu_idle, cpu_stolen, cpu_guest)
                else:
                    return False

            elif sys.platform == 'darwin':
                # generate 3 seconds of data
                # ['          disk0           disk1       cpu     load average', '    KB/t tps  MB/s     KB/t tps  MB/s  us sy id   1m   5m   15m', '   21.23  13  0.27    17.85   7  0.13  14  7 79  1.04 1.27 1.31', '    4.00   3  0.01     5.00   8  0.04  12 10 78  1.04 1.27 1.31', '']
                iostats = sp.Popen(['iostat', '-C', '-w', '3', '-c', '2'],
                                   stdout=sp.PIPE,
                                   close_fds=True).communicate()[0]
                lines = [l for l in iostats.split("\n") if len(l) > 0]
                legend = [l for l in lines if "us" in l]
                if len(legend) == 1:
                    headers = legend[0].split()
                    data = lines[-1].split()
                    cpu_user = get_value(headers, data, "us")
                    cpu_sys = get_value(headers, data, "sy")
                    cpu_wait = 0
                    cpu_idle = get_value(headers, data, "id")
                    cpu_st = 0
                    return format_results(cpu_user, cpu_sys, cpu_wait,
                                          cpu_idle, cpu_st)
                else:
                    # Only the first 80 characters of the output are logged
                    self.logger.warn(
                        "Expected to get at least 4 lines of data from iostat instead of just "
                        + str(iostats[:80]))
                    return False

            elif sys.platform.startswith("freebsd"):
                # generate 3 seconds of data
                # tty            ada0              cd0            pass0             cpu
                # tin  tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
                # 0    69 26.71   0  0.01   0.00   0  0.00   0.00   0  0.00   2  0  0  1 97
                # 0    78  0.00   0  0.00   0.00   0  0.00   0.00   0  0.00   0  0  0  0 100
                iostats = sp.Popen(['iostat', '-w', '3', '-c', '2'],
                                   stdout=sp.PIPE,
                                   close_fds=True).communicate()[0]
                lines = [l for l in iostats.split("\n") if len(l) > 0]
                legend = [l for l in lines if "us" in l]
                if len(legend) == 1:
                    headers = legend[0].split()
                    data = lines[-1].split()
                    cpu_user = get_value(headers, data, "us")
                    cpu_nice = get_value(headers, data, "ni")
                    cpu_sys = get_value(headers, data, "sy")
                    cpu_intr = get_value(headers, data, "in")
                    cpu_wait = 0
                    cpu_idle = get_value(headers, data, "id")
                    cpu_stol = 0
                    return format_results(cpu_user + cpu_nice,
                                          cpu_sys + cpu_intr, cpu_wait,
                                          cpu_idle, cpu_stol)

                else:
                    # Only the first 80 characters of the output are logged
                    self.logger.warn(
                        "Expected to get at least 4 lines of data from iostat instead of just "
                        + str(iostats[:80]))
                    return False

            elif sys.platform == 'sunos5':
                # mpstat -aq 1 2
                # SET minf mjf xcal  intr ithr  csw icsw migr smtx  srw syscl  usr sys  wt idl sze
                # 0 5239   0 12857 22969 5523 14628   73  546 4055    1 146856    5   6   0  89  24 <-- since boot
                # 1 ...
                # SET minf mjf xcal  intr ithr  csw icsw migr smtx  srw syscl  usr sys  wt idl sze
                # 0 20374   0 45634 57792 5786 26767   80  876 20036    2 724475   13  13   0  75  24 <-- past 1s
                # 1 ...
                # http://docs.oracle.com/cd/E23824_01/html/821-1462/mpstat-1m.html
                #
                # Will aggregate over all processor sets
                mpstat = sp.Popen(['mpstat', '-aq', '1', '2'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
                lines = [l for l in mpstat.split("\n") if len(l) > 0]
                # discard the first len(lines)/2 lines (the since-boot report);
                # floor division keeps the slice index an integer
                lines = lines[len(lines) // 2:]
                legend = [l for l in lines if "SET" in l]
                if len(legend) != 1:
                    # Explicit check rather than an assert, which is skipped
                    # when Python runs with -O
                    self.logger.warn(
                        "Expected exactly one header line from mpstat, got %d"
                        % len(legend))
                    return False

                headers = legend[0].split()
                # collect stats for each processor set
                # and aggregate them based on the relative set size
                d_lines = [l for l in lines if "SET" not in l]
                user = [
                    get_value(headers, l.split(), "usr") for l in d_lines
                ]
                kern = [
                    get_value(headers, l.split(), "sys") for l in d_lines
                ]
                wait = [
                    get_value(headers, l.split(), "wt") for l in d_lines
                ]
                idle = [
                    get_value(headers, l.split(), "idl") for l in d_lines
                ]
                size = [
                    get_value(headers, l.split(), "sze") for l in d_lines
                ]
                count = sum(size)
                rel_size = [s / count for s in size]
                # Dot product: weights each set's figure by its relative size
                dot = lambda v1, v2: reduce(operator.add,
                                            map(operator.mul, v1, v2))
                return format_results(dot(user, rel_size),
                                      dot(kern, rel_size),
                                      dot(wait, rel_size),
                                      dot(idle, rel_size), 0.0)
            else:
                self.logger.warn("CPUStats: unsupported platform")
                return False
        except Exception:
            self.logger.exception("Cannot compute CPU stats")
            return False
Ejemplo n.º 9
0
    def check(self, agentConfig):
        if Platform.is_linux():
            try:
                with open('/proc/meminfo', 'r') as mem_info:
                    lines = mem_info.readlines()
            except Exception:
                self.logger.exception(
                    'Cannot get memory metrics from /proc/meminfo')
                return False

            # $ cat /proc/meminfo
            # MemTotal:        7995360 kB
            # MemFree:         1045120 kB
            # Buffers:          226284 kB
            # Cached:           775516 kB
            # SwapCached:       248868 kB
            # Active:          1004816 kB
            # Inactive:        1011948 kB
            # Active(anon):     455152 kB
            # Inactive(anon):   584664 kB
            # Active(file):     549664 kB
            # Inactive(file):   427284 kB
            # Unevictable:     4392476 kB
            # Mlocked:         4392476 kB
            # SwapTotal:      11120632 kB
            # SwapFree:       10555044 kB
            # Dirty:              2948 kB
            # Writeback:             0 kB
            # AnonPages:       5203560 kB
            # Mapped:            50520 kB
            # Shmem:             10108 kB
            # Slab:             161300 kB
            # SReclaimable:     136108 kB
            # SUnreclaim:        25192 kB
            # KernelStack:        3160 kB
            # PageTables:        26776 kB
            # NFS_Unstable:          0 kB
            # Bounce:                0 kB
            # WritebackTmp:          0 kB
            # CommitLimit:    15118312 kB
            # Committed_AS:    6703508 kB
            # VmallocTotal:   34359738367 kB
            # VmallocUsed:      400668 kB
            # VmallocChunk:   34359329524 kB
            # HardwareCorrupted:     0 kB
            # HugePages_Total:       0
            # HugePages_Free:        0
            # HugePages_Rsvd:        0
            # HugePages_Surp:        0
            # Hugepagesize:       2048 kB
            # DirectMap4k:       10112 kB
            # DirectMap2M:     8243200 kB

            regexp = re.compile(
                r'^(\w+):\s+([0-9]+)'
            )  # We run this several times so one-time compile now
            meminfo = {}

            for line in lines:
                try:
                    match = re.search(regexp, line)
                    if match is not None:
                        meminfo[match.group(1)] = match.group(2)
                except Exception:
                    self.logger.exception("Cannot parse /proc/meminfo")

            memData = {}

            # Physical memory
            # FIXME units are in MB, we should use bytes instead
            try:
                memData['physTotal'] = int(meminfo.get('MemTotal', 0)) / 1024
                memData['physFree'] = int(meminfo.get('MemFree', 0)) / 1024
                memData['physBuffers'] = int(meminfo.get('Buffers', 0)) / 1024
                memData['physCached'] = int(meminfo.get('Cached', 0)) / 1024
                memData['physShared'] = int(meminfo.get('Shmem', 0)) / 1024

                memData[
                    'physUsed'] = memData['physTotal'] - memData['physFree']
                # Usable is relative since cached and buffers are actually used to speed things up.
                memData['physUsable'] = memData['physFree'] + memData[
                    'physBuffers'] + memData['physCached']

                if memData['physTotal'] > 0:
                    memData['physPctUsable'] = float(
                        memData['physUsable']) / float(memData['physTotal'])
            except Exception:
                self.logger.exception(
                    'Cannot compute stats from /proc/meminfo')

            # Swap
            # FIXME units are in MB, we should use bytes instead
            try:
                memData['swapTotal'] = int(meminfo.get('SwapTotal', 0)) / 1024
                memData['swapFree'] = int(meminfo.get('SwapFree', 0)) / 1024

                memData[
                    'swapUsed'] = memData['swapTotal'] - memData['swapFree']

                if memData['swapTotal'] > 0:
                    memData['swapPctFree'] = float(
                        memData['swapFree']) / float(memData['swapTotal'])
            except Exception:
                self.logger.exception('Cannot compute swap stats')

            return memData

        elif sys.platform == 'darwin':
            macV = platform.mac_ver()
            macV_minor_version = int(
                re.match(r'10\.(\d+)\.?.*', macV[0]).group(1))

            try:
                top = sp.Popen(['top', '-l 1'], stdout=sp.PIPE,
                               close_fds=True).communicate()[0]
                sysctl = sp.Popen(['sysctl', 'vm.swapusage'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
            except StandardError:
                self.logger.exception('getMemoryUsage')
                return False

            # Deal with top
            lines = top.split('\n')
            physParts = re.findall(r'([0-9]\d+)', lines[self.topIndex])

            # Deal with sysctl
            swapParts = re.findall(r'([0-9]+\.\d+)', sysctl)

            # Mavericks changes the layout of physical memory format in `top`
            physUsedPartIndex = 3
            physFreePartIndex = 4
            if macV and (macV_minor_version >= 9):
                physUsedPartIndex = 0
                physFreePartIndex = 2

            return {
                'physUsed': physParts[physUsedPartIndex],
                'physFree': physParts[physFreePartIndex],
                'swapUsed': swapParts[1],
                'swapFree': swapParts[2]
            }

        elif sys.platform.startswith("freebsd"):
            try:
                sysctl = sp.Popen(['sysctl', 'vm.stats.vm'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
            except Exception:
                self.logger.exception('getMemoryUsage')
                return False

            lines = sysctl.split('\n')

            # ...
            # vm.stats.vm.v_page_size: 4096
            # vm.stats.vm.v_page_count: 759884
            # vm.stats.vm.v_wire_count: 122726
            # vm.stats.vm.v_active_count: 109350
            # vm.stats.vm.v_cache_count: 17437
            # vm.stats.vm.v_inactive_count: 479673
            # vm.stats.vm.v_free_count: 30542
            # ...

            # We run this several times so one-time compile now
            regexp = re.compile(r'^vm\.stats\.vm\.(\w+):\s+([0-9]+)')
            meminfo = {}

            for line in lines:
                try:
                    match = re.search(regexp, line)
                    if match is not None:
                        meminfo[match.group(1)] = match.group(2)
                except Exception:
                    self.logger.exception(
                        "Cannot parse sysctl vm.stats.vm output")

            memData = {}

            # Physical memory
            try:
                pageSize = int(meminfo.get('v_page_size'))

                memData['physTotal'] = (int(meminfo.get('v_page_count', 0)) *
                                        pageSize) / 1048576
                memData['physFree'] = (int(meminfo.get('v_free_count', 0)) *
                                       pageSize) / 1048576
                memData['physCached'] = (int(meminfo.get('v_cache_count', 0)) *
                                         pageSize) / 1048576
                memData['physUsed'] = (
                    (int(meminfo.get('v_active_count'), 0) +
                     int(meminfo.get('v_wire_count', 0))) * pageSize) / 1048576
                memData['physUsable'] = (
                    (int(meminfo.get('v_free_count'), 0) +
                     int(meminfo.get('v_cache_count', 0)) +
                     int(meminfo.get('v_inactive_count', 0))) *
                    pageSize) / 1048576

                if memData['physTotal'] > 0:
                    memData['physPctUsable'] = float(
                        memData['physUsable']) / float(memData['physTotal'])
            except Exception:
                self.logger.exception(
                    'Cannot compute stats from /proc/meminfo')

            # Swap
            try:
                sysctl = sp.Popen(['swapinfo', '-m'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
            except Exception:
                self.logger.exception('getMemoryUsage')
                return False

            lines = sysctl.split('\n')

            # ...
            # Device          1M-blocks     Used    Avail Capacity
            # /dev/ad0s1b           570        0      570     0%
            # ...

            assert "Device" in lines[0]

            try:
                memData['swapTotal'] = 0
                memData['swapFree'] = 0
                memData['swapUsed'] = 0
                for line in lines[1:-1]:
                    line = line.split()
                    memData['swapTotal'] += int(line[1])
                    memData['swapFree'] += int(line[3])
                    memData['swapUsed'] += int(line[2])
            except Exception:
                self.logger.exception('Cannot compute stats from swapinfo')

            return memData
        elif sys.platform == 'sunos5':
            try:
                memData = {}
                kmem = sp.Popen(["kstat", "-c", "zone_memory_cap", "-p"],
                                stdout=sp.PIPE,
                                close_fds=True).communicate()[0]

                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:anon_alloc_fail   0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:anonpgin  0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:class     zone_memory_cap
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:crtime    16359935.0680834
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:execpgin  185
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:fspgin    2556
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:n_pf_throttle     0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:n_pf_throttle_usec        0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:nover     0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:pagedout  0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:pgpgin    2741
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:physcap   536870912  <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:rss       115544064  <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:snaptime  16787393.9439095
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swap      91828224   <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swapcap   1073741824 <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:zonename  53aa9b7e-48ba-4152-a52b-a6368c3d9e7c

                # turn memory_cap:360:zone_name:key value
                # into { "key": value, ...}
                kv = [
                    l.strip().split() for l in kmem.split("\n") if len(l) > 0
                ]
                entries = dict([(k.split(":")[-1], v) for (k, v) in kv])
                # extract rss, physcap, swap, swapcap, turn into MB
                convert = lambda v: int(long(v)) / 2**20
                memData["physTotal"] = convert(entries["physcap"])
                memData["physUsed"] = convert(entries["rss"])
                memData[
                    "physFree"] = memData["physTotal"] - memData["physUsed"]
                memData["swapTotal"] = convert(entries["swapcap"])
                memData["swapUsed"] = convert(entries["swap"])
                memData[
                    "swapFree"] = memData["swapTotal"] - memData["swapUsed"]

                if memData['swapTotal'] > 0:
                    memData['swapPctFree'] = float(
                        memData['swapFree']) / float(memData['swapTotal'])
                return memData
            except Exception:
                self.logger.exception(
                    "Cannot compute mem stats from kstat -c zone_memory_cap")
                return False
        else:
            return False
Ejemplo n.º 10
0
    def check(self, agentConfig):
        """Capture io stats.

        Runs the platform's ``iostat`` command and parses its output into one
        dictionary of metrics per device.

        @param agentConfig: agent configuration mapping. When it holds a truthy
            'device_blacklist_re' entry, every device for which
            ``device_blacklist_re.match(device)`` succeeds is left out.
        @rtype dict
        @return {"device": {"metric": value, "metric": value}, ...}, or False
            when the platform is not supported or the statistics could not be
            collected or parsed (the error is logged)
        """
        io = {}
        try:
            if Platform.is_linux():
                stdout = sp.Popen(['iostat', '-d', '1', '2', '-x', '-k'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]

                #                 Linux 2.6.32-343-ec2 (ip-10-35-95-10)   12/11/2012      _x86_64_        (2 CPU)
                #
                # Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await  svctm  %util
                # sda1              0.00    17.61    0.26   32.63     4.23   201.04    12.48     0.16    4.81   0.53   1.73
                # sdb               0.00     2.68    0.19    3.84     5.79    26.07    15.82     0.02    4.93   0.22   0.09
                # sdg               0.00     0.13    2.29    3.84   100.53    30.61    42.78     0.05    8.41   0.88   0.54
                # sdf               0.00     0.13    2.30    3.84   100.54    30.61    42.78     0.06    9.12   0.90   0.55
                # md0               0.00     0.00    0.05    3.37     1.41    30.01    18.35     0.00    0.00   0.00   0.00
                #
                # Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await  svctm  %util
                # sda1              0.00     0.00    0.00   10.89     0.00    43.56     8.00     0.03    2.73   2.73   2.97
                # sdb               0.00     0.00    0.00    2.97     0.00    11.88     8.00     0.00    0.00   0.00   0.00
                # sdg               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                # sdf               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                # md0               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                io.update(self._parse_linux2(stdout))

            elif sys.platform == "sunos5":
                iostat = sp.Popen(["iostat", "-x", "-d", "1", "2"],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]

                #                   extended device statistics <-- since boot
                # device      r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b
                # ramdisk1    0.0    0.0    0.1    0.1  0.0  0.0    0.0   0   0
                # sd0         0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd1        79.9  149.9 1237.6 6737.9  0.0  0.5    2.3   0  11
                #                   extended device statistics <-- past second
                # device      r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b
                # ramdisk1    0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd0         0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd1         0.0  139.0    0.0 1850.6  0.0  0.0    0.1   0   1
                io.update(self._parse_extended_iostat(iostat, "sunos"))

            elif sys.platform.startswith("freebsd"):
                iostat = sp.Popen(["iostat", "-x", "-d", "1", "2"],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]

                # Be careful!
                # It looks like SunOS, but some columns (wait, svc_t) have different meaning
                #                        extended device statistics
                # device     r/s   w/s    kr/s    kw/s wait svc_t  %b
                # ad0        3.1   1.3    49.9    18.8    0   0.7   0
                #                         extended device statistics
                # device     r/s   w/s    kr/s    kw/s wait svc_t  %b
                # ad0        0.0   2.0     0.0    31.8    0   0.2   0
                io.update(self._parse_extended_iostat(iostat, "freebsd"))

            elif sys.platform == 'darwin':
                iostat = sp.Popen(['iostat', '-d', '-c', '2', '-w', '1'],
                                  stdout=sp.PIPE,
                                  close_fds=True).communicate()[0]
                #          disk0           disk1          <-- number of disks
                #    KB/t tps  MB/s     KB/t tps  MB/s
                #   21.11  23  0.47    20.01   0  0.00
                #    6.67   3  0.02     0.00   0  0.00    <-- line of interest
                io = self._parse_darwin(iostat)
            else:
                return False

            # If we filter devices, do it now.
            device_blacklist_re = agentConfig.get('device_blacklist_re', None)
            if not device_blacklist_re:
                return io

            filtered_io = {}
            # items() rather than iteritems(): works on Python 2 and 3 alike
            for device, stats in io.items():
                if not device_blacklist_re.match(device):
                    filtered_io[device] = stats
            return filtered_io

        except Exception:
            self.logger.exception("Cannot extract IO statistics")
            return False

    def _parse_extended_iostat(self, output, os_name):
        """Parse the output of ``iostat -x -d 1 2`` (SunOS/FreeBSD layout).

        The command prints two reports; only the second one is kept, the
        first one holds the statistics since boot.

        @param output: raw text printed by iostat
        @param os_name: platform key handed to self.xlate() together with each
            column header ("sunos" or "freebsd")
        @rtype dict
        @return {"device": {"metric": "value", ...}, ...}; the values are kept
            as the strings printed by iostat
        @raise ValueError: when the report banner or the header row is missing
        """
        lines = [line for line in output.split("\n") if line]
        # Discard the first half of the display (stats since boot). Floor
        # division keeps the slice index an integer even under true division.
        lines = lines[len(lines) // 2:]

        # Explicit checks instead of assert, so they survive `python -O`.
        if len(lines) < 2 or "extended device statistics" not in lines[0]:
            raise ValueError(
                "unexpected iostat output: report banner not found")
        headers = lines[1].split()
        if "device" not in headers:
            raise ValueError(
                "unexpected iostat output: header row not found")

        devices = {}
        for line in lines[2:]:
            cols = line.split()
            # cols[0] is the device
            # cols[1:] are the values
            stats = {}
            for i in range(1, len(cols)):
                stats[self.xlate(headers[i], os_name)] = cols[i]
            devices[cols[0]] = stats
        return devices
Ejemplo n.º 11
0
    def check(self, agentConfig):
        """Collect CPU usage statistics.

        On Linux ('linux2') the output of ``mpstat -P ALL 1 1`` is parsed, on
        Mac OS X ('darwin') the output of ``sar -u 1 2``.

        @param agentConfig: agent configuration (not read by this check)
        @rtype dict
        @return {'cpuStats': {device: {header: value, ...}, ...}} where the
            values are the strings printed by the tool, or False when the
            platform is unsupported, mpstat cannot be launched, or the output
            cannot be collected/parsed
        """
        self.logger.debug('getCPUStats: start')

        cpu_stats = {}

        if sys.platform == 'linux2':
            self.logger.debug('getCPUStats: linux2')

            headerRegexp = re.compile(r'.*?([%][a-zA-Z0-9]+)[\s+]?')
            itemRegexp = re.compile(r'.*?\s+(\d+)[\s+]?')
            valueRegexp = re.compile(r'\d+\.\d+')
            # Pre-initialised so the error handler can tell whether the
            # process was ever started.
            proc = None
            try:
                proc = subprocess.Popen(['mpstat', '-P', 'ALL', '1', '1'], stdout=subprocess.PIPE, close_fds=True)
                stats = proc.communicate()[0]

                # NOTE(review): pythonVersion is defined elsewhere in the file;
                # presumably the interpreter version tuple, checked here
                # because Popen.kill() only exists from Python 2.6 on.
                if int(pythonVersion[1]) >= 6:
                    try:
                        proc.kill()
                    except Exception:
                        self.logger.debug('Process already terminated')

                stats = stats.split('\n')
                # The third line of the output holds the column titles.
                headerNames = headerRegexp.findall(stats[2])
                device = None

                for row in stats[3:]:
                    if not row:  # skip the averages
                        break

                    deviceMatch = itemRegexp.match(row)

                    if 'all' in row:
                        device = 'ALL'
                    elif deviceMatch is not None:
                        device = 'CPU%s' % deviceMatch.groups()[0]

                    # Some locales print a decimal comma; normalise it first.
                    values = valueRegexp.findall(row.replace(',', '.'))

                    cpu_stats[device] = {}
                    # Indexing (not zip) on purpose: a row with fewer values
                    # than headers must fail and be reported below.
                    for headerIndex, headerName in enumerate(headerNames):
                        cpu_stats[device][headerName] = values[headerIndex]

            except OSError:
                # we dont have it installed return nothing
                return False

            except Exception:
                import traceback
                self.logger.error("getCPUStats: exception = %s", traceback.format_exc())

                if int(pythonVersion[1]) >= 6:
                    try:
                        if proc is not None:
                            proc.kill()
                    except Exception:
                        self.logger.debug('Process already terminated')

                return False

        elif sys.platform == 'darwin':
            self.logger.debug('getCPUStats: darwin')

            try:
                proc = subprocess.Popen(['sar', '-u', '1', '2'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
                stats = proc.communicate()[0]

                itemRegexp = re.compile(r'\s+(\d+)[\s+]?')
                titleRegexp = re.compile(r'.*?([%][a-zA-Z0-9]+)[\s+]?')
                titles = []
                values = []
                for line in stats.split('\n'):
                    # top line with the titles in
                    if '%' in line:
                        titles = titleRegexp.findall(line)
                    # only the averaged line is reported
                    if line.startswith('Average:'):
                        values = itemRegexp.findall(line)

                if values and titles:
                    cpu_stats['CPUs'] = dict(zip(titles, values))

            except Exception:
                import traceback
                self.logger.error('getCPUStats: exception = %s', traceback.format_exc())
                return False

        else:
            self.logger.debug('getCPUStats: unsupported platform')
            return False

        self.logger.debug('getCPUStats: completed, returning')
        return {'cpuStats': cpu_stats}
Ejemplo n.º 12
0
    def _populate_payload_metadata(self,
                                   payload,
                                   check_statuses,
                                   start_event=True):
        """
        Periodically populate the payload with metadata related to the system, host, and/or checks.

        Each group of metadata is only added when it is due, as decided by
        self._should_send_additional_data() (or self._is_first_run() for the
        startup information).

        @param payload: payload dictionary, updated in place. Its 'events' and
            'host-tags' entries are indexed directly, so they must already
            exist, and 'internalHostname' is read for the startup event.
        @param check_statuses: iterable of check status objects; their name,
            source_type_name, instance_statuses, service_metadata, status and
            init_failed_error attributes are read to build 'agent_checks'
        @param start_event: when true, the first run also posts the system
            stats and an 'Agent Startup' event
        """
        # Function-scope import so the block does not depend on the
        # module-level import list.
        import errno

        now = time.time()

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('system_stats', {})
            # Also post an event in the newsfeed
            payload['events']['System'] = [{
                'api_key': self.agentConfig['api_key'],
                'host': payload['internalHostname'],
                'timestamp': now,
                'event_type': 'Agent Startup',
                'msg_text': 'Version %s' % get_version()
            }]

        # Periodically send the host metadata.
        if self._should_send_additional_data('host_metadata'):
            # gather metadata with gohai
            try:
                if get_os() != 'windows':
                    command = "gohai"
                else:
                    # raw string: "\g" is not a valid escape sequence
                    command = r"gohai\gohai.exe"
                gohai_metadata, gohai_log = subprocess.Popen(
                    [command], stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE).communicate()
                payload['gohai'] = gohai_metadata
                if gohai_log:
                    log.warning("GOHAI LOG | {0}".format(gohai_log))
            except OSError as e:
                if e.errno == errno.ENOENT:  # file not found, expected when install from source
                    log.info("gohai file not found")
                else:
                    # bare raise keeps the original traceback
                    raise
            except Exception as e:
                log.warning("gohai command failed with error %s" % str(e))

            payload['systemStats'] = get_system_stats()
            payload['meta'] = self._get_hostname_metadata()

            # Cached so that the 'agent_checks' section below can re-send it.
            self.hostname_metadata_cache = payload['meta']
            # Add static tags from the configuration file
            host_tags = []
            if self.agentConfig['tags'] is not None:
                host_tags.extend([
                    unicode(tag.strip())
                    for tag in self.agentConfig['tags'].split(",")
                ])

            if self.agentConfig['collect_ec2_tags']:
                host_tags.extend(EC2.get_tags(self.agentConfig))

            if host_tags:
                payload['host-tags']['system'] = host_tags

            GCE_tags = GCE.get_tags(self.agentConfig)
            if GCE_tags is not None:
                payload['host-tags'][GCE.SOURCE_TYPE_NAME] = GCE_tags

            # Log the metadata on the first run
            if self._is_first_run():
                log.info(
                    "Hostnames: %s, tags: %s" %
                    (repr(self.hostname_metadata_cache), payload['host-tags']))

        # Periodically send extra hosts metadata (vsphere)
        # Metadata of hosts that are not the host where the agent runs, not all the checks use
        # that
        external_host_tags = []
        if self._should_send_additional_data('external_host_tags'):
            for check in self.initialized_checks_d:
                # NOTE: the handler below skips checks that do not define
                # get_external_host_tags, but it also swallows an
                # AttributeError raised inside the getter itself.
                try:
                    getter = getattr(check, 'get_external_host_tags')
                    check_tags = getter()
                    external_host_tags.extend(check_tags)
                except AttributeError:
                    pass

        if external_host_tags:
            payload['external_host_tags'] = external_host_tags

        # Periodically send agent_checks metadata
        if self._should_send_additional_data('agent_checks'):
            # Add agent checks statuses and error/warning messages
            agent_checks = []
            for check in check_statuses:
                if check.instance_statuses is not None:
                    for i, instance_status in enumerate(
                            check.instance_statuses):
                        agent_checks.append((
                            check.name,
                            check.source_type_name,
                            instance_status.instance_id,
                            instance_status.status,
                            # put error message or list of warning messages in the same field
                            # it will be handled by the UI
                            instance_status.error or instance_status.warnings
                            or "",
                            check.service_metadata[i]))
                else:
                    # No instance statuses: report the check as a whole with
                    # its initialization error.
                    agent_checks.append(
                        (check.name, check.source_type_name, "initialization",
                         check.status, repr(check.init_failed_error)))
            payload['agent_checks'] = agent_checks
            payload[
                'meta'] = self.hostname_metadata_cache  # add hostname metadata

        # If required by the user, let's create the dd_check:xxx host tags
        if self.agentConfig['create_dd_check_tags'] and \
                self._should_send_additional_data('dd_check_tags'):
            app_tags_list = [
                DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d
            ]
            app_tags_list.extend([
                DD_CHECK_TAG.format(cname)
                for cname in JMXFiles.get_jmx_appnames()
            ])

            if 'system' not in payload['host-tags']:
                payload['host-tags']['system'] = []

            payload['host-tags']['system'].extend(app_tags_list)
Ejemplo n.º 13
0
    def _start(self, path_to_java, java_run_opts, jmx_checks, command,
               reporter, tools_jar_path, custom_jar_paths,
               redirect_std_streams):
        """Build the JMXFetch command line, run it and wait for it to exit.

        @param path_to_java: java executable to run; "java" when falsy
        @param java_run_opts: whitespace-separated JVM options (may be falsy).
            Default max/initial heap options are appended when none are given.
        @param jmx_checks: check names passed to JMXFetch after '--check'
        @param command: JMXFetch command, passed as the last argument
        @param reporter: reporter string; when None a "statsd:host:port" one
            is built from the 'bind_host' and 'monitorstatsd_port' settings
        @param tools_jar_path: optional path put in front of the classpath
        @param custom_jar_paths: optional list of paths put in front of the
            classpath
        @param redirect_std_streams: when true, the child's stdout and stderr
            are captured in temporary files and written to sys.stdout and
            sys.stderr once the process has exited
        @return the return code of the JMXFetch process
        @raise OSError: when the process cannot be launched; a status file
            flagging every check is written first
        @raise Exception: any other failure is logged and re-raised
        """
        # Function-scope import so the block does not depend on the
        # module-level import list.
        import os

        if reporter is None:
            statsd_host = self.agentConfig.get('bind_host', 'localhost')
            statsd_port = self.agentConfig.get('monitorstatsd_port', "8125")
            reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

        log.info("Starting jmxfetch:")
        try:
            path_to_java = path_to_java or "java"
            java_run_opts = java_run_opts or ""
            path_to_jmxfetch = self._get_path_to_jmxfetch()
            path_to_status_file = JMXFiles.get_status_file_path()

            # os.pathsep is ':' on POSIX and ';' on Windows, which is the
            # separator the JVM expects between classpath entries.
            classpath = path_to_jmxfetch
            if tools_jar_path is not None:
                classpath = r"%s%s%s" % (tools_jar_path, os.pathsep,
                                         classpath)
            if custom_jar_paths:
                classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths),
                                         os.pathsep, classpath)

            subprocess_args = [
                path_to_java,
                '-classpath',
                classpath,
                JMXFETCH_MAIN_CLASS,
                '--check_period',
                str(self.check_frequency * 1000),
                '--conf_directory',
                r"%s" % self.confd_path,
                '--log_level',
                JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"),
                                       "INFO"),
                '--log_location',
                r"%s" % self.logging_config.get('jmxfetch_log_file'),
                '--reporter',
                reporter,
                '--status_location',
                r"%s" % path_to_status_file,
                command,
            ]

            if Platform.is_windows():
                # Both values go right before the trailing command argument.
                path_to_exit_file = JMXFiles.get_python_exit_file_path()
                subprocess_args.insert(
                    len(subprocess_args) - 1, '--exit_file_location')
                subprocess_args.insert(
                    len(subprocess_args) - 1, path_to_exit_file)

            # '--check' and its values go right after the main class name.
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

            if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
            if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
                java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

            # JVM options go right after the java executable.
            for opt in java_run_opts.split():
                subprocess_args.insert(1, opt)

            log.info("Running %s" % " ".join(subprocess_args))

            # Two nested with-blocks instead of contextlib.nested(): the
            # first file is closed even if creating the second one fails.
            with tempfile.TemporaryFile() as stdout_f:
                with tempfile.TemporaryFile() as stderr_f:
                    jmx_process = subprocess.Popen(
                        subprocess_args,
                        close_fds=not redirect_std_streams,
                        stdout=stdout_f if redirect_std_streams else None,
                        stderr=stderr_f if redirect_std_streams else None)
                    self.jmx_process = jmx_process

                    self.register_signal_handlers()

                    jmx_process.wait()

                    if redirect_std_streams:
                        # Replay what the child wrote on our own streams.
                        stderr_f.seek(0)
                        err = stderr_f.read()
                        stdout_f.seek(0)
                        out = stdout_f.read()
                        sys.stdout.write(out)
                        sys.stderr.write(err)

            return jmx_process.returncode

        except OSError:
            java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
            log.exception(java_path_msg)
            # Flag every requested check as invalid in the status file.
            invalid_checks = {}
            for check in jmx_checks:
                check_name = check.split('.')[0]
                check_name = check_name.encode('ascii', 'ignore')
                invalid_checks[check_name] = java_path_msg
            JMXFiles.write_status_file(invalid_checks)
            raise
        except Exception:
            log.exception("Couldn't launch JMXFetch")
            raise