Example #1
0
    def __call__(self):
        """Emit one datapoint per counter for every matching network interface.

        Rewinds the already open ``self.f_netdev`` handle, parses each
        interface line and pushes 16 ``proc.net.<field>.<in|out>`` datapoints
        per interface onto ``self._readq``.
        """
        with utils.lower_privileges(self._logger):

            # We just care about ethN and emN interfaces.  We specifically
            # want to avoid bond interfaces, because interface
            # stats are still kept on the child interfaces when
            # you bond.  By skipping bond we avoid double counting.
            #
            # Compiled once per call (instead of re-parsed per line) and
            # written as raw strings so the regex escapes are not treated as
            # (invalid) Python string escapes.
            iface_re = re.compile(
                r"\s+(eth?\d+|em\d+_\d+/\d+|em\d+_\d+|em\d+|"
                r"p\d+p\d+_\d+/\d+|p\d+p\d+_\d+|p\d+p\d+):(.*)")

            self.f_netdev.seek(0)
            ts = int(time.time())
            for line in self.f_netdev:
                m = iface_re.match(line)
                if not m:
                    continue
                intf = m.group(1)
                stats = m.group(2).split(None)

                for i in range(16):
                    # Columns 0-7 are tagged "in", columns 8-15 "out".
                    direction = "out" if i >= 8 else "in"
                    self._readq.nput(
                        "proc.net.%s.%s %d %s iface=%s" %
                        (FIELDS[i], direction, ts, stats[i], intf))
Example #2
0
 def __call__(self):
     """Collect stats for every bucket returned by ``list_bucket``.

     The bucket list is fetched again on each call so that datapoints for
     newly created buckets start being collected.
     """
     with utils.lower_privileges(self._logger):
         for bucket in list_bucket(self.bin_dir):
             collect_stats(self.bin_dir, bucket, self._readq)
Example #3
0
    def __init__(self, config, logger, readq):
        """Open the /proc/net source files and compile the sockstat parser.

        Args:
            config: forwarded unchanged to the parent constructor.
            logger: forwarded unchanged to the parent constructor.
            readq: forwarded unchanged to the parent constructor.

        Raises:
            IOError: re-raised when any of the three /proc/net files cannot be
                opened, after a ``netstat.state 1`` datapoint has been queued,
                the failure logged and ``self.cleanup()`` called.
        """
        super(Netstat, self).__init__(config, logger, readq)

        self.page_size = resource.getpagesize()
        try:
            self.sockstat = open("/proc/net/sockstat")
            self.netstat = open("/proc/net/netstat")
            self.snmp = open("/proc/net/snmp")
        except IOError:
            self._readq.nput("netstat.state %s %s" % (int(time.time()), '1'))
            self.log_exception('open failed')
            self.cleanup()
            raise
        with utils.lower_privileges(self._logger):
            # Note: up until v2.6.37-rc2 most of the values were 32 bits.
            # The first value is pretty useless since it accounts for some
            # socket types but not others.  So we don't report it because it's
            # more confusing than anything else and it's not well documented
            # what type of sockets are or aren't included in this count.
            #
            # Raw strings keep the regex escapes (\d, \n) out of Python's own
            # string-escape processing; the regex engine resolves \n itself,
            # so the set of matched inputs is unchanged.
            self.regexp = re.compile(
                r"sockets: used \d+\n"
                r"TCP: inuse (?P<tcp_inuse>\d+) orphan (?P<orphans>\d+)"
                r" tw (?P<tw_count>\d+) alloc (?P<tcp_sockets>\d+)"
                r" mem (?P<tcp_pages>\d+)\n"
                r"UDP: inuse (?P<udp_inuse>\d+)"
                # UDP memory accounting was added in v2.6.25-rc1
                r"(?: mem (?P<udp_pages>\d+))?\n"
                # UDP-Lite (RFC 3828) was added in v2.6.20-rc2
                r"(?:UDPLITE: inuse (?P<udplite_inuse>\d+)\n)?"
                r"RAW: inuse (?P<raw_inuse>\d+)\n"
                r"FRAG: inuse (?P<ip_frag_nqueues>\d+)"
                r" memory (?P<ip_frag_mem>\d+)\n")
Example #4
0
def main(argv):
    """Poll every reachable Elasticsearch server in an endless loop.

    Args:
        argv: command-line arguments; not read by this function.

    Returns:
        1 when the ``json`` module is unavailable, 13 when none of the
        configured servers accepts a connection.  Otherwise the collection
        loop never returns.
    """
    # NOTE(review): `self` is not defined in this function, so the next line
    # looks like it raises NameError unless a module-level `self` exists --
    # confirm which logger should be passed here.
    with utils.lower_privileges(self._logger):
        socket.setdefaulttimeout(DEFAULT_TIMEOUT)
        servers = []

        if json is None:
            utils.err("This collector requires the `json' Python module.")
            return 1

        for conf in elasticsearch_conf.get_servers():
            server = httplib.HTTPConnection(*conf)
            try:
                server.connect()
            except socket.error as exc:
                # A refused connection means nothing is listening on this
                # endpoint: skip it.  Anything else propagates.  Reading
                # `errno` (rather than unpacking the exception) also copes
                # with socket errors that carry a single argument.
                if exc.errno == errno.ECONNREFUSED:
                    continue
                raise
            servers.append(server)

        if not servers:
            return 13  # No ES running, ask tcollector to not respawn us.

        # Query a server that actually connected; the bare loop variable may
        # still point at a connection that was refused.
        status = node_status(servers[-1])
        version = status["version"]["number"]

        while True:
            for server in servers:
                _collect_server(server, version)
            time.sleep(COLLECTION_INTERVAL)
Example #5
0
def main(argv):
    """Poll every reachable Elasticsearch server in an endless loop.

    Args:
        argv: command-line arguments; not read by this function.

    Returns:
        1 when the ``json`` module is unavailable, 13 when none of the
        configured servers accepts a connection.  Otherwise the collection
        loop never returns.
    """
    # NOTE(review): `self` is not defined in this function, so the next line
    # looks like it raises NameError unless a module-level `self` exists --
    # confirm which logger should be passed here.
    with utils.lower_privileges(self._logger):
        socket.setdefaulttimeout(DEFAULT_TIMEOUT)
        servers = []

        if json is None:
            utils.err("This collector requires the `json' Python module.")
            return 1

        for conf in elasticsearch_conf.get_servers():
            server = httplib.HTTPConnection(*conf)
            try:
                server.connect()
            except socket.error as exc:
                # Only a refused connection is tolerated (endpoint skipped);
                # every other socket failure propagates.  Using `exc.errno`
                # avoids the Python-2-only tuple-unpacking except clause.
                if exc.errno == errno.ECONNREFUSED:
                    continue
                raise
            servers.append(server)

        if not servers:
            return 13  # No ES running, ask tcollector to not respawn us.

        # Use a connection that succeeded; the loop variable may refer to a
        # server whose connection was refused.
        status = node_status(servers[-1])
        version = status["version"]["number"]

        while True:
            for server in servers:
                _collect_server(server, version)
            time.sleep(COLLECTION_INTERVAL)
Example #6
0
def main():
    """Print MongoDB ``serverStatus`` metrics to stdout in an endless loop.

    Returns:
        13 when the ``pymongo`` module is unavailable; otherwise the loop
        never returns.
    """
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):
        if pymongo is None:
            sys.stderr.write("error: Python module `pymongo' is missing\n")
            return 13

        c = pymongo.Connection(host=HOST, port=PORT)

        while True:
            res = c.admin.command('serverStatus')
            ts = int(time.time())

            # Explicit write() calls replace the Python-2-only print
            # statement; the emitted text is unchanged.
            for base_metric, tags in TAG_METRICS:
                for tag in tags:
                    sys.stdout.write(
                        'mongo.%s %d %s type=%s\n'
                        % (base_metric, ts, res[base_metric][tag], tag))
            for metric in METRICS:
                # Walk the dotted metric path through the nested status
                # document; metrics missing from this server are skipped.
                cur = res
                try:
                    for m in metric.split('.'):
                        cur = cur[m]
                except KeyError:
                    continue
                sys.stdout.write('mongo.%s %d %s\n' % (metric, ts, cur))

            sys.stdout.flush()
            time.sleep(INTERVAL)
Example #7
0
    def __init__(self, config, logger, readq):
        """Connect to every configured MongoDB endpoint.

        A client is created (and authenticated when ``USER`` is set) for each
        entry of ``CONFIG_CONN``, ``MONGOS_CONN`` and ``REPLICA_CONN`` and
        stored under the entry's ``'link'`` key.  Any failure is reported as a
        ``mongo3.state 1`` datapoint instead of being raised.

        Args:
            config: forwarded unchanged to the parent constructor.
            logger: forwarded unchanged to the parent constructor.
            readq: forwarded unchanged to the parent constructor.
        """
        super(Mongo3, self).__init__(config, logger, readq)
        self.loadEnv()
        try:
            with utils.lower_privileges(self._logger):
                if pymongo is None:
                    self.log_error('can not load pymongo module')
                    self._readq.nput("mongo3.state %s %s" % (int(time.time()), '1'))
                    # Nothing can be connected without the driver; stop here
                    # rather than failing on the first pymongo attribute.
                    return

                for endpoints in (CONFIG_CONN, MONGOS_CONN, REPLICA_CONN):
                    for item in endpoints:
                        conn = pymongo.MongoClient(host=item['host'], port=item['port'])
                        if USER:
                            conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
                        item['link'] = conn
        except Exception as e:
            # Still best-effort, but the cause is now logged and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.log_error('mongo3 connection setup failed: %s' % e)
            self._readq.nput("mongo3.state %s %s" % (int(time.time()), '1'))
Example #8
0
 def __call__(self):
     """Collect stats for every bucket returned by ``list_bucket``.

     The list is re-read on each invocation so that newly added buckets are
     included.
     """
     with utils.lower_privileges(self._logger):
         for bucket in list_bucket(self.bin_dir):
             collect_stats(self.bin_dir, bucket, self._readq)
Example #9
0
    def __init__(self, config, logger, readq):
        """Open the /proc/net source files and compile the sockstat parser.

        Args:
            config: forwarded unchanged to the parent constructor.
            logger: forwarded unchanged to the parent constructor.
            readq: forwarded unchanged to the parent constructor.

        Raises:
            IOError: re-raised when any of the three /proc/net files cannot be
                opened, after logging and calling ``self.cleanup()``.
        """
        super(Netstat, self).__init__(config, logger, readq)

        self.page_size = resource.getpagesize()
        try:
            self.sockstat = open("/proc/net/sockstat")
            self.netstat = open("/proc/net/netstat")
            self.snmp = open("/proc/net/snmp")
        except IOError:
            self.log_exception('open failed')
            self.cleanup()
            raise
        with utils.lower_privileges(self._logger):
            # Note: up until v2.6.37-rc2 most of the values were 32 bits.
            # The first value is pretty useless since it accounts for some
            # socket types but not others.  So we don't report it because it's
            # more confusing than anything else and it's not well documented
            # what type of sockets are or aren't included in this count.
            #
            # Raw strings keep the regex escapes (\d, \n) out of Python's own
            # string-escape processing; the regex engine resolves \n itself,
            # so the set of matched inputs is unchanged.
            self.regexp = re.compile(
                r"sockets: used \d+\n"
                r"TCP: inuse (?P<tcp_inuse>\d+) orphan (?P<orphans>\d+)"
                r" tw (?P<tw_count>\d+) alloc (?P<tcp_sockets>\d+)"
                r" mem (?P<tcp_pages>\d+)\n"
                r"UDP: inuse (?P<udp_inuse>\d+)"
                # UDP memory accounting was added in v2.6.25-rc1
                r"(?: mem (?P<udp_pages>\d+))?\n"
                # UDP-Lite (RFC 3828) was added in v2.6.20-rc2
                r"(?:UDPLITE: inuse (?P<udplite_inuse>\d+)\n)?"
                r"RAW: inuse (?P<raw_inuse>\d+)\n"
                r"FRAG: inuse (?P<ip_frag_nqueues>\d+)"
                r" memory (?P<ip_frag_mem>\d+)\n")
 def __call__(self):
     """Report namenode state and, when ``json`` is available, emit metrics.

     Queues ``hadoop.namenode.state 0`` and runs ``HadoopNode(...).emit()``
     when the ``json`` module is usable; otherwise queues state ``1`` and
     logs an error.
     """
     with utils.lower_privileges(self._logger):
         if not json:
             self._readq.nput("hadoop.namenode.state %s %s" % (int(time.time()), '1'))
             self.logger.error("This collector requires the `json' Python module.")
             return
         self._readq.nput("hadoop.namenode.state %s %s" % (int(time.time()), '0'))
         node = HadoopNode(self.service, self.daemon, self.host, self.port,
                           REPLACEMENTS, self.readq, self._logger)
         node.emit()
 def __call__(self):
     """Emit region server metrics, or log an error when ``json`` is missing."""
     with utils.lower_privileges(self._logger):
         if not json:
             self.logger.error(
                 "This collector requires the `json' Python module.")
             return
         collector = HBaseRegionserverHttp(self.port, self.logger, self.readq)
         collector.emit()
Example #12
0
    def __call__(self):
        """Walk the cgroup tree and read stats for each container directory.

        Three layouts under ``self.cgroup_path`` are visited:

        * ``<subsystem>/docker/<id>`` directories,
        * ``<subsystem>/<slice>/docker-<id>.scope`` directories for the
          system, machine and user slices,
        * ``lxc/<name>`` directories.

        ``self.readdockerstats(path, name)`` is called for every match.
        """
        with utils.lower_privileges(self._logger):
            # The name/image caches are cleared whenever the counter is 0,
            # and the counter wraps back to 0 after reaching 4, i.e. the
            # caches are reset on every 4th call.
            if self.cache == 0:
                self.containernames = {}
                self.containerimages = {}
            self.cache += 1
            if self.cache == 4:
                self.cache = 0

            if os.path.isdir(self.cgroup_path):
                for level1 in os.listdir(self.cgroup_path):
                    if (os.path.isdir(self.cgroup_path + "/" + level1 +
                                      "/docker") and
                            # /cgroup/cpu and /cgroup/cpuacct are often links to /cgroup/cpu,cpuacct
                            not (((level1 == "cpu,cpuacct") or
                                  (level1 == "cpuacct")) and
                                 (os.path.isdir(self.cgroup_path +
                                                "/cpu/docker")))):
                        for level2 in os.listdir(self.cgroup_path + "/" +
                                                 level1 + "/docker"):
                            if os.path.isdir(self.cgroup_path + "/" + level1 +
                                             "/docker/" + level2):
                                self.readdockerstats(
                                    self.cgroup_path + "/" + level1 +
                                    "/docker/" + level2, level2)
                    else:
                        # If Docker cgroup is handled by slice
                        # http://www.freedesktop.org/software/systemd/man/systemd.slice.html
                        for slicename in ("system.slice", "machine.slice",
                                          "user.slice"):
                            if (os.path.isdir(self.cgroup_path + "/" + level1 +
                                              "/" + slicename) and
                                    # /cgroup/cpu and /cgroup/cpuacct are often links to /cgroup/cpu,cpuacct
                                    not (
                                        ((level1 == "cpu,cpuacct") or
                                         (level1 == "cpuacct")) and
                                        (os.path.isdir(self.cgroup_path +
                                                       "/cpu/" + slicename)))):
                                for level2 in os.listdir(self.cgroup_path +
                                                         "/" + level1 + "/" +
                                                         slicename):
                                    if os.path.isdir(self.cgroup_path + "/" +
                                                     level1 + "/" + slicename +
                                                     "/" + level2):
                                        # Only docker-<id>.scope entries are
                                        # containers; the captured id is used
                                        # as the container name.
                                        m = re.search("^docker-(\w+)\.scope$",
                                                      level2)
                                        if m:
                                            self.readdockerstats(
                                                self.cgroup_path + "/" +
                                                level1 + "/" + slicename +
                                                "/" + level2, m.group(1))
                                            # NOTE(review): this stops the scan
                                            # of the current slice after the
                                            # first docker scope -- confirm
                                            # that is intended.
                                            break
            if os.path.isdir(self.cgroup_path + "/lxc"):
                for level1 in os.listdir(self.cgroup_path + "/lxc"):
                    if os.path.isdir(self.cgroup_path + "/lxc/" + level1):
                        self.readdockerstats(
                            self.cgroup_path + "/lxc/" + level1, level1)
Example #13
0
 def __call__(self):
     """Fetch the stats document and hand its content to ``self.process``.

     The URL comes from the ``stats_url`` config entry, defaulting to
     ``http://localhost:9999/stats.txt``.

     Returns:
         Whatever ``self.process`` returns, or ``None`` when fetching or
         processing failed (the failure is logged, not raised).
     """
     with utils.lower_privileges(self._logger):
         try:
             # collect period 60 secs
             url = self.get_config('stats_url', 'http://localhost:9999/stats.txt')
             response = urllib2.urlopen(url)
             try:
                 content = response.read()
             finally:
                 # Release the HTTP connection even when read() fails.
                 response.close()
             return self.process(content)
         except Exception:
             # Best-effort collector: log and carry on, but let
             # KeyboardInterrupt/SystemExit through.
             self.log_exception('unexpected error.')
Example #14
0
 def __init__(self, config, logger, readq):
     """Initialise caches and choose the cgroup mount point for this host.

     ``/cgroup`` is used on centos/redhat releases whose version does not
     start with ``7.``; every other platform uses ``/sys/fs/cgroup``.
     """
     super(Docker, self).__init__(config, logger, readq)
     self.containernames = {}
     self.containerimages = {}
     with utils.lower_privileges(self._logger):
         self.cache = 0
         distro, release = platform.dist()[:2]
         uses_legacy_mount = (distro in ('centos', 'redhat')
                              and not release.startswith("7."))
         self.cgroup_path = '/cgroup' if uses_legacy_mount else '/sys/fs/cgroup'
         self.socket_path = '/var/run/docker.sock'
Example #15
0
 def __call__(self):
     """Fetch the stats document and hand its content to ``self.process``.

     The URL comes from the ``stats_url`` config entry, defaulting to
     ``http://localhost:9999/stats.txt``.

     Returns:
         Whatever ``self.process`` returns, or ``None`` when fetching or
         processing failed (the failure is logged, not raised).
     """
     with utils.lower_privileges(self._logger):
         try:
             # collect period 60 secs
             url = self.get_config('stats_url',
                                   'http://localhost:9999/stats.txt')
             response = urllib2.urlopen(url)
             try:
                 content = response.read()
             finally:
                 # Release the HTTP connection even when read() fails.
                 response.close()
             return self.process(content)
         except Exception:
             # Best-effort collector: log and carry on, but let
             # KeyboardInterrupt/SystemExit through.
             self.log_exception('unexpected error.')
Example #16
0
    def __call__(self):
        """Parse the TCP socket tables and queue ``proc.net.tcp`` datapoints.

        Reads ``self.tcp`` and ``self.tcp6`` (skipping handles that are
        ``None``), classifies every socket line by service, endpoint and
        user, and finally queues a ``procnettcp.state 0`` datapoint.

        NOTE(review): part of this method is garbled in the source as seen
        here (the `" user="******"other"` line), so the counting step
        between parsing and emitting is not visible and the block does not
        parse as written.
        """
        with utils.lower_privileges(self._logger):
            counter = {}

            for procfile in (self.tcp, self.tcp6):
                if procfile is None:
                    continue
                procfile.seek(0)
                ts = int(time.time())
                for line in procfile:
                    try:
                        # pylint: disable=W0612
                        (num, src, dst, state, queue, when, retrans, uid,
                         timeout, inode) = line.split(None, 9)
                    except ValueError:  # Malformed line
                        continue

                    if num == "sl":  # header
                        continue

                    # Addresses look like "<addr>:<port>" with the port in
                    # hexadecimal.
                    srcport = src.split(":")[1]
                    dstport = dst.split(":")[1]
                    srcport = int(srcport, 16)
                    dstport = int(dstport, 16)
                    # A known destination port takes precedence over a known
                    # source port; "other" is used when neither is in PORTS.
                    service = PORTS.get(srcport, "other")
                    service = PORTS.get(dstport, service)

                    if is_public_ip(dst) or is_public_ip(src):
                        endpoint = "external"
                    else:
                        endpoint = "internal"

                    user = self.uids.get(uid, "other")

                    # NOTE(review): the next statement is corrupted in the
                    # source (text masked by "******"); restore it from the
                    # upstream file before relying on this method.
                    key = "state=" + TCPSTATES[state] + " endpoint=" + endpoint + \
                          " service=" + service + " user="******"other", ):
                    key = ("state=%s service=%s" % (TCPSTATES[state], service))
                    if key in counter:
                        self._readq.nput("proc.net.tcp {0} {1} {2}".format(
                            ts, counter[key], key))
                    else:
                        self._readq.nput("proc.net.tcp {0} {1} {2}".format(
                            ts, "0", key))

            self._readq.nput("procnettcp.state %s %s" %
                             (int(time.time()), '0'))
Example #17
0
 def __call__(self):
     """Collect Alauda container stats, re-creating the session on repeated failure.

     A successful collection resets ``self.numExceptionHit``.  Each failure
     is logged and counted; once the count exceeds ``MAX_EXCEPTION_HIT`` the
     collector is cleaned up and the Alauda session initialised again.
     """
     with utils.lower_privileges(self._logger):
         try:
             self.get_container_stats(self.alauda_session, DEFAULT_NAMESPACE, DEFAULT_TOKEN)
         except Exception:
             self.log_exception('exception collecting Alauda docker metrics')
             self.numExceptionHit += 1
             too_many_failures = self.numExceptionHit > MAX_EXCEPTION_HIT
             if too_many_failures:
                 self.cleanup()
                 self._init_alauda_session()
         else:
             self.numExceptionHit = 0
Example #18
0
def main():
    """Run the Graphite bridge server until interrupted.

    Exits with status 13 when the bridge configuration is missing or
    disabled.  On KeyboardInterrupt the server is shut down and its socket
    closed.
    """
    bridge_enabled = graphite_bridge_conf and graphite_bridge_conf.enabled()
    if not bridge_enabled:
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):
        graphite = GraphiteServer((HOST, PORT), GraphiteHandler)
        graphite.daemon_threads = True
        try:
            graphite.serve_forever()
        except KeyboardInterrupt:
            graphite.shutdown()
            graphite.server_close()
Example #19
0
def main():
    """Run the Graphite bridge server until interrupted.

    Exits with status 13 when the bridge configuration is missing or
    disabled.  On KeyboardInterrupt the server is shut down and its socket
    closed.
    """
    bridge_enabled = graphite_bridge_conf and graphite_bridge_conf.enabled()
    if not bridge_enabled:
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):
        graphite = GraphiteServer((HOST, PORT), GraphiteHandler)
        graphite.daemon_threads = True
        try:
            graphite.serve_forever()
        except KeyboardInterrupt:
            graphite.shutdown()
            graphite.server_close()
Example #20
0
 def __call__(self):
     """Report region server state and, when ``json`` is available, emit metrics.

     Queues ``hbase.regionserver.state 0`` and runs the HTTP collector when
     the ``json`` module is usable; otherwise queues state ``1`` and logs an
     error.
     """
     with utils.lower_privileges(self._logger):
         if not json:
             self._readq.nput("hbase.regionserver.state %s %s" %
                              (int(time.time()), '1'))
             self.logger.error(
                 "This collector requires the `json' Python module.")
             return
         self._readq.nput("hbase.regionserver.state %s %s" %
                          (int(time.time()), '0'))
         collector = HBaseRegionserverHttp(self.host, self.port, self.logger,
                                           self.readq)
         collector.emit()
Example #21
0
    def __call__(self):
        """Parse the TCP socket tables and queue ``proc.net.tcp`` datapoints.

        Reads ``self.tcp`` and ``self.tcp6`` (skipping handles that are
        ``None``) and classifies every socket line by state, endpoint,
        service and user.  One datapoint is queued per combination of user
        and endpoint, with ``0`` for combinations absent from ``counter``.

        NOTE(review): part of this method is garbled in the source as seen
        here (the `" user="******"other"` line), so the counting step
        between parsing and emitting is not visible and the block does not
        parse as written.
        """
        with utils.lower_privileges(self._logger):
            counter = {}

            for procfile in (self.tcp, self.tcp6):
                if procfile is None:
                    continue
                procfile.seek(0)
                ts = int(time.time())
                for line in procfile:
                    try:
                        # pylint: disable=W0612
                        (num, src, dst, state, queue, when, retrans,
                         uid, timeout, inode) = line.split(None, 9)
                    except ValueError:  # Malformed line
                        continue

                    if num == "sl":  # header
                        continue

                    # Addresses look like "<addr>:<port>" with the port in
                    # hexadecimal.
                    srcport = src.split(":")[1]
                    dstport = dst.split(":")[1]
                    srcport = int(srcport, 16)
                    dstport = int(dstport, 16)
                    # A known destination port takes precedence over a known
                    # source port; "other" is used when neither is in PORTS.
                    service = PORTS.get(srcport, "other")
                    service = PORTS.get(dstport, service)

                    if is_public_ip(dst) or is_public_ip(src):
                        endpoint = "external"
                    else:
                        endpoint = "internal"

                    user = self.uids.get(uid, "other")

                    # NOTE(review): the next statement is corrupted in the
                    # source (text masked by "******"); restore it from the
                    # upstream file before relying on this method.
                    key = "state=" + TCPSTATES[state] + " endpoint=" + endpoint + \
                          " service=" + service + " user="******"other",):
                    for user in USERS + ("other",):
                        for endpoint in ("internal", "external"):
                            key = ("state=%s endpoint=%s service=%s user=%s"
                                   % (TCPSTATES[state], endpoint, service, user))
                            if key in counter:
                                self._readq.nput("proc.net.tcp {0} {1} {2}".format(ts, counter[key], key))
                            else:
                                self._readq.nput("proc.net.tcp {0} {1} {2}".format(ts, "0", key))
Example #22
0
 def __init__(self, config, logger, readq):
     """Initialise caches and choose the cgroup mount point for this host.

     ``/cgroup`` is used on centos/redhat releases whose version does not
     start with ``7.``; every other platform uses ``/sys/fs/cgroup``.
     """
     super(Docker, self).__init__(config, logger, readq)
     self.containernames = {}
     self.containerimages = {}
     with utils.lower_privileges(self._logger):
         self.cache = 0
         distro, release = platform.dist()[:2]
         uses_legacy_mount = (distro in ('centos', 'redhat')
                              and not release.startswith("7."))
         self.cgroup_path = '/cgroup' if uses_legacy_mount else '/sys/fs/cgroup'
         self.socket_path = '/var/run/docker.sock'
Example #23
0
def main():
    """Relay datagrams received on (HOST, PORT) to stdout.

    A leading ``put `` is stripped from every received line before it is
    written.  Exits with status 13 when the bridge is missing or disabled
    in the configuration, and with status 1 when the socket cannot be
    created or bound.
    """
    if not (udp_bridge_conf and udp_bridge_conf.enabled()):
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):

        def removePut(line):
            """Return *line* without a leading ``put `` prefix."""
            if line.startswith('put '):
                return line[4:]
            return line

        try:
            if udp_bridge_conf and udp_bridge_conf.usetcp():
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            else:
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.bind((HOST, PORT))
        except socket.error as msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        # Configurations without flush_delay() fall back to 60 seconds.
        try:
            flush_delay = udp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        flush_timeout = int(time.time())
        try:
            while 1:
                data, address = sock.recvfrom(SIZE)
                if data:
                    lines = data.splitlines()
                    data = '\n'.join(map(removePut, lines))
                if not data:
                    utils.err("invalid data")
                    break
                # write() replaces the Python-2-only print statement; the
                # emitted text is the same.
                sys.stdout.write(data + '\n')
                now = int(time.time())
                # stdout is flushed at most once per flush_delay seconds.
                if now > flush_timeout:
                    sys.stdout.flush()
                    flush_timeout = now + flush_delay
        except KeyboardInterrupt:
            utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()
Example #24
0
def main():
    """Relay datagrams received on (HOST, PORT) to stdout.

    A leading ``put `` is stripped from every received line before it is
    written.  Exits with status 13 when the bridge is missing or disabled
    in the configuration, and with status 1 when the socket cannot be
    created or bound.
    """
    if not (udp_bridge_conf and udp_bridge_conf.enabled()):
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):

        def removePut(line):
            """Return *line* without a leading ``put `` prefix."""
            if line.startswith('put '):
                return line[4:]
            return line

        try:
            if udp_bridge_conf and udp_bridge_conf.usetcp():
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            else:
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.bind((HOST, PORT))
        except socket.error as msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        # Configurations without flush_delay() fall back to 60 seconds.
        try:
            flush_delay = udp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        flush_timeout = int(time.time())
        try:
            while 1:
                data, address = sock.recvfrom(SIZE)
                if data:
                    lines = data.splitlines()
                    data = '\n'.join(map(removePut, lines))
                if not data:
                    utils.err("invalid data")
                    break
                # write() replaces the Python-2-only print statement; the
                # emitted text is the same.
                sys.stdout.write(data + '\n')
                now = int(time.time())
                # stdout is flushed at most once per flush_delay seconds.
                if now > flush_timeout:
                    sys.stdout.flush()
                    flush_timeout = now + flush_delay
        except KeyboardInterrupt:
            utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()
Example #25
0
 def call(self, metric):
     """Run the configured collector class and report its state under *metric*.

     Queues ``<metric> <timestamp> 0`` after a successful ``emit()`` and
     ``<metric> <timestamp> 1`` when the ``json`` module is missing or any
     exception is raised.  Exceptions are logged, never propagated.

     Args:
         metric: name of the state metric to report.
     """
     try:
         with utils.lower_privileges(self._logger):
             if json:
                 self.exeClass(self.service, self.daemon, self.host,
                               self.port, self.REPLACEMENTS, self.readq,
                               self._logger).emit()
                 self._readq.nput("%s %s %s" %
                                  (metric, int(time.time()), '0'))
             else:
                 self._readq.nput("%s %s %s" %
                                  (metric, int(time.time()), '1'))
                 self.log_error(
                     "This collector requires the `json' Python module.")
     except Exception as e:
         # `as` replaces the Python-2-only `except Exception, e` form.
         self._readq.nput("%s %s %s" % (metric, int(time.time()), '1'))
         self.log_error("metric is %s error is %s" % (metric, str(e)))
    def __call__(self):
        """Start one worker thread per (region, statistic) pair and wait for them.

        Returns:
            0 when interrupted from the keyboard; otherwise ``None`` after any
            queued metrics have been sent.
        """
        with utils.lower_privileges(self._logger):
            try:
                self.validate_config()
                for reg in ec2_list_regions():
                    for statistic in STATISTICS:
                        worker = threading.Thread(
                            target=self.handle_region,
                            kwargs={"region": reg, "statistic": statistic})
                        worker.start()
                # Poll once a second while more than one thread is alive.
                while threading.activeCount() > 1:
                    time.sleep(1)
            except exceptions.KeyboardInterrupt:
                return 0

            if not sendQueue.empty():
                self.send_metrics()
Example #27
0
def main():
    """Build one JolokiaCollector per configured instance and poll them forever.

    Exits with status 13 when the collector is disabled by configuration or
    when an instance ends up without any monitors.  The polling loop stops
    on KeyboardInterrupt during the sleep between rounds.
    """
    if not (jolokia_conf and jolokia_conf.enabled()):
        utils.err("Jolokia collector disable by config")
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):

        CONFIG = jolokia_conf.get_config()
        collectors = []

        for instance in CONFIG['instances']:
            # Merge the shared settings into each instance first.
            if 'common_tags' in CONFIG:
                if 'tags' in instance:
                    instance['tags'].update(CONFIG['common_tags'])
                else:
                    instance['tags'] = CONFIG['common_tags']
            if 'common_monitors' in CONFIG:
                if 'monitors' in instance:
                    instance['monitors'] += CONFIG['common_monitors']
                else:
                    instance['monitors'] = CONFIG['common_monitors']

            if 'monitors' not in instance:
                utils.err("error: no monitors configured")
                sys.exit(13)

            # Fill in defaults for the optional keys.
            instance.setdefault('tags', [])
            instance.setdefault('auth', {'username': '', 'password': ''})

            collectors.append(
                JolokiaCollector(instance['url'], instance['auth'],
                                 instance['tags'], instance['monitors']))

        # LOOP!!
        while True:
            for collector in collectors:
                collector.process_data()

            try:
                time.sleep(CONFIG['interval'])
            except KeyboardInterrupt:
                break
Example #28
0
def main():
    """Stream Zabbix history rows from the MySQL binlog to stdout.

    Returns:
        1 when ``pymysqlreplication`` or ``pymysql`` is missing; otherwise
        ``None`` once the binlog stream ends.
    """
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):
        if BinLogStreamReader is None:
            utils.err("error: Python module `pymysqlreplication' is missing")
            return 1
        if pymysql is None:
            utils.err("error: Python module `pymysql' is missing")
            return 1
        settings = zabbix_bridge_conf.get_settings()

        # Set blocking to True if you want to block and wait for the next event at
        # the end of the stream
        stream = BinLogStreamReader(connection_settings=settings['mysql'],
                                    server_id=settings['slaveid'],
                                    only_events=[WriteRowsEvent],
                                    resume_stream=True,
                                    blocking=True)

        # try/finally closes the replication stream on errors too, not only
        # on a clean end of stream.
        try:
            hostmap = gethostmap(settings)  # Prime initial hostmap
            for binlogevent in stream:
                if binlogevent.schema != settings['mysql']['db']:
                    continue
                if binlogevent.table not in ('history', 'history_uint'):
                    continue
                for row in binlogevent.rows:
                    r = row['values']
                    itemid = r['itemid']
                    try:
                        hm = hostmap[itemid]
                        # write() replaces the Python-2-only print statement.
                        sys.stdout.write(
                            "zbx.%s %d %s host=%s proxy=%s\n" % (
                                hm['key'], r['clock'], r['value'], hm['host'],
                                hm['proxy']))
                    except KeyError:
                        # An unknown item triggers a hostmap reload; the row
                        # that missed is reported and not emitted.
                        # TODO: Consider https://wiki.python.org/moin/PythonDecoratorLibrary#Retry
                        hostmap = gethostmap(settings)
                        utils.err("error: Key lookup miss for %s" %
                                  (itemid))
                sys.stdout.flush()
                # if n seconds old, reload
                # settings['gethostmap_interval']
        finally:
            stream.close()
Example #29
0
def main():
    """Build one JolokiaCollector per configured instance and poll them forever.

    Exits with status 13 when the collector is disabled by configuration or
    when an instance ends up without any monitors.  The polling loop stops
    on KeyboardInterrupt during the sleep between rounds.
    """
    if not (jolokia_conf and jolokia_conf.enabled()):
        utils.err("Jolokia collector disable by config")
        sys.exit(13)
    # NOTE(review): `self` is not defined in this function -- confirm which
    # logger should be passed to lower_privileges.
    with utils.lower_privileges(self._logger):

        CONFIG = jolokia_conf.get_config()
        collectors = []

        for instance in CONFIG['instances']:
            # Merge the shared settings into each instance first.
            if 'common_tags' in CONFIG:
                if 'tags' in instance:
                    instance['tags'].update(CONFIG['common_tags'])
                else:
                    instance['tags'] = CONFIG['common_tags']
            if 'common_monitors' in CONFIG:
                if 'monitors' in instance:
                    instance['monitors'] += CONFIG['common_monitors']
                else:
                    instance['monitors'] = CONFIG['common_monitors']

            if 'monitors' not in instance:
                utils.err("error: no monitors configured")
                sys.exit(13)

            # Fill in defaults for the optional keys.
            instance.setdefault('tags', [])
            instance.setdefault('auth', {'username': '', 'password': ''})

            collectors.append(
                JolokiaCollector(instance['url'], instance['auth'],
                                 instance['tags'], instance['monitors']))

        # LOOP!!
        while True:
            for collector in collectors:
                collector.process_data()

            try:
                time.sleep(CONFIG['interval'])
            except KeyboardInterrupt:
                break
Example #30
0
def main():
 with utils.lower_privileges(self._logger):
     bad_regex = re.compile("[,()]+")  # avoid forbidden by TSD symbols

     while True:
        try:
          if vstats == "all":
            stats = subprocess.Popen(
              ["varnishstat", "-1", "-j"],
              stdout=subprocess.PIPE,
            )
          else:
            fields = ",".join(vstats)
            stats = subprocess.Popen(
              ["varnishstat", "-1", "-f" + fields, "-j"],
              stdout=subprocess.PIPE,
            )
        except OSError, e:
          # Die and signal to tcollector not to run this script.
          sys.stderr.write("Error: %s\n" % e)
          sys.exit(13)

        metrics = ""
        for line in stats.stdout.readlines():
          metrics += line
        metrics = json.loads(metrics)

        timestamp = ""
        if use_varnishstat_timestamp:
          pattern = "%Y-%m-%dT%H:%M:%S"
          timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern)))
        else:
          timestamp = time.time()

        for k, v in metrics.iteritems():
          if k != "timestamp" and None == bad_regex.search(k):
            metric_name = metric_prefix + "." + k
            print "%s %d %s %s" % \
              (metric_name, timestamp, v['value'], ",".join(tags))

        sys.stdout.flush()
        time.sleep(interval)
Example #31
0
    def __call__(self):
        """Emit CPU time and memory usage of the process behind self.process.

        Does nothing beyond logging a warning when the process has already
        terminated.
        """
        with utils.lower_privileges(self._logger):
            try:
                stat = self.process.stat()
            except ProcessTerminatedError:
                self.log_warn("process terminated, abort")
                return

            # CPU time is the sum of all per-mode counters in the stat record.
            cpu_fields = ("utime", "cutime", "stime", "cstime",
                          "guest_time", "cguest_time")
            cpu_time = sum(int(stat[field]) for field in cpu_fields)

            now = int(time.time())
            emit = self._readq.nput
            emit("cloudwiz-agent.cputime %s %s" % (now, cpu_time))
            emit("cloudwiz-agent.mem_bytes %s %s type=vsize" % (now, stat["vsize"]))
            # rss is multiplied by the page size before being reported.
            emit("cloudwiz-agent.mem_bytes %s %s type=rss" % (now, int(stat["rss"]) * resource.getpagesize()))
Example #32
0
def main():
    """Stream Zabbix history rows from the MySQL binlog and print them.

    For every row written to the `history` or `history_uint` table of the
    configured database, one line
    "zbx.<key> <clock> <value> host=<host> proxy=<proxy>" is printed, using
    the item-to-host map returned by gethostmap().

    Returns 1 when the `pymysqlreplication` or `pymysql` module is missing.
    """
    # NOTE(review): `self` is not a parameter of this module-level function;
    # confirm where the logger handed to lower_privileges should come from.
    with utils.lower_privileges(self._logger):
        if BinLogStreamReader is None:
            utils.err("error: Python module `pymysqlreplication' is missing")
            return 1
        if pymysql is None:
            utils.err("error: Python module `pymysql' is missing")
            return 1
        settings = zabbix_bridge_conf.get_settings()

        # Set blocking to True if you want to block and wait for the next event at
        # the end of the stream
        stream = BinLogStreamReader(connection_settings=settings['mysql'],
                                    server_id=settings['slaveid'],
                                    only_events=[WriteRowsEvent],
                                    resume_stream=True,
                                    blocking=True)

        # Close the replication stream even when the loop below raises.
        try:
            hostmap = gethostmap(settings)  # Prime initial hostmap
            for binlogevent in stream:
                if binlogevent.schema != settings['mysql']['db']:
                    continue
                if binlogevent.table not in ('history', 'history_uint'):
                    continue

                for row in binlogevent.rows:
                    r = row['values']
                    itemid = r['itemid']
                    try:
                        hm = hostmap[itemid]
                        print("zbx.%s %d %s host=%s proxy=%s" % (hm['key'], r['clock'], r['value'], hm['host'], hm['proxy']))
                    except KeyError:
                        # The row that missed is dropped; only later rows
                        # benefit from the reloaded map.
                        # TODO: Consider https://wiki.python.org/moin/PythonDecoratorLibrary#Retry
                        hostmap = gethostmap(settings)
                        utils.err("error: Key lookup miss for %s" % (itemid))
                sys.stdout.flush()
                # if n seconds old, reload
                # settings['gethostmap_interval']
        finally:
            stream.close()
    def __call__(self):
        """Collect statistics for every EC2 region, then send queued metrics.

        One worker thread running self.handle_region is started per
        (region, statistic) pair.  The call waits for those workers to finish
        and afterwards calls self.send_metrics() if sendQueue is not empty.

        Returns 0 when interrupted from the keyboard, otherwise None.
        """
        with utils.lower_privileges(self._logger):
            workers = []
            try:
                self.validate_config()
                for reg in ec2_list_regions():
                    for statistic in STATISTICS:
                        t = threading.Thread(target=self.handle_region,
                                             kwargs={
                                                 "region": reg,
                                                 "statistic": statistic
                                             })
                        t.start()
                        workers.append(t)
                # Wait only for the threads started above.  Polling the
                # process-wide thread count would never finish if the host
                # process runs any other thread.  The join timeout keeps the
                # wait interruptible.
                for t in workers:
                    while t.is_alive():
                        t.join(1)
            except KeyboardInterrupt:
                return 0

            if not sendQueue.empty():
                self.send_metrics()
Example #34
0
    def __call__(self):
        """Walk the cgroup tree and read stats for every container found.

        Containers are looked up under "<controller>/docker", under the
        systemd slices of each controller, and under "lxc".  The cached
        container name/image maps are reset on every fourth call.
        """
        with utils.lower_privileges(self._logger):
            # Connect to Docker socket to get informations about containers every 4 times
            if self.cache == 0:
                self.containernames = {}
                self.containerimages = {}
            self.cache += 1
            if self.cache == 4:
                self.cache = 0

            root = self.cgroup_path
            cpu_aliases = ("cpu,cpuacct", "cpuacct")

            def shadowed_by_cpu(controller, child):
                # /cgroup/cpu and /cgroup/cpuacct are often links to /cgroup/cpu,cpuacct
                return (controller in cpu_aliases and
                        os.path.isdir(root + "/cpu/" + child))

            if os.path.isdir(root):
                for level1 in os.listdir(root):
                    docker_dir = root + "/" + level1 + "/docker"
                    if os.path.isdir(docker_dir) and not shadowed_by_cpu(level1, "docker"):
                        for level2 in os.listdir(docker_dir):
                            container_dir = docker_dir + "/" + level2
                            if os.path.isdir(container_dir):
                                self.readdockerstats(container_dir, level2)
                        continue

                    # If Docker cgroup is handled by slice
                    # http://www.freedesktop.org/software/systemd/man/systemd.slice.html
                    for slicename in ("system.slice", "machine.slice", "user.slice"):
                        slice_dir = root + "/" + level1 + "/" + slicename
                        if not os.path.isdir(slice_dir) or shadowed_by_cpu(level1, slicename):
                            continue
                        for level2 in os.listdir(slice_dir):
                            scope_dir = slice_dir + "/" + level2
                            if not os.path.isdir(scope_dir):
                                continue
                            m = re.search("^docker-(\w+)\.scope$", level2)
                            if m:
                                self.readdockerstats(scope_dir, m.group(1))
                                # Only the first matching scope of a slice is read.
                                break

            lxc_dir = root + "/lxc"
            if os.path.isdir(lxc_dir):
                for level1 in os.listdir(lxc_dir):
                    container_dir = lxc_dir + "/" + level1
                    if os.path.isdir(container_dir):
                        self.readdockerstats(container_dir, level1)
Example #35
0
def main():
    """Connect to all configured MongoDB nodes and poll them forever.

    A client is opened for every entry of CONFIG_CONN, MONGOS_CONN and
    REPLICA_CONN (authenticating when USER is set) and stored under the
    entry's 'link' key.  Each pass then runs the matching status function
    on every client and sleeps INTERVAL seconds.

    Returns 13 when the `pymongo` module is missing.
    """
    loadEnv()

    # NOTE(review): `self` is not a parameter of this module-level function;
    # confirm where the logger handed to lower_privileges should come from.
    with utils.lower_privileges(self._logger):
        if pymongo is None:
            sys.stderr.write("error: Python module `pymongo' is missing\n")
            return 13

        # Open one client per configured node, group by group.
        for targets in (CONFIG_CONN, MONGOS_CONN, REPLICA_CONN):
            for index, item in enumerate(targets):
                client = pymongo.MongoClient(host=item['host'], port=item['port'])
                if USER:
                    client.admin.authenticate(USER, PASS, mechanism='DEFAULT')
                targets[index]['link'] = client

        # Each node group has its own status function.
        pollers = (
            (CONFIG_CONN, runServerStatus),
            (MONGOS_CONN, runDbStats),
            (REPLICA_CONN, runReplSetGetStatus),
        )
        while True:
            for targets, poll in pollers:
                for target in targets:
                    poll(target['link'])

            sys.stdout.flush()
            time.sleep(INTERVAL)
Example #36
0
    def __call__(self):
        """Report cumulative CPU time plus virtual and resident memory size.

        Values come from self.process.stat(); when that raises
        ProcessTerminatedError a warning is logged and nothing is emitted.
        """
        with utils.lower_privileges(self._logger):
            try:
                s = self.process.stat()
            except ProcessTerminatedError:
                self.log_warn("process terminated, abort")
                return

            # Add up every CPU counter of the stat record.
            total_cpu = 0
            for counter in ("utime", "cutime", "stime", "cstime",
                            "guest_time", "cguest_time"):
                total_cpu += int(s[counter])

            ts = int(time.time())
            self._readq.nput("cloudwiz-agent.cputime %s %s" % (ts, total_cpu))

            vsize_point = "cloudwiz-agent.mem_bytes %s %s type=vsize" % (ts, s["vsize"])
            self._readq.nput(vsize_point)

            # rss is scaled by the page size before reporting.
            rss_bytes = int(s["rss"]) * resource.getpagesize()
            self._readq.nput("cloudwiz-agent.mem_bytes %s %s type=rss" % (ts, rss_bytes))
Example #37
0
def main():
    """Main loop: poll the local Riak stats endpoint and print data points.

    Every 15 seconds http://localhost:8098/stats is fetched and each key
    present in MAP is printed as "riak.<metric> <ts> <value> <tags>", plus
    the number of connected nodes.  Exits with status 13 when /usr/lib/riak
    does not exist.
    """

    # don't run if we're not a riak node
    if not os.path.exists("/usr/lib/riak"):
        sys.exit(13)

    # NOTE(review): `self` is not a parameter of this module-level function;
    # confirm where the logger handed to lower_privileges should come from.
    with utils.lower_privileges(self._logger):
        sys.stdin.close()

        interval = 15

        def print_stat(metric, value, tags=""):
            # `ts` is read from the enclosing loop at call time.
            if value is not None:
                print("riak.%s %d %s %s" % (metric, ts, value, tags))

        while True:
            ts = int(time.time())

            req = urllib2.urlopen("http://localhost:8098/stats")
            # Close the response even when reading or parsing fails.
            try:
                obj = json.loads(req.read())
            finally:
                req.close()

            for key in obj:
                if key not in MAP:
                    continue
                metric, tags = MAP[key][0], MAP[key][1]
                value = obj[key]
                # this is a hack, but Riak reports latencies in microseconds.  they're fairly useless
                # to our human operators, so we're going to convert them to seconds.
                # A missing (None) value is left alone so print_stat can skip it.
                if 'latency' in metric and value is not None:
                    value = value / 1000000.0
                print_stat(metric, value, tags)
            if 'connected_nodes' in obj:
                print_stat('connected_nodes', len(obj['connected_nodes']), '')

            sys.stdout.flush()
            time.sleep(interval)
Example #38
0
    def __call__(self):
        """Emit the 16 per-interface counters read from self.f_netdev.

        Counter indexes 0-7 are tagged direction=in, 8-15 direction=out.
        """
        with utils.lower_privileges(self._logger):

            # We just care about ethN and emN interfaces.  We specifically
            # want to avoid bond interfaces, because interface
            # stats are still kept on the child interfaces when
            # you bond.  By skipping bond we avoid double counting.

            self.f_netdev.seek(0)
            ts = int(time.time())
            for line in self.f_netdev:
                m = re.match("\s+(eth?\d+|em\d+_\d+/\d+|em\d+_\d+|em\d+|"
                             "p\d+p\d+_\d+/\d+|p\d+p\d+_\d+|p\d+p\d+):(.*)", line)
                if m is None:
                    continue
                intf, raw_counters = m.groups()
                stats = raw_counters.split()

                for i in xrange(16):
                    flow = "out" if i >= 8 else "in"
                    self._readq.nput("proc.net.%s %d %s iface=%s direction=%s" % (FIELDS[i], ts, stats[i], intf, flow))
Example #39
0
def main():
    """Main loop: poll the local Riak stats endpoint and print data points.

    Every 15 seconds http://localhost:8098/stats is fetched and each key
    present in MAP is printed as "riak.<metric> <ts> <value> <tags>", plus
    the number of connected nodes.  Exits with status 13 when /usr/lib/riak
    does not exist.
    """

    # don't run if we're not a riak node
    if not os.path.exists("/usr/lib/riak"):
        sys.exit(13)

    # NOTE(review): `self` is not a parameter of this module-level function;
    # confirm where the logger handed to lower_privileges should come from.
    with utils.lower_privileges(self._logger):
        sys.stdin.close()

        interval = 15

        def print_stat(metric, value, tags=""):
            # `ts` is read from the enclosing loop at call time.
            if value is not None:
                print("riak.%s %d %s %s" % (metric, ts, value, tags))

        while True:
            ts = int(time.time())

            req = urllib2.urlopen("http://localhost:8098/stats")
            # Close the response even when reading or parsing fails.
            try:
                obj = json.loads(req.read())
            finally:
                req.close()

            for key in obj:
                if key not in MAP:
                    continue
                metric, tags = MAP[key][0], MAP[key][1]
                value = obj[key]
                # this is a hack, but Riak reports latencies in microseconds.  they're fairly useless
                # to our human operators, so we're going to convert them to seconds.
                # A missing (None) value is left alone so print_stat can skip it.
                if 'latency' in metric and value is not None:
                    value = value / 1000000.0
                print_stat(metric, value, tags)
            if 'connected_nodes' in obj:
                print_stat('connected_nodes', len(obj['connected_nodes']), '')

            sys.stdout.flush()
            time.sleep(interval)
Example #40
0
def main():
    """ntpstats main loop

    Runs `ntpq -p` every collection interval and prints the offset column
    of the peer line marked with "*" as "ntp.offset <ts> <offset>".  When
    no such line is present nothing is printed for that poll.

    Exits with status 13 when the ntpq executable cannot be found.
    """

    collection_interval = DEFAULT_COLLECTION_INTERVAL
    if ntpstat_conf:
        config = ntpstat_conf.get_config()
        collection_interval = config['collection_interval']

    # NOTE(review): `self` is not a parameter of this module-level function;
    # confirm where the logger handed to lower_privileges should come from.
    with utils.lower_privileges(self._logger):
        while True:
            ts = int(time.time())
            try:
                ntp_proc = subprocess.Popen(["ntpq", "-p"], stdout=subprocess.PIPE)
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # looks like ntpq is not available, stop using this collector
                    sys.exit(13)  # we signal tcollector to stop using this
                raise

            stdout, _ = ntp_proc.communicate()
            if ntp_proc.returncode == 0:
                # Reset on every poll so a missing "*" line neither raises
                # nor re-reports the offset of an earlier poll.
                offset = None
                for line in stdout.split("\n"):
                    fields = line.split()
                    # The offset is the ninth column; ignore shorter lines.
                    if len(fields) > 8 and fields[0].startswith("*"):
                        offset = fields[8]
                if offset is not None:
                    print("ntp.offset %d %s" % (ts, offset))
            else:
                sys.stderr.write("ntpq -p, returned %r\n" % (ntp_proc.returncode))

            sys.stdout.flush()
            time.sleep(collection_interval)
Example #41
0
 def __call__(self):
     """Emit metrics for this Hadoop daemon through a HadoopNode.

     Logs an error instead when the `json` module is unavailable.
     """
     with utils.lower_privileges(self._logger):
         # NOTE(review): this block uses self.readq / self.logger next to
         # self._logger; confirm those attributes exist on the class.
         if not json:
             self.logger.error("This collector requires the `json' Python module.")
         else:
             node = HadoopNode(self.service, self.daemon, self.host, self.port, REPLACEMENTS, self.readq, self._logger)
             node.emit()
Example #42
0
    def __call__(self):
        """Read every open stats file once and queue the resulting data points.

        Each file handle held on the instance is rewound with seek(0) and
        re-read, a fresh integer timestamp is taken per section, and every
        value is pushed to self._readq as "<metric> <ts> <value> [tags]".
        Sections, in order: uptime, meminfo, vmstat, stat, loadavg,
        entropy_avail, interrupts, softirqs, NUMA stats and CPU frequency
        scaling (min / max / cur).
        """
        with utils.lower_privileges(self._logger):
            # proc.uptime
            self.f_uptime.seek(0)
            ts = int(time.time())
            for line in self.f_uptime:
                # Two whitespace-separated values: reported as total and now.
                m = re.match("(\S+)\s+(\S+)", line)
                if m:
                    self._readq.nput("proc.uptime.total %d %s" % (ts, m.group(1)))
                    self._readq.nput("proc.uptime.now %d %s" % (ts, m.group(2)))

            # proc.meminfo
            self.f_meminfo.seek(0)
            ts = int(time.time())
            for line in self.f_meminfo:
                # Only "<name>: <number> <unit>" lines match; others are skipped.
                m = re.match("(\w+):\s+(\d+)\s+(\w+)", line)
                if m:
                    if m.group(3).lower() == 'kb':
                        # convert from kB to B for easier graphing
                        value = str(int(m.group(2)) * 1024)
                    else:
                        value = m.group(2)
                    self._readq.nput("proc.meminfo.%s %d %s" % (m.group(1).lower(), ts, value))

            # proc.vmstat
            self.f_vmstat.seek(0)
            ts = int(time.time())
            for line in self.f_vmstat:
                m = re.match("(\w+)\s+(\d+)", line)
                if not m:
                    continue
                # Only the paging, swapping and fault counters are reported.
                if m.group(1) in ("pgpgin", "pgpgout", "pswpin",
                                  "pswpout", "pgfault", "pgmajfault"):
                    self._readq.nput("proc.vmstat.%s %d %s" % (m.group(1), ts, m.group(2)))

            # proc.stat
            self.f_stat.seek(0)
            ts = int(time.time())
            for line in self.f_stat:
                m = re.match("(\w+)\s+(.*)", line)
                if not m:
                    continue
                if m.group(1).startswith("cpu"):
                    # "cpuN" lines get a ".percpu" metric suffix and a cpu=N
                    # tag; the plain "cpu" line gets neither.
                    cpu_m = re.match("cpu(\d+)", m.group(1))
                    if cpu_m:
                        metric_percpu = '.percpu'
                        tags = ' cpu=%s' % cpu_m.group(1)
                    else:
                        metric_percpu = ''
                        tags = ''
                    fields = m.group(2).split()
                    cpu_types = ['user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'guest', 'guest_nice']

                    # We use zip to ignore fields that don't exist.
                    for value, field_name in zip(fields, cpu_types):
                        self._readq.nput("proc.stat.cpu%s %d %s type=%s%s" % (metric_percpu, ts, value, field_name, tags))
                elif m.group(1) == "intr":
                    # Only the first number of the intr line is reported.
                    self._readq.nput("proc.stat.intr %d %s" % (ts, m.group(2).split()[0]))
                elif m.group(1) == "ctxt":
                    self._readq.nput("proc.stat.ctxt %d %s" % (ts, m.group(2)))
                elif m.group(1) == "processes":
                    self._readq.nput("proc.stat.processes %d %s" % (ts, m.group(2)))
                elif m.group(1) == "procs_blocked":
                    self._readq.nput("proc.stat.procs_blocked %d %s" % (ts, m.group(2)))

            # proc.loadavg
            self.f_loadavg.seek(0)
            ts = int(time.time())
            for line in self.f_loadavg:
                # Three load figures followed by "<runnable>/<total threads>".
                m = re.match("(\S+)\s+(\S+)\s+(\S+)\s+(\d+)/(\d+)\s+", line)
                if not m:
                    continue
                self._readq.nput("proc.loadavg.1min %d %s" % (ts, m.group(1)))
                self._readq.nput("proc.loadavg.5min %d %s" % (ts, m.group(2)))
                self._readq.nput("proc.loadavg.15min %d %s" % (ts, m.group(3)))
                self._readq.nput("proc.loadavg.runnable %d %s" % (ts, m.group(4)))
                self._readq.nput("proc.loadavg.total_threads %d %s" % (ts, m.group(5)))

            # proc.kernel.entropy_avail: every line is reported verbatim (stripped).
            self.f_entropy_avail.seek(0)
            ts = int(time.time())
            for line in self.f_entropy_avail:
                self._readq.nput("proc.kernel.entropy_avail %d %s" % (ts, line.strip()))

            # proc.interrupts
            self.f_interrupts.seek(0)
            ts = int(time.time())
            # Get number of CPUs from description line.
            num_cpus = len(self.f_interrupts.readline().split())
            for line in self.f_interrupts:
                cols = line.split()

                irq_type = cols[0].rstrip(":")
                if irq_type.isalnum():
                    if irq_type.isdigit():
                        # Numeric IRQs are kept only for "PCI-MSI-edge" lines
                        # whose last column contains "eth"; that last column
                        # then replaces the number as the type tag.
                        if cols[-2] == "PCI-MSI-edge" and "eth" in cols[-1]:
                            irq_type = cols[-1]
                        else:
                            continue  # Interrupt type is just a number, ignore.
                    for i, val in enumerate(cols[1:]):
                        if i >= num_cpus:
                            # All values read, remaining cols contain textual
                            # description
                            break
                        if not val.isdigit():
                            # something is weird, there should only be digit values
                            self.log_error("Unexpected interrupts value %r in %r: ", val, cols)
                            break
                        self._readq.nput("proc.interrupts %s %s type=%s cpu=%s" % (ts, val, irq_type, i))

            # proc.softirqs
            self.f_softirqs.seek(0)
            ts = int(time.time())
            # Get number of CPUs from description line.
            num_cpus = len(self.f_softirqs.readline().split())
            for line in self.f_softirqs:
                cols = line.split()

                irq_type = cols[0].rstrip(":")
                for i, val in enumerate(cols[1:]):
                    if i >= num_cpus:
                        # All values read, remaining cols contain textual
                        # description
                        break
                    if not val.isdigit():
                        # something is weird, there should only be digit values
                        self.log_error("Unexpected softirq value %r in %r: ", val, cols)
                        break
                    self._readq.nput("proc.softirqs %s %s type=%s cpu=%s" % (ts, val, irq_type, i))

            self._print_numa_stats(self.numastats)

            # Print scaling stats
            # Each of the three dicts maps a CPU number to an open file whose
            # lines are reported with a cpu=<number> tag.
            ts = int(time.time())
            for cpu_no in self.f_scaling_min.keys():
                f = self.f_scaling_min[cpu_no]
                f.seek(0)
                for line in f:
                    self._readq.nput("proc.scaling.min %d %s cpu=%s" % (ts, line.rstrip('\n'), cpu_no))
            ts = int(time.time())
            for cpu_no in self.f_scaling_max.keys():
                f = self.f_scaling_max[cpu_no]
                f.seek(0)
                for line in f:
                    self._readq.nput("proc.scaling.max %d %s cpu=%s" % (ts, line.rstrip('\n'), cpu_no))
            ts = int(time.time())
            for cpu_no in self.f_scaling_cur.keys():
                f = self.f_scaling_cur[cpu_no]
                f.seek(0)
                for line in f:
                    self._readq.nput("proc.scaling.cur %d %s cpu=%s" % (ts, line.rstrip('\n'), cpu_no))
Example #43
0
        "remove", "rmdir", "rename", "link", "readdir", "readdirplus",
        "fsstat", "fsinfo", "pathconf", "commit",
    ),
}


def main():
    """nfsstat main loop"""

    try:
        f_nfs = open("/proc/net/rpc/nfs")
    except IOError, e:
        print >>sys.stderr, "Failed to open input file: %s" % (e,)
        return 13  # Ask tcollector to not re-start us immediately.

    with utils.lower_privileges(self._logger):
        while True:
            f_nfs.seek(0)
            ts = int(time.time())
            for line in f_nfs:
                fields = line.split()
                if fields[0] in nfs_client_proc_names.keys():
                    # NFSv4
                    # first entry should equal total count of subsequent entries
                    assert int(fields[1]) == len(fields[2:]), (
                        "reported count (%d) does not equal list length (%d)"
                        % (int(fields[1]), len(fields[2:])))
                    for idx, val in enumerate(fields[2:]):
                        try:
                            print ("nfs.client.rpc %d %s op=%s version=%s"
                                   % (ts, int(val), nfs_client_proc_names[fields[0]][idx], fields[0][4:]))
Example #44
0
        "pathconf",
        "commit",
    ),
}


def main():
    """nfsstat main loop"""

    try:
        f_nfs = open("/proc/net/rpc/nfs")
    except IOError, e:
        print >> sys.stderr, "Failed to open input file: %s" % (e, )
        return 13  # Ask tcollector to not re-start us immediately.

    with utils.lower_privileges(self._logger):
        while True:
            f_nfs.seek(0)
            ts = int(time.time())
            for line in f_nfs:
                fields = line.split()
                if fields[0] in nfs_client_proc_names.keys():
                    # NFSv4
                    # first entry should equal total count of subsequent entries
                    assert int(fields[1]) == len(fields[2:]), (
                        "reported count (%d) does not equal list length (%d)" %
                        (int(fields[1]), len(fields[2:])))
                    for idx, val in enumerate(fields[2:]):
                        try:
                            print("nfs.client.rpc %d %s op=%s version=%s" %
                                  (ts, int(val),
Example #45
0
    def __call__(self):
        with utils.lower_privileges(self._logger):
            ret_metrics = []
            devices = []
            self.f_mounts.seek(0)
            ts = int(time.time())

            for line in self.f_mounts:
                # Docs come from the fstab(5)
                # fs_spec     # Mounted block special device or remote filesystem
                # fs_file     # Mount point
                # fs_vfstype  # File system type
                # fs_mntops   # Mount options
                # fs_freq     # Dump(8) utility flags
                # fs_passno   # Order in which filesystem checks are done at reboot time
                try:
                    fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = line.split(
                        None)
                except ValueError, e:
                    self._readq.nput("df.state %d %s" % (ts, "1"))
                    self.log_exception("can't parse line at /proc/mounts.")
                    continue

                if fs_spec == "none":
                    continue
                elif fs_vfstype in FSTYPE_IGNORE or fs_vfstype.startswith(
                        "fuse."):
                    continue
                # startswith(tuple) avoided to preserve support of Python 2.4
                elif fs_file.startswith("/dev") or fs_file.startswith("/sys") or \
                      fs_file.startswith("/proc") or fs_file.startswith("/lib") or \
                      fs_file.startswith("net:"):
                    continue

                # keep /dev/xxx device with shorter fs_file (remove mount binds)
                device_found = False
                if fs_spec.startswith("/dev"):
                    for device in devices:
                        if fs_spec == device[0]:
                            device_found = True
                            if len(fs_file) < len(device[1]):
                                device[1] = fs_file
                            break
                    if not device_found:
                        devices.append([fs_spec, fs_file, fs_vfstype])
                else:
                    devices.append([fs_spec, fs_file, fs_vfstype])

            for device in devices:
                fs_spec, fs_file, fs_vfstype = device
                try:
                    r = os.statvfs(fs_file)
                except OSError, e:
                    self._readq.nput("df.state %d %s" % (ts, "1"))
                    self.log_exception(
                        "can't get info for mount point: %s: %s" %
                        (fs_file, e))
                    continue

                used = r.f_blocks - r.f_bfree

                # conditional expression avoided to preserve support of Python 2.4
                # percent_used = 100 if r.f_blocks == 0 else used * 100.0 / r.f_blocks
                if r.f_blocks == 0:
                    percent_used = 100
                else:
                    percent_used = used * 100.0 / r.f_blocks

                self._readq.nput(
                    "df.bytes.total %d %s mount=%s fstype=%s" %
                    (ts, r.f_frsize * r.f_blocks, fs_file, fs_vfstype))
                self._readq.nput("df.bytes.used %d %s mount=%s fstype=%s" %
                                 (ts, r.f_frsize * used, fs_file, fs_vfstype))
                self._readq.nput(
                    "df.bytes.percentused %d %s mount=%s fstype=%s" %
                    (ts, percent_used, fs_file, fs_vfstype))
                self._readq.nput(
                    "df.bytes.free %d %s mount=%s fstype=%s" %
                    (ts, r.f_frsize * r.f_bfree, fs_file, fs_vfstype))

                used = r.f_files - r.f_ffree

                # percent_used = 100 if r.f_files == 0 else used * 100.0 / r.f_files
                if r.f_files == 0:
                    percent_used = 100
                else:
                    percent_used = used * 100.0 / r.f_files

                self._readq.nput("df.inodes.total %d %s mount=%s fstype=%s" %
                                 (ts, r.f_files, fs_file, fs_vfstype))
                self._readq.nput("df.inodes.used %d %s mount=%s fstype=%s" %
                                 (ts, used, fs_file, fs_vfstype))
                self._readq.nput(
                    "df.inodes.percentused %d %s mount=%s fstype=%s" %
                    (ts, percent_used, fs_file, fs_vfstype))
                self._readq.nput("df.inodes.free %d %s mount=%s fstype=%s" %
                                 (ts, r.f_ffree, fs_file, fs_vfstype))
                self._readq.nput("df.state %d %s" % (ts, "0"))
Example #46
0
 def __call__(self):
     """Collect stats for every container returned by get_container_list()."""
     with utils.lower_privileges(self._logger):
         for name in self.get_container_list():
             self.get_container_stats(name)
Example #47
0
    def __call__(self):
        init_stats = {
            "read_requests": 0,
            "read_merged": 0,
            "read_sectors": 0,
            "msec_read": 0,
            "write_requests": 0,
            "write_merged": 0,
            "write_sectors": 0,
            "msec_write": 0,
            "ios_in_progress": 0,
            "msec_total": 0,
            "msec_weighted_total": 0,
        }
        prev_stats = dict()
        with utils.lower_privileges(self._logger):
            self.f_diskstats.seek(0)
            ts = int(time.time())
            itv = read_uptime()[0]
            for line in self.f_diskstats:
                # maj, min, devicename, [list of stats, see above]
                values = line.split(None)
                # shortcut the deduper and just skip disks that
                # haven't done a single read.  This eliminates a bunch
                # of loopback, ramdisk, and cdrom devices but still
                # lets us report on the rare case that we actually use
                # a ramdisk.
                if values[3] == "0":
                    continue

                if int(values[1]) % 16 == 0 and int(values[0]) > 1:
                    metric = "iostat.disk."
                else:
                    metric = "iostat.part."

                device = values[2]
                if len(values) == 14:
                    # full stats line
                    for i in range(11):
                        self._readq.nput("%s%s %d %s dev=%s" % (metric, FIELDS_DISK[i], ts, values[i + 3], device))

                    ret = is_device(device, 0)
                    # if a device or a partition, calculate the svctm/await/util
                    if ret:
                        stats = dict(zip(FIELDS_DISK, values[3:]))
                        if device not in prev_stats:
                            prev_stats[device] = init_stats
                        rd_ios = float(stats.get("read_requests"))
                        wr_ios = float(stats.get("write_requests"))
                        nr_ios = rd_ios + wr_ios
                        prev_rd_ios = float(prev_stats[device].get("read_requests"))
                        prev_wr_ios = float(prev_stats[device].get("write_requests"))
                        prev_nr_ios = prev_rd_ios + prev_wr_ios
                        tput = ((nr_ios - prev_nr_ios) * float(self.hz) / float(itv))
                        util = ((float(stats.get("msec_total")) - float(prev_stats[device].get("msec_total"))) * float(self.hz) / float(itv))
                        svctm = 0.0
                        await = 0.0
                        r_await = 0.0
                        w_await = 0.0

                        if tput:
                            svctm = util / tput

                        rd_ticks = stats.get("msec_read")
                        wr_ticks = stats.get("msec_write")
                        prev_rd_ticks = prev_stats[device].get("msec_read")
                        prev_wr_ticks = prev_stats[device].get("msec_write")
                        if rd_ios != prev_rd_ios:
                            r_await = (float(rd_ticks) - float(prev_rd_ticks)) / float(rd_ios - prev_rd_ios)
                        if wr_ios != prev_wr_ios:
                            w_await = (float(wr_ticks) - float(prev_wr_ticks)) / float(wr_ios - prev_wr_ios)
                        if nr_ios != prev_nr_ios:
                            await = (float(rd_ticks) + float(wr_ticks) - float(prev_rd_ticks) - float(prev_wr_ticks)) / float(nr_ios - prev_nr_ios)
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "svctm", ts, svctm, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "r_await", ts, r_await, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "w_await", ts, w_await, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "await", ts, await, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "util", ts, float(util / 1000.0), device))

                        prev_stats[device] = copy.deepcopy(stats)

                elif len(values) == 7:
                    # partial stats line
                    for i in range(4):
                        self._readq.nput("%s%s %d %s dev=%s" % (metric, FIELDS_PART[i], ts, values[i + 3], device))
                else:
                    self.log_error("Cannot parse /proc/diskstats line: %s", line)
                    continue
Example #48
0
    def __call__(self):
        """Sample the already-open stat files and queue one metric line per value.

        Every metric is pushed onto ``self._readq`` with ``nput``.  The files
        read are ``self.f_uptime``, ``self.f_meminfo``, ``self.f_vmstat``,
        ``self.f_stat``, ``self.f_loadavg``, ``self.f_entropy_avail``,
        ``self.f_interrupts``, ``self.f_softirqs`` and the per-CPU file
        mappings ``self.f_scaling_min`` / ``f_scaling_max`` / ``f_scaling_cur``.
        Each file is rewound with ``seek(0)`` before it is read, and each
        section takes its own timestamp.  A final ``procstats.state ... 0``
        line is queued once everything has been emitted.
        """
        # Names given, in order, to the columns of a "cpu"/"cpuN" line.
        cpu_types = (
            'user', 'nice', 'system', 'idle', 'iowait', 'irq',
            'softirq', 'guest', 'guest_nice'
        )
        # The only vmstat counters that are reported.
        vmstat_keys = ("pgpgin", "pgpgout", "pswpin", "pswpout",
                       "pgfault", "pgmajfault")

        with utils.lower_privileges(self._logger):
            # proc.uptime
            self.f_uptime.seek(0)
            ts = int(time.time())
            for line in self.f_uptime:
                m = re.match(r"(\S+)\s+(\S+)", line)
                if m:
                    self._readq.nput("proc.uptime.total %d %s" %
                                     (ts, m.group(1)))
                    self._readq.nput("proc.uptime.now %d %s" %
                                     (ts, m.group(2)))

            # proc.meminfo
            self.f_meminfo.seek(0)
            ts = int(time.time())
            for line in self.f_meminfo:
                m = re.match(r"(\w+):\s+(\d+)\s+(\w+)", line)
                if m:
                    if m.group(3).lower() == 'kb':
                        # convert from kB to B for easier graphing
                        value = str(int(m.group(2)) * 1024)
                    else:
                        value = m.group(2)
                    self._readq.nput("proc.meminfo.%s %d %s" %
                                     (m.group(1).lower(), ts, value))

            # proc.vmstat
            self.f_vmstat.seek(0)
            ts = int(time.time())
            for line in self.f_vmstat:
                m = re.match(r"(\w+)\s+(\d+)", line)
                if not m:
                    continue
                if m.group(1) in vmstat_keys:
                    self._readq.nput("proc.vmstat.%s %d %s" %
                                     (m.group(1), ts, m.group(2)))

            # proc.stat
            self.f_stat.seek(0)
            ts = int(time.time())
            for line in self.f_stat:
                m = re.match(r"(\w+)\s+(.*)", line)
                if not m:
                    continue
                key = m.group(1)
                if key.startswith("cpu"):
                    # "cpuN" lines are per-CPU; the bare "cpu" line is the
                    # aggregate and carries no cpu tag.
                    cpu_m = re.match(r"cpu(\d+)", key)
                    if cpu_m:
                        metric_percpu = '.percpu'
                        tags = ' cpu=%s' % cpu_m.group(1)
                    else:
                        metric_percpu = ''
                        tags = ''
                    fields = m.group(2).split()

                    # We use zip to ignore fields that don't exist.
                    for value, field_name in zip(fields, cpu_types):
                        self._readq.nput(
                            "proc.stat.cpu%s %d %s type=%s%s" %
                            (metric_percpu, ts, value, field_name, tags))
                elif key == "intr":
                    # Only the first number on the line is reported.
                    self._readq.nput("proc.stat.intr %d %s" %
                                     (ts, m.group(2).split()[0]))
                elif key == "ctxt":
                    self._readq.nput("proc.stat.ctxt %d %s" % (ts, m.group(2)))
                elif key == "processes":
                    self._readq.nput("proc.stat.processes %d %s" %
                                     (ts, m.group(2)))
                elif key == "procs_blocked":
                    self._readq.nput("proc.stat.procs_blocked %d %s" %
                                     (ts, m.group(2)))

            # proc.loadavg
            self.f_loadavg.seek(0)
            ts = int(time.time())
            for line in self.f_loadavg:
                m = re.match(r"(\S+)\s+(\S+)\s+(\S+)\s+(\d+)/(\d+)\s+", line)
                if not m:
                    continue
                self._readq.nput("proc.loadavg.1min %d %s" % (ts, m.group(1)))
                self._readq.nput("proc.loadavg.5min %d %s" % (ts, m.group(2)))
                self._readq.nput("proc.loadavg.15min %d %s" % (ts, m.group(3)))
                self._readq.nput("proc.loadavg.runnable %d %s" %
                                 (ts, m.group(4)))
                self._readq.nput("proc.loadavg.total_threads %d %s" %
                                 (ts, m.group(5)))

            # proc.kernel.entropy_avail
            self.f_entropy_avail.seek(0)
            ts = int(time.time())
            for line in self.f_entropy_avail:
                self._readq.nput("proc.kernel.entropy_avail %d %s" %
                                 (ts, line.strip()))

            # proc.interrupts
            self.f_interrupts.seek(0)
            ts = int(time.time())
            # Get number of CPUs from description line.
            num_cpus = len(self.f_interrupts.readline().split())
            for line in self.f_interrupts:
                cols = line.split()
                if not cols:
                    # Blank line: nothing to report (and cols[0] would raise).
                    continue

                irq_type = cols[0].rstrip(":")
                if not irq_type.isalnum():
                    continue
                if irq_type.isdigit():
                    # Numeric IRQs are only reported for PCI-MSI-edge "eth"
                    # entries, which are then labelled by their last column.
                    if (len(cols) >= 2 and cols[-2] == "PCI-MSI-edge"
                            and "eth" in cols[-1]):
                        irq_type = cols[-1]
                    else:
                        continue  # Interrupt type is just a number, ignore.
                for i, val in enumerate(cols[1:]):
                    if i >= num_cpus:
                        # All values read, remaining cols contain textual
                        # description
                        break
                    if not val.isdigit():
                        # something is weird, there should only be digit values
                        self.log_error(
                            "Unexpected interrupts value %r in %r: ", val,
                            cols)
                        break
                    self._readq.nput(
                        "proc.interrupts %s %s type=%s cpu=%s" %
                        (ts, val, irq_type, i))

            # proc.softirqs
            self.f_softirqs.seek(0)
            ts = int(time.time())
            # Get number of CPUs from description line.
            num_cpus = len(self.f_softirqs.readline().split())
            for line in self.f_softirqs:
                cols = line.split()
                if not cols:
                    # Blank line: nothing to report (and cols[0] would raise).
                    continue

                irq_type = cols[0].rstrip(":")
                for i, val in enumerate(cols[1:]):
                    if i >= num_cpus:
                        # All values read, remaining cols contain textual
                        # description
                        break
                    if not val.isdigit():
                        # something is weird, there should only be digit values
                        self.log_error("Unexpected softirq value %r in %r: ",
                                       val, cols)
                        break
                    self._readq.nput("proc.softirqs %s %s type=%s cpu=%s" %
                                     (ts, val, irq_type, i))

            self._print_numa_stats(self.numastats)

            # Print scaling stats: one pass per kind, each with a fresh
            # timestamp, over the mapping of cpu number -> open file.
            for kind, files in (("min", self.f_scaling_min),
                                ("max", self.f_scaling_max),
                                ("cur", self.f_scaling_cur)):
                ts = int(time.time())
                for cpu_no in files.keys():
                    f = files[cpu_no]
                    f.seek(0)
                    for line in f:
                        self._readq.nput("proc.scaling.%s %d %s cpu=%s" %
                                         (kind, ts, line.rstrip('\n'), cpu_no))

            self._readq.nput("procstats.state %s %s" % (int(time.time()), '0'))
Example #49
0
 def __init__(self, config, logger, readq):
     """Initialise the collector and set up its Alauda session.

     ``config``, ``logger`` and ``readq`` are passed unchanged to the parent
     class constructor.
     """
     super(DockerAlauda, self).__init__(config, logger, readq)
     # NOTE(review): presumably runs the session setup with reduced
     # privileges -- confirm against utils.lower_privileges.
     with utils.lower_privileges(self._logger):
         self._init_alauda_session()
Example #50
0
 def __call__(self):
     """Fetch the container list and collect stats for each entry in turn."""
     with utils.lower_privileges(self._logger):
         # Every name returned by get_container_list() is handed, in order,
         # to get_container_stats().
         for name in self.get_container_list():
             self.get_container_stats(name)
Example #51
0
def main():
    """Run the TCP bridge: accept connections and copy their lines to ``out``.

    Each accepted connection is served on its own thread by ``clientthread``,
    which strips a leading ``'put '`` from every line before writing it to
    the module-level ``out`` and then updates the module-level counters
    ``m_lines``, ``m_connections`` and ``m_ptime``.

    Exits with status 13 when ``tcp_bridge_conf`` is missing or not enabled,
    and with status 1 when the listening socket cannot be opened.
    """
    if not (tcp_bridge_conf and tcp_bridge_conf.enabled()):
        sys.stderr.write('not enabled, or tcp_bridge_conf unavilable\n')
        sys.exit(13)

    # Imported here so this function brings its own dependency into scope.
    import logging

    # main() is a module-level function, so there is no ``self`` to take a
    # logger from (the previous ``self._logger`` raised NameError).
    # NOTE(review): assumes lower_privileges only needs a standard
    # logging.Logger -- confirm against utils.lower_privileges.
    with utils.lower_privileges(logging.getLogger(__name__)):

        def printm(string, time, value):
            # Writes one "<namespace><name> <time> <value>" metric line.
            out.write(m_namespace+string+' '+str(time)+' '+str(value)+'\n')

        def printmetrics():
            # Emits the bridge's own counters, at most once per m_delay.
            global m_delay
            global m_last

            ts = int(time.time())
            if ts > m_last+m_delay:
                printm('lines_read', ts, m_lines)
                printm('connections_processed', ts, m_connections)
                printm('processing_time', ts, m_ptime)
                printm('active', ts, 1)
                m_last = ts

        def removePut(line):
            # Drops a leading 'put ' so only the data point itself remains.
            if line.startswith('put '):
                return line[4:]
            return line

        def clientthread(connection):
            # Copies every line from one client to ``out`` until EOF.
            global m_lines
            global m_connections
            global m_ptime

            start = time.time()
            f = connection.makefile()
            try:
                while True:
                    data = f.readline()
                    if not data:
                        break
                    out.write(removePut(data))
                    m_lines += 1
            finally:
                # Release the socket even if reading or writing fails.
                f.close()
                connection.close()

            end = time.time()
            m_ptime += (end - start)
            m_connections += 1
            printmetrics()

        try:
            # Use the configured values when set, otherwise the module-level
            # defaults.  Assigning to PORT/HOST here would make them local
            # names and raise UnboundLocalError whenever the config is unset.
            # NOTE(review): assumes module-level PORT and HOST exist.
            port = tcp_bridge_conf.port() or PORT
            host = tcp_bridge_conf.host() or HOST

            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind((host, port))
            sock.listen(1)

        except socket.error as msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        # NOTE(review): flush_delay is read but not used anywhere in this
        # function; kept so the configuration lookup is unchanged.
        try:
            flush_delay = tcp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        try:
            while True:
                connection, address = sock.accept()
                start_new_thread(clientthread, (connection,))
        except KeyboardInterrupt:
            utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()
Example #52
0
    def __call__(self):
        """Parse ``self.f_diskstats`` and queue per-device I/O metrics.

        For every line whose first statistic is non-zero, the raw counters
        are queued on ``self._readq`` under ``iostat.disk.*`` or
        ``iostat.part.*``.  For 14-field lines where ``is_device(device, 0)``
        is truthy, the derived ``svctm``, ``r_await``, ``w_await`` and
        ``await`` values are queued as well.

        ``prev_stats`` is local to this call, so the first line seen for a
        device is compared against the zeroed ``init_stats`` baseline.
        Lines that cannot be parsed are reported through ``self.log_error``.
        """
        init_stats = {
            "read_requests": 0,
            "read_merged": 0,
            "read_sectors": 0,
            "msec_read": 0,
            "write_requests": 0,
            "write_merged": 0,
            "write_sectors": 0,
            "msec_write": 0,
            "ios_in_progress": 0,
            "msec_total": 0,
            "msec_weighted_total": 0,
        }
        prev_stats = dict()
        with utils.lower_privileges(self._logger):
            self.f_diskstats.seek(0)
            ts = int(time.time())
            # NOTE(review): presumably the elapsed interval the rates are
            # averaged over -- confirm against read_uptime().
            itv = read_uptime()[0]
            for line in self.f_diskstats:
                # maj, min, devicename, [list of stats, see above]
                values = line.split(None)
                if len(values) < 4:
                    # Too short to even hold the first statistic; report it
                    # instead of failing with IndexError below.
                    self.log_error("Cannot parse /proc/diskstats line: %s", line)
                    continue
                # shortcut the deduper and just skip disks that
                # haven't done a single read.  This eliminates a bunch
                # of loopback, ramdisk, and cdrom devices but still
                # lets us report on the rare case that we actually use
                # a ramdisk.
                if values[3] == "0":
                    continue

                if int(values[1]) % 16 == 0 and int(values[0]) > 1:
                    metric = "iostat.disk."
                else:
                    metric = "iostat.part."

                device = values[2]
                if len(values) == 14:
                    # full stats line
                    for i in range(11):
                        self._readq.nput("%s%s %d %s dev=%s" % (metric, FIELDS_DISK[i], ts, values[i + 3], device))

                    # if a device or a partition, calculate the svctm/await/util
                    if is_device(device, 0):
                        stats = dict(zip(FIELDS_DISK, values[3:]))
                        if device not in prev_stats:
                            prev_stats[device] = init_stats
                        prev = prev_stats[device]

                        rd_ios = float(stats.get("read_requests"))
                        wr_ios = float(stats.get("write_requests"))
                        nr_ios = rd_ios + wr_ios
                        prev_rd_ios = float(prev.get("read_requests"))
                        prev_wr_ios = float(prev.get("write_requests"))
                        prev_nr_ios = prev_rd_ios + prev_wr_ios
                        tput = ((nr_ios - prev_nr_ios) * float(self.hz) / float(itv))
                        util = ((float(stats.get("msec_total")) - float(prev.get("msec_total"))) * float(self.hz) / float(itv))

                        # "await" is a reserved word from Python 3.7 on, so
                        # the local is named await_time; the emitted metric
                        # name is still "await".
                        svctm = 0.0
                        await_time = 0.0
                        r_await = 0.0
                        w_await = 0.0

                        if tput:
                            svctm = util / tput

                        rd_ticks = float(stats.get("msec_read"))
                        wr_ticks = float(stats.get("msec_write"))
                        prev_rd_ticks = float(prev.get("msec_read"))
                        prev_wr_ticks = float(prev.get("msec_write"))
                        # Each average is only defined when at least one
                        # request of that kind completed; otherwise it
                        # stays at 0.0.
                        if rd_ios != prev_rd_ios:
                            r_await = (rd_ticks - prev_rd_ticks) / float(rd_ios - prev_rd_ios)
                        if wr_ios != prev_wr_ios:
                            w_await = (wr_ticks - prev_wr_ticks) / float(wr_ios - prev_wr_ios)
                        if nr_ios != prev_nr_ios:
                            await_time = (rd_ticks + wr_ticks - prev_rd_ticks - prev_wr_ticks) / float(nr_ios - prev_nr_ios)
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "svctm", ts, svctm, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "r_await", ts, r_await, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "w_await", ts, w_await, device))
                        self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "await", ts, await_time, device))
                        # The "util" metric is deliberately not emitted here
                        # (the original note only said "can't work for our");
                        # util is still needed above to derive svctm.

                        prev_stats[device] = copy.deepcopy(stats)

                elif len(values) == 7:
                    # partial stats line
                    for i in range(4):
                        self._readq.nput("%s%s %d %s dev=%s" % (metric, FIELDS_PART[i], ts, values[i + 3], device))
                else:
                    self.log_error("Cannot parse /proc/diskstats line: %s", line)
Example #53
0
    def __call__(self):
        """Queue space and inode usage for the mounts listed in ``self.f_mounts``.

        Lines are filtered first: ``none`` devices, file system types in
        ``FSTYPE_IGNORE`` or starting with ``fuse.``, and mount points under
        ``/dev``, ``/sys``, ``/proc``, ``/lib`` or starting with ``net:`` are
        skipped.  A ``/dev/...`` device mounted more than once is kept only
        once, under its shortest mount point.  Each remaining mount point is
        passed to ``os.statvfs`` and eight ``df.bytes.*`` / ``df.inodes.*``
        metrics are queued on ``self._readq``.
        """
        with utils.lower_privileges(self._logger):
            devices = []
            self.f_mounts.seek(0)
            ts = int(time.time())

            for line in self.f_mounts:
                # Docs come from the fstab(5)
                # fs_spec     # Mounted block special device or remote filesystem
                # fs_file     # Mount point
                # fs_vfstype  # File system type
                # fs_mntops   # Mount options
                # fs_freq     # Dump(8) utility flags
                # fs_passno   # Order in which filesystem checks are done at reboot time
                try:
                    fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = line.split(None)
                except ValueError:
                    self.log_exception("can't parse line at /proc/mounts.")
                    continue

                if fs_spec == "none":
                    continue
                if fs_vfstype in FSTYPE_IGNORE or fs_vfstype.startswith("fuse."):
                    continue
                # The tuple form of startswith is safe here: the enclosing
                # ``with`` statement already rules out Python 2.4.
                if fs_file.startswith(("/dev", "/sys", "/proc", "/lib", "net:")):
                    continue

                # keep /dev/xxx device with shorter fs_file (remove mount binds)
                if fs_spec.startswith("/dev"):
                    for device in devices:
                        if fs_spec == device[0]:
                            if len(fs_file) < len(device[1]):
                                device[1] = fs_file
                            break
                    else:
                        # No existing entry for this device.
                        devices.append([fs_spec, fs_file, fs_vfstype])
                else:
                    devices.append([fs_spec, fs_file, fs_vfstype])

            for fs_spec, fs_file, fs_vfstype in devices:
                try:
                    r = os.statvfs(fs_file)
                except OSError as e:
                    self.log_exception("can't get info for mount point: %s: %s" % (fs_file, e))
                    continue

                used = r.f_blocks - r.f_bfree

                # A file system with no blocks is reported as 100% used.
                if r.f_blocks == 0:
                    percent_used = 100
                else:
                    percent_used = used * 100.0 / r.f_blocks

                self._readq.nput("df.bytes.total %d %s mount=%s fstype=%s"
                      % (ts, r.f_frsize * r.f_blocks, fs_file, fs_vfstype))
                self._readq.nput("df.bytes.used %d %s mount=%s fstype=%s"
                      % (ts, r.f_frsize * used, fs_file, fs_vfstype))
                self._readq.nput("df.bytes.percentused %d %s mount=%s fstype=%s"
                      % (ts, percent_used, fs_file, fs_vfstype))
                self._readq.nput("df.bytes.free %d %s mount=%s fstype=%s"
                      % (ts, r.f_frsize * r.f_bfree, fs_file, fs_vfstype))

                used = r.f_files - r.f_ffree

                # A file system with no inodes is reported as 100% used.
                if r.f_files == 0:
                    percent_used = 100
                else:
                    percent_used = used * 100.0 / r.f_files

                self._readq.nput("df.inodes.total %d %s mount=%s fstype=%s"
                      % (ts, r.f_files, fs_file, fs_vfstype))
                self._readq.nput("df.inodes.used %d %s mount=%s fstype=%s"
                      % (ts, used, fs_file, fs_vfstype))
                self._readq.nput("df.inodes.percentused %d %s mount=%s fstype=%s"
                      % (ts, percent_used, fs_file, fs_vfstype))
                self._readq.nput("df.inodes.free %d %s mount=%s fstype=%s"
                      % (ts, r.f_ffree, fs_file, fs_vfstype))
Example #54
0
def main():
    """Run the TCP bridge: accept connections and copy their lines to ``out``.

    Each accepted connection is served on its own thread by ``clientthread``,
    which strips a leading ``'put '`` from every line before writing it to
    the module-level ``out`` and then updates the module-level counters
    ``m_lines``, ``m_connections`` and ``m_ptime``.

    Exits with status 13 when ``tcp_bridge_conf`` is missing or not enabled,
    and with status 1 when the listening socket cannot be opened.
    """
    if not (tcp_bridge_conf and tcp_bridge_conf.enabled()):
        sys.stderr.write('not enabled, or tcp_bridge_conf unavilable\n')
        sys.exit(13)

    # Imported here so this function brings its own dependency into scope.
    import logging

    # main() is a module-level function, so there is no ``self`` to take a
    # logger from (the previous ``self._logger`` raised NameError).
    # NOTE(review): assumes lower_privileges only needs a standard
    # logging.Logger -- confirm against utils.lower_privileges.
    with utils.lower_privileges(logging.getLogger(__name__)):

        def printm(string, time, value):
            # Writes one "<namespace><name> <time> <value>" metric line.
            out.write(m_namespace + string + ' ' + str(time) + ' ' +
                      str(value) + '\n')

        def printmetrics():
            # Emits the bridge's own counters, at most once per m_delay.
            global m_delay
            global m_last

            ts = int(time.time())
            if ts > m_last + m_delay:
                printm('lines_read', ts, m_lines)
                printm('connections_processed', ts, m_connections)
                printm('processing_time', ts, m_ptime)
                printm('active', ts, 1)
                m_last = ts

        def removePut(line):
            # Drops a leading 'put ' so only the data point itself remains.
            if line.startswith('put '):
                return line[4:]
            return line

        def clientthread(connection):
            # Copies every line from one client to ``out`` until EOF.
            global m_lines
            global m_connections
            global m_ptime

            start = time.time()
            f = connection.makefile()
            try:
                while True:
                    data = f.readline()
                    if not data:
                        break
                    out.write(removePut(data))
                    m_lines += 1
            finally:
                # Release the socket even if reading or writing fails.
                f.close()
                connection.close()

            end = time.time()
            m_ptime += (end - start)
            m_connections += 1
            printmetrics()

        try:
            # Use the configured values when set, otherwise the module-level
            # defaults.  Assigning to PORT/HOST here would make them local
            # names and raise UnboundLocalError whenever the config is unset.
            # NOTE(review): assumes module-level PORT and HOST exist.
            port = tcp_bridge_conf.port() or PORT
            host = tcp_bridge_conf.host() or HOST

            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind((host, port))
            sock.listen(1)

        except socket.error as msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        # NOTE(review): flush_delay is read but not used anywhere in this
        # function; kept so the configuration lookup is unchanged.
        try:
            flush_delay = tcp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        try:
            while True:
                connection, address = sock.accept()
                start_new_thread(clientthread, (connection, ))
        except KeyboardInterrupt:
            utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()