def __call__(self):
    with utils.lower_privileges(self._logger):
        # We just care about ethN and emN interfaces.  We specifically
        # want to avoid bond interfaces, because interface stats are
        # still kept on the child interfaces when you bond.  By skipping
        # bond we avoid double counting.
        self.f_netdev.seek(0)
        ts = int(time.time())
        for line in self.f_netdev:
            m = re.match(
                "\s+(eth\d+|em\d+_\d+/\d+|em\d+_\d+|em\d+|"
                "p\d+p\d+_\d+/\d+|p\d+p\d+_\d+|p\d+p\d+):(.*)",
                line)
            if not m:
                continue
            intf = m.group(1)
            stats = m.group(2).split(None)

            def direction(idx):
                if idx >= 8:
                    return "out"
                return "in"

            for i in xrange(16):
                self._readq.nput("proc.net.%s.%s %d %s iface=%s"
                                 % (FIELDS[i], direction(i), ts,
                                    stats[i], intf))
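Every example in this listing wraps its collection work in utils.lower_privileges(...). The helper itself is not shown anywhere in the listing; as a rough sketch only (name, signature, and behavior assumed, not taken from this codebase), it can be modeled as a context manager that drops effective privileges for the duration of the block:

import contextlib
import os
import pwd

@contextlib.contextmanager
def lower_privileges(logger, user="nobody"):
    # Hypothetical sketch: the real utils.lower_privileges() may differ.
    # Only its usage pattern (a context manager taking a logger) is
    # visible in the examples in this listing.
    euid, egid = os.geteuid(), os.getegid()
    dropped = False
    if euid == 0:  # only root can actually drop privileges
        try:
            ent = pwd.getpwnam(user)
            os.setegid(ent.pw_gid)  # change gid first, while still root
            os.seteuid(ent.pw_uid)
            dropped = True
        except OSError:
            logger.exception("failed to lower privileges")
    try:
        yield
    finally:
        if dropped:
            # restore euid first (saved uid is still root), then egid
            os.seteuid(euid)
            os.setegid(egid)

Usage then mirrors the examples: `with lower_privileges(logger): ...collect metrics...`.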
def __call__(self):
    with utils.lower_privileges(self._logger):
        # List buckets on every pass so datapoints from any newly
        # created bucket are collected.
        buckets = list_bucket(self.bin_dir)
        for b in buckets:
            collect_stats(self.bin_dir, b, self._readq)
def __init__(self, config, logger, readq):
    super(Netstat, self).__init__(config, logger, readq)
    self.page_size = resource.getpagesize()
    try:
        self.sockstat = open("/proc/net/sockstat")
        self.netstat = open("/proc/net/netstat")
        self.snmp = open("/proc/net/snmp")
    except IOError:
        self._readq.nput("netstat.state %s %s" % (int(time.time()), '1'))
        self.log_exception('open failed')
        self.cleanup()
        raise

    with utils.lower_privileges(self._logger):
        # Note: up until v2.6.37-rc2 most of the values were 32 bits.
        # The first value is pretty useless since it accounts for some
        # socket types but not others.  So we don't report it because it's
        # more confusing than anything else and it's not well documented
        # what type of sockets are or aren't included in this count.
        self.regexp = re.compile(
            "sockets: used \d+\n"
            "TCP: inuse (?P<tcp_inuse>\d+) orphan (?P<orphans>\d+)"
            " tw (?P<tw_count>\d+) alloc (?P<tcp_sockets>\d+)"
            " mem (?P<tcp_pages>\d+)\n"
            "UDP: inuse (?P<udp_inuse>\d+)"
            # UDP memory accounting was added in v2.6.25-rc1
            "(?: mem (?P<udp_pages>\d+))?\n"
            # UDP-Lite (RFC 3828) was added in v2.6.20-rc2
            "(?:UDPLITE: inuse (?P<udplite_inuse>\d+)\n)?"
            "RAW: inuse (?P<raw_inuse>\d+)\n"
            "FRAG: inuse (?P<ip_frag_nqueues>\d+)"
            " memory (?P<ip_frag_mem>\d+)\n")
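The regex above is written against the line layout of /proc/net/sockstat. A minimal, self-contained check of the same pattern against a typical sockstat payload (the sample values are invented for illustration):

import re

SOCKSTAT_RE = re.compile(
    "sockets: used \d+\n"
    "TCP: inuse (?P<tcp_inuse>\d+) orphan (?P<orphans>\d+)"
    " tw (?P<tw_count>\d+) alloc (?P<tcp_sockets>\d+)"
    " mem (?P<tcp_pages>\d+)\n"
    "UDP: inuse (?P<udp_inuse>\d+)"
    "(?: mem (?P<udp_pages>\d+))?\n"
    "(?:UDPLITE: inuse (?P<udplite_inuse>\d+)\n)?"
    "RAW: inuse (?P<raw_inuse>\d+)\n"
    "FRAG: inuse (?P<ip_frag_nqueues>\d+)"
    " memory (?P<ip_frag_mem>\d+)\n")

# Sample /proc/net/sockstat content (values made up).
sample = ("sockets: used 230\n"
          "TCP: inuse 13 orphan 0 tw 1 alloc 16 mem 2\n"
          "UDP: inuse 4 mem 3\n"
          "UDPLITE: inuse 0\n"
          "RAW: inuse 0\n"
          "FRAG: inuse 0 memory 0\n")

m = SOCKSTAT_RE.match(sample)
assert m is not None
print m.group("tcp_inuse")  # -> 13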
def main(argv):
    # LOGGER: assumed module-level logger (the listing read
    # `self._logger' here, which is undefined in a module-level main()).
    with utils.lower_privileges(LOGGER):
        socket.setdefaulttimeout(DEFAULT_TIMEOUT)
        servers = []

        if json is None:
            utils.err("This collector requires the `json' Python module.")
            return 1

        for conf in elasticsearch_conf.get_servers():
            server = httplib.HTTPConnection(*conf)
            try:
                server.connect()
            except socket.error, (erno, e):
                if erno == errno.ECONNREFUSED:
                    continue
                raise
            servers.append(server)

        if len(servers) == 0:
            return 13  # No ES running, ask tcollector to not respawn us.

        status = node_status(server)
        version = status["version"]["number"]

        while True:
            for server in servers:
                _collect_server(server, version)
            time.sleep(COLLECTION_INTERVAL)
def main():
    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        if pymongo is None:
            print >>sys.stderr, "error: Python module `pymongo' is missing"
            return 13

        c = pymongo.Connection(host=HOST, port=PORT)

        while True:
            res = c.admin.command('serverStatus')
            ts = int(time.time())

            for base_metric, tags in TAG_METRICS:
                for tag in tags:
                    print 'mongo.%s %d %s type=%s' % (base_metric, ts,
                                                      res[base_metric][tag], tag)
            for metric in METRICS:
                cur = res
                try:
                    for m in metric.split('.'):
                        cur = cur[m]
                except KeyError:
                    continue
                print 'mongo.%s %d %s' % (metric, ts, cur)

            sys.stdout.flush()
            time.sleep(INTERVAL)
def __init__(self, config, logger, readq):
    super(Mongo3, self).__init__(config, logger, readq)
    self.loadEnv()
    try:
        with utils.lower_privileges(self._logger):
            if pymongo is None:
                self.log_error('can not load pymongo module')
                self._readq.nput("mongo3.state %s %s" % (int(time.time()), '1'))
            for index, item in enumerate(CONFIG_CONN, start=0):
                conn = pymongo.MongoClient(host=item['host'], port=item['port'])
                if USER:
                    conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
                CONFIG_CONN[index]['link'] = conn
            for index, item in enumerate(MONGOS_CONN, start=0):
                conn = pymongo.MongoClient(host=item['host'], port=item['port'])
                if USER:
                    conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
                MONGOS_CONN[index]['link'] = conn
            for index, item in enumerate(REPLICA_CONN, start=0):
                conn = pymongo.MongoClient(host=item['host'], port=item['port'])
                if USER:
                    conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
                REPLICA_CONN[index]['link'] = conn
    except:
        self._readq.nput("mongo3.state %s %s" % (int(time.time()), '1'))
def __call__(self):
    with utils.lower_privileges(self._logger):
        if json:
            self._readq.nput("hadoop.namenode.state %s %s" % (int(time.time()), '0'))
            HadoopNode(self.service, self.daemon, self.host, self.port,
                       REPLACEMENTS, self.readq, self._logger).emit()
        else:
            self._readq.nput("hadoop.namenode.state %s %s" % (int(time.time()), '1'))
            self.logger.error("This collector requires the `json' Python module.")
def __call__(self):
    with utils.lower_privileges(self._logger):
        if json:
            HBaseRegionserverHttp(self.port, self.logger, self.readq).emit()
        else:
            self.logger.error("This collector requires the `json' Python module.")
def __call__(self):
    with utils.lower_privileges(self._logger):
        # Refresh container names/images from the Docker socket on
        # every 4th pass.
        if self.cache == 0:
            self.containernames = {}
            self.containerimages = {}
        self.cache += 1
        if self.cache == 4:
            self.cache = 0

        if os.path.isdir(self.cgroup_path):
            for level1 in os.listdir(self.cgroup_path):
                if (os.path.isdir(self.cgroup_path + "/" + level1 + "/docker") and
                        # /cgroup/cpu and /cgroup/cpuacct are often links to /cgroup/cpu,cpuacct
                        not (((level1 == "cpu,cpuacct") or (level1 == "cpuacct")) and
                             os.path.isdir(self.cgroup_path + "/cpu/docker"))):
                    for level2 in os.listdir(self.cgroup_path + "/" + level1 + "/docker"):
                        if os.path.isdir(self.cgroup_path + "/" + level1 + "/docker/" + level2):
                            self.readdockerstats(
                                self.cgroup_path + "/" + level1 + "/docker/" + level2,
                                level2)
                else:
                    # If the Docker cgroup is handled by a slice, see
                    # http://www.freedesktop.org/software/systemd/man/systemd.slice.html
                    for slicename in ("system.slice", "machine.slice", "user.slice"):
                        if (os.path.isdir(self.cgroup_path + "/" + level1 + "/" + slicename) and
                                # /cgroup/cpu and /cgroup/cpuacct are often links to /cgroup/cpu,cpuacct
                                not (((level1 == "cpu,cpuacct") or (level1 == "cpuacct")) and
                                     os.path.isdir(self.cgroup_path + "/cpu/" + slicename))):
                            for level2 in os.listdir(self.cgroup_path + "/" + level1 + "/" + slicename):
                                if os.path.isdir(self.cgroup_path + "/" + level1 + "/" + slicename + "/" + level2):
                                    m = re.search("^docker-(\w+)\.scope$", level2)
                                    if m:
                                        self.readdockerstats(
                                            self.cgroup_path + "/" + level1 + "/" + slicename + "/" + level2,
                                            m.group(1))
                            break

        if os.path.isdir(self.cgroup_path + "/lxc"):
            for level1 in os.listdir(self.cgroup_path + "/lxc"):
                if os.path.isdir(self.cgroup_path + "/lxc/" + level1):
                    self.readdockerstats(self.cgroup_path + "/lxc/" + level1, level1)
def __call__(self):
    with utils.lower_privileges(self._logger):
        try:
            # collection period: 60 secs
            url = self.get_config('stats_url', 'http://localhost:9999/stats.txt')
            response = urllib2.urlopen(url)
            content = response.read()
            return self.process(content)
        except:
            self.log_exception('unexpected error.')
def __init__(self, config, logger, readq):
    super(Docker, self).__init__(config, logger, readq)
    self.containernames = {}
    self.containerimages = {}
    with utils.lower_privileges(self._logger):
        self.cache = 0
        if platform.dist()[0] in ['centos', 'redhat'] and \
                not platform.dist()[1].startswith("7."):
            self.cgroup_path = '/cgroup'
        else:
            self.cgroup_path = '/sys/fs/cgroup'
        self.socket_path = '/var/run/docker.sock'
def __call__(self):
    with utils.lower_privileges(self._logger):
        counter = {}
        for procfile in (self.tcp, self.tcp6):
            if procfile is None:
                continue
            procfile.seek(0)
            ts = int(time.time())
            for line in procfile:
                try:
                    # pylint: disable=W0612
                    (num, src, dst, state, queue, when, retrans,
                     uid, timeout, inode) = line.split(None, 9)
                except ValueError:  # Malformed line
                    continue
                if num == "sl":  # header
                    continue
                srcport = src.split(":")[1]
                dstport = dst.split(":")[1]
                srcport = int(srcport, 16)
                dstport = int(dstport, 16)
                service = PORTS.get(srcport, "other")
                service = PORTS.get(dstport, service)
                if is_public_ip(dst) or is_public_ip(src):
                    endpoint = "external"
                else:
                    endpoint = "internal"
                user = self.uids.get(uid, "other")
                key = ("state=" + TCPSTATES[state] + " endpoint=" + endpoint +
                       " service=" + service + " user=" + user)
                # The original listing masked the span between here and the
                # output loop; the counter update and the outer state loop
                # below are reconstructed from the fuller variant of this
                # collector shown later in this listing.
                if key in counter:
                    counter[key] += 1
                else:
                    counter[key] = 1
        for state in TCPSTATES:
            for service in SERVICES + ("other",):
                key = ("state=%s service=%s" % (TCPSTATES[state], service))
                if key in counter:
                    self._readq.nput("proc.net.tcp {0} {1} {2}".format(
                        ts, counter[key], key))
                else:
                    self._readq.nput("proc.net.tcp {0} {1} {2}".format(
                        ts, "0", key))
        self._readq.nput("procnettcp.state %s %s" % (int(time.time()), '0'))
def __call__(self):
    with utils.lower_privileges(self._logger):
        try:
            self.get_container_stats(self.alauda_session, DEFAULT_NAMESPACE, DEFAULT_TOKEN)
            self.numExceptionHit = 0
        except Exception:
            self.log_exception('exception collecting Alauda docker metrics')
            self.numExceptionHit += 1
            if self.numExceptionHit > MAX_EXCEPTION_HIT:
                self.cleanup()
                self._init_alauda_session()
def main():
    if not (graphite_bridge_conf and graphite_bridge_conf.enabled()):
        sys.exit(13)

    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        server = GraphiteServer((HOST, PORT), GraphiteHandler)
        server.daemon_threads = True

        try:
            server.serve_forever()
        except KeyboardInterrupt:
            server.shutdown()
            server.server_close()
def __call__(self):
    with utils.lower_privileges(self._logger):
        if json:
            self._readq.nput("hbase.regionserver.state %s %s" % (int(time.time()), '0'))
            HBaseRegionserverHttp(self.host, self.port, self.logger, self.readq).emit()
        else:
            self._readq.nput("hbase.regionserver.state %s %s" % (int(time.time()), '1'))
            self.logger.error("This collector requires the `json' Python module.")
def __call__(self):
    with utils.lower_privileges(self._logger):
        counter = {}
        for procfile in (self.tcp, self.tcp6):
            if procfile is None:
                continue
            procfile.seek(0)
            ts = int(time.time())
            for line in procfile:
                try:
                    # pylint: disable=W0612
                    (num, src, dst, state, queue, when, retrans,
                     uid, timeout, inode) = line.split(None, 9)
                except ValueError:  # Malformed line
                    continue
                if num == "sl":  # header
                    continue
                srcport = src.split(":")[1]
                dstport = dst.split(":")[1]
                srcport = int(srcport, 16)
                dstport = int(dstport, 16)
                service = PORTS.get(srcport, "other")
                service = PORTS.get(dstport, service)
                if is_public_ip(dst) or is_public_ip(src):
                    endpoint = "external"
                else:
                    endpoint = "internal"
                user = self.uids.get(uid, "other")
                key = ("state=" + TCPSTATES[state] + " endpoint=" + endpoint +
                       " service=" + service + " user=" + user)
                # The original listing masked the span between here and the
                # output loops; the counter update and the two outer loops
                # below are reconstructed from the visible structure.
                if key in counter:
                    counter[key] += 1
                else:
                    counter[key] = 1
        for state in TCPSTATES:
            for service in SERVICES + ("other",):
                for user in USERS + ("other",):
                    for endpoint in ("internal", "external"):
                        key = ("state=%s endpoint=%s service=%s user=%s"
                               % (TCPSTATES[state], endpoint, service, user))
                        if key in counter:
                            self._readq.nput("proc.net.tcp {0} {1} {2}".format(ts, counter[key], key))
                        else:
                            self._readq.nput("proc.net.tcp {0} {1} {2}".format(ts, "0", key))
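Both variants above parse /proc/net/tcp, whose address columns are hex-encoded. A small standalone illustration of that decoding (the sample row is invented):

# One (invented) /proc/net/tcp row: local address 127.0.0.1:22,
# remote address 0.0.0.0:0, state 0A (LISTEN).
line = "   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 " \
       "00:00000000 00000000     0        0 12345 1 0000000000000000"

fields = line.split(None, 9)
src, dst, state = fields[1], fields[2], fields[3]

# The port is the hex string after the colon, e.g. "0016" -> 22.
srcport = int(src.split(":")[1], 16)
print srcport  # -> 22
# The address half is little-endian hex: "0100007F" is 127.0.0.1.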
def main():
    if not (udp_bridge_conf and udp_bridge_conf.enabled()):
        sys.exit(13)

    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        def removePut(line):
            if line.startswith('put '):
                return line[4:]
            else:
                return line

        try:
            if (udp_bridge_conf and udp_bridge_conf.usetcp()):
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            else:
                sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sock.bind((HOST, PORT))
        except socket.error, msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        try:
            flush_delay = udp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        flush_timeout = int(time.time())
        try:
            try:
                while 1:
                    data, address = sock.recvfrom(SIZE)
                    if data:
                        lines = data.splitlines()
                        data = '\n'.join(map(removePut, lines))
                    if not data:
                        utils.err("invalid data")
                        break
                    print data
                    now = int(time.time())
                    if now > flush_timeout:
                        sys.stdout.flush()
                        flush_timeout = now + flush_delay
            except KeyboardInterrupt:
                utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()
def call(self, metric):
    try:
        with utils.lower_privileges(self._logger):
            if json:
                self.exeClass(self.service, self.daemon, self.host, self.port,
                              self.REPLACEMENTS, self.readq, self._logger).emit()
                self._readq.nput("%s %s %s" % (metric, int(time.time()), '0'))
            else:
                self._readq.nput("%s %s %s" % (metric, int(time.time()), '1'))
                self.log_error("This collector requires the `json' Python module.")
    except Exception, e:
        self._readq.nput("%s %s %s" % (metric, int(time.time()), '1'))
        self.log_error("metric is %s error is %s" % (metric, str(e)))
def __call__(self):
    with utils.lower_privileges(self._logger):
        try:
            self.validate_config()
            regions = ec2_list_regions()
            for reg in regions:
                for statistic in STATISTICS:
                    t = threading.Thread(target=self.handle_region,
                                         kwargs={"region": reg,
                                                 "statistic": statistic})
                    t.start()
            while threading.activeCount() > 1:
                time.sleep(1)
        except exceptions.KeyboardInterrupt:
            return 0
        except:
            raise
        if not sendQueue.empty():
            self.send_metrics()
def main():
    if not (jolokia_conf and jolokia_conf.enabled()):
        utils.err("Jolokia collector disabled by config")
        sys.exit(13)

    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        CONFIG = jolokia_conf.get_config()
        instances = []

        for instance in CONFIG['instances']:
            if 'common_tags' in CONFIG:
                if 'tags' in instance:
                    instance['tags'].update(CONFIG['common_tags'])
                else:
                    instance['tags'] = CONFIG['common_tags']
            if 'common_monitors' in CONFIG:
                if 'monitors' in instance:
                    instance['monitors'] += CONFIG['common_monitors']
                else:
                    instance['monitors'] = CONFIG['common_monitors']
            if not 'monitors' in instance:
                utils.err("error: no monitors configured")
                sys.exit(13)
            if not 'tags' in instance:
                instance['tags'] = []
            if not 'auth' in instance:
                instance['auth'] = {'username': '', 'password': ''}
            jc = JolokiaCollector(instance['url'], instance['auth'],
                                  instance['tags'], instance['monitors'])
            instances.append(jc)

        # LOOP!!
        while True:
            for i in instances:
                i.process_data()
            try:
                time.sleep(CONFIG['interval'])
            except KeyboardInterrupt:
                break
def main():
    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        if BinLogStreamReader is None:
            utils.err("error: Python module `pymysqlreplication' is missing")
            return 1
        if pymysql is None:
            utils.err("error: Python module `pymysql' is missing")
            return 1

        settings = zabbix_bridge_conf.get_settings()

        # Set blocking to True if you want to block and wait for the next
        # event at the end of the stream.
        stream = BinLogStreamReader(connection_settings=settings['mysql'],
                                    server_id=settings['slaveid'],
                                    only_events=[WriteRowsEvent],
                                    resume_stream=True,
                                    blocking=True)
        hostmap = gethostmap(settings)  # Prime initial hostmap
        for binlogevent in stream:
            if binlogevent.schema == settings['mysql']['db']:
                table = binlogevent.table
                log_pos = binlogevent.packet.log_pos
                if table == 'history' or table == 'history_uint':
                    for row in binlogevent.rows:
                        r = row['values']
                        itemid = r['itemid']
                        try:
                            hm = hostmap[itemid]
                            print "zbx.%s %d %s host=%s proxy=%s" % (
                                hm['key'], r['clock'], r['value'],
                                hm['host'], hm['proxy'])
                        except KeyError:
                            # TODO: Consider https://wiki.python.org/moin/PythonDecoratorLibrary#Retry
                            hostmap = gethostmap(settings)
                            utils.err("error: Key lookup miss for %s" % (itemid))
                    sys.stdout.flush()
                    # if n seconds old, reload
                    # settings['gethostmap_interval']
        stream.close()
def main():
    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        bad_regex = re.compile("[,()]+")  # avoid symbols forbidden by TSD
        while True:
            try:
                if vstats == "all":
                    stats = subprocess.Popen(["varnishstat", "-1", "-j"],
                                             stdout=subprocess.PIPE)
                else:
                    fields = ",".join(vstats)
                    stats = subprocess.Popen(["varnishstat", "-1", "-f" + fields, "-j"],
                                             stdout=subprocess.PIPE)
            except OSError, e:
                # Die and signal to tcollector not to run this script.
                sys.stderr.write("Error: %s\n" % e)
                sys.exit(13)

            metrics = ""
            for line in stats.stdout.readlines():
                metrics += line
            metrics = json.loads(metrics)

            timestamp = ""
            if use_varnishstat_timestamp:
                pattern = "%Y-%m-%dT%H:%M:%S"
                timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern)))
            else:
                timestamp = time.time()

            for k, v in metrics.iteritems():
                if k != "timestamp" and bad_regex.search(k) is None:
                    metric_name = metric_prefix + "." + k
                    print "%s %d %s %s" % \
                        (metric_name, timestamp, v['value'], ",".join(tags))

            sys.stdout.flush()
            time.sleep(interval)
def __call__(self):
    with utils.lower_privileges(self._logger):
        cpu_time = 0
        try:
            s = self.process.stat()
        except ProcessTerminatedError:
            self.log_warn("process terminated, abort")
            return

        cpu_time += int(s["utime"])
        cpu_time += int(s["cutime"])
        cpu_time += int(s["stime"])
        cpu_time += int(s["cstime"])
        cpu_time += int(s["guest_time"])
        cpu_time += int(s["cguest_time"])

        ts = int(time.time())
        self._readq.nput("cloudwiz-agent.cputime %s %s" % (ts, cpu_time))
        self._readq.nput("cloudwiz-agent.mem_bytes %s %s type=vsize" % (ts, s["vsize"]))
        self._readq.nput("cloudwiz-agent.mem_bytes %s %s type=rss"
                         % (ts, int(s["rss"]) * resource.getpagesize()))
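The /proc/[pid]/stat time fields summed above (utime, stime, and their children/guest counterparts) are expressed in kernel clock ticks, not seconds, so the collector reports a raw tick count. If seconds were wanted instead, the conversion would look roughly like this (a sketch with an example value, not part of the collector):

import os

# Clock ticks per second (USER_HZ), typically 100 on Linux.
ticks_per_sec = os.sysconf("SC_CLK_TCK")

cpu_time = 12345  # example tick total, as accumulated above
cpu_seconds = float(cpu_time) / ticks_per_sec
print cpu_seconds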
def main():
    loadEnv()

    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        if pymongo is None:
            print >>sys.stderr, "error: Python module `pymongo' is missing"
            return 13

        for index, item in enumerate(CONFIG_CONN, start=0):
            conn = pymongo.MongoClient(host=item['host'], port=item['port'])
            if USER:
                conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
            CONFIG_CONN[index]['link'] = conn
        for index, item in enumerate(MONGOS_CONN, start=0):
            conn = pymongo.MongoClient(host=item['host'], port=item['port'])
            if USER:
                conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
            MONGOS_CONN[index]['link'] = conn
        for index, item in enumerate(REPLICA_CONN, start=0):
            conn = pymongo.MongoClient(host=item['host'], port=item['port'])
            if USER:
                conn.admin.authenticate(USER, PASS, mechanism='DEFAULT')
            REPLICA_CONN[index]['link'] = conn

        while True:
            for conn in CONFIG_CONN:
                runServerStatus(conn['link'])
            for conn in MONGOS_CONN:
                runDbStats(conn['link'])
            for conn in REPLICA_CONN:
                runReplSetGetStatus(conn['link'])
            sys.stdout.flush()
            time.sleep(INTERVAL)
def main(): """Main loop""" # don't run if we're not a riak node if not os.path.exists("/usr/lib/riak"): sys.exit(13) with utils.lower_privileges(self._logger): sys.stdin.close() interval = 15 def print_stat(metric, value, tags=""): if value is not None: print "riak.%s %d %s %s" % (metric, ts, value, tags) while True: ts = int(time.time()) req = urllib2.urlopen("http://localhost:8098/stats") if req is not None: obj = json.loads(req.read()) for key in obj: if key not in MAP: continue # this is a hack, but Riak reports latencies in microseconds. they're fairly useless # to our human operators, so we're going to convert them to seconds. if 'latency' in MAP[key][0]: obj[key] = obj[key] / 1000000.0 print_stat(MAP[key][0], obj[key], MAP[key][1]) if 'connected_nodes' in obj: print_stat('connected_nodes', len(obj['connected_nodes']), '') req.close() sys.stdout.flush() time.sleep(interval)
def __call__(self):
    with utils.lower_privileges(self._logger):
        # We just care about ethN and emN interfaces.  We specifically
        # want to avoid bond interfaces, because interface stats are
        # still kept on the child interfaces when you bond.  By skipping
        # bond we avoid double counting.
        self.f_netdev.seek(0)
        ts = int(time.time())
        for line in self.f_netdev:
            m = re.match("\s+(eth\d+|em\d+_\d+/\d+|em\d+_\d+|em\d+|"
                         "p\d+p\d+_\d+/\d+|p\d+p\d+_\d+|p\d+p\d+):(.*)", line)
            if not m:
                continue
            intf = m.group(1)
            stats = m.group(2).split(None)

            def direction(idx):
                if idx >= 8:
                    return "out"
                return "in"

            for i in xrange(16):
                self._readq.nput("proc.net.%s %d %s iface=%s direction=%s"
                                 % (FIELDS[i], ts, stats[i], intf, direction(i)))
def main(): """ntpstats main loop""" collection_interval=DEFAULT_COLLECTION_INTERVAL if(ntpstat_conf): config = ntpstat_conf.get_config() collection_interval=config['collection_interval'] with utils.lower_privileges(self._logger): while True: ts = int(time.time()) try: ntp_proc = subprocess.Popen(["ntpq", "-p"], stdout=subprocess.PIPE) except OSError, e: if e.errno == errno.ENOENT: # looks like ntpdc is not available, stop using this collector sys.exit(13) # we signal tcollector to stop using this raise stdout, _ = ntp_proc.communicate() if ntp_proc.returncode == 0: for line in stdout.split("\n"): if not line: continue fields = line.split() if len(fields) <= 0: continue if fields[0].startswith("*"): offset=fields[8] continue print ("ntp.offset %d %s" % (ts, offset)) else: print >> sys.stderr, "ntpq -p, returned %r" % (ntp_proc.returncode) sys.stdout.flush() time.sleep(collection_interval)
def __call__(self):
    with utils.lower_privileges(self._logger):
        if json:
            HadoopNode(self.service, self.daemon, self.host, self.port,
                       REPLACEMENTS, self.readq, self._logger).emit()
        else:
            self.logger.error("This collector requires the `json' Python module.")
"remove", "rmdir", "rename", "link", "readdir", "readdirplus", "fsstat", "fsinfo", "pathconf", "commit", ), } def main(): """nfsstat main loop""" try: f_nfs = open("/proc/net/rpc/nfs") except IOError, e: print >>sys.stderr, "Failed to open input file: %s" % (e,) return 13 # Ask tcollector to not re-start us immediately. with utils.lower_privileges(self._logger): while True: f_nfs.seek(0) ts = int(time.time()) for line in f_nfs: fields = line.split() if fields[0] in nfs_client_proc_names.keys(): # NFSv4 # first entry should equal total count of subsequent entries assert int(fields[1]) == len(fields[2:]), ( "reported count (%d) does not equal list length (%d)" % (int(fields[1]), len(fields[2:]))) for idx, val in enumerate(fields[2:]): try: print ("nfs.client.rpc %d %s op=%s version=%s" % (ts, int(val), nfs_client_proc_names[fields[0]][idx], fields[0][4:]))
"pathconf", "commit", ), } def main(): """nfsstat main loop""" try: f_nfs = open("/proc/net/rpc/nfs") except IOError, e: print >> sys.stderr, "Failed to open input file: %s" % (e, ) return 13 # Ask tcollector to not re-start us immediately. with utils.lower_privileges(self._logger): while True: f_nfs.seek(0) ts = int(time.time()) for line in f_nfs: fields = line.split() if fields[0] in nfs_client_proc_names.keys(): # NFSv4 # first entry should equal total count of subsequent entries assert int(fields[1]) == len(fields[2:]), ( "reported count (%d) does not equal list length (%d)" % (int(fields[1]), len(fields[2:]))) for idx, val in enumerate(fields[2:]): try: print("nfs.client.rpc %d %s op=%s version=%s" % (ts, int(val),
def __call__(self):
    with utils.lower_privileges(self._logger):
        ret_metrics = []
        devices = []
        self.f_mounts.seek(0)
        ts = int(time.time())

        for line in self.f_mounts:
            # Docs come from the fstab(5)
            # fs_spec     # Mounted block special device or remote filesystem
            # fs_file     # Mount point
            # fs_vfstype  # File system type
            # fs_mntops   # Mount options
            # fs_freq     # Dump(8) utility flags
            # fs_passno   # Order in which filesystem checks are done at reboot time
            try:
                fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = \
                    line.split(None)
            except ValueError, e:
                self._readq.nput("df.state %d %s" % (ts, "1"))
                self.log_exception("can't parse line at /proc/mounts.")
                continue

            if fs_spec == "none":
                continue
            elif fs_vfstype in FSTYPE_IGNORE or fs_vfstype.startswith("fuse."):
                continue
            # startswith(tuple) avoided to preserve support of Python 2.4
            elif fs_file.startswith("/dev") or fs_file.startswith("/sys") or \
                    fs_file.startswith("/proc") or fs_file.startswith("/lib") or \
                    fs_file.startswith("net:"):
                continue

            # keep /dev/xxx device with shorter fs_file (remove mount binds)
            device_found = False
            if fs_spec.startswith("/dev"):
                for device in devices:
                    if fs_spec == device[0]:
                        device_found = True
                        if len(fs_file) < len(device[1]):
                            device[1] = fs_file
                        break
                if not device_found:
                    devices.append([fs_spec, fs_file, fs_vfstype])
            else:
                devices.append([fs_spec, fs_file, fs_vfstype])

        for device in devices:
            fs_spec, fs_file, fs_vfstype = device
            try:
                r = os.statvfs(fs_file)
            except OSError, e:
                self._readq.nput("df.state %d %s" % (ts, "1"))
                self.log_exception("can't get info for mount point: %s: %s"
                                   % (fs_file, e))
                continue

            used = r.f_blocks - r.f_bfree
            # conditional expression avoided to preserve support of Python 2.4
            # percent_used = 100 if r.f_blocks == 0 else used * 100.0 / r.f_blocks
            if r.f_blocks == 0:
                percent_used = 100
            else:
                percent_used = used * 100.0 / r.f_blocks

            self._readq.nput("df.bytes.total %d %s mount=%s fstype=%s"
                             % (ts, r.f_frsize * r.f_blocks, fs_file, fs_vfstype))
            self._readq.nput("df.bytes.used %d %s mount=%s fstype=%s"
                             % (ts, r.f_frsize * used, fs_file, fs_vfstype))
            self._readq.nput("df.bytes.percentused %d %s mount=%s fstype=%s"
                             % (ts, percent_used, fs_file, fs_vfstype))
            self._readq.nput("df.bytes.free %d %s mount=%s fstype=%s"
                             % (ts, r.f_frsize * r.f_bfree, fs_file, fs_vfstype))

            used = r.f_files - r.f_ffree
            # percent_used = 100 if r.f_files == 0 else used * 100.0 / r.f_files
            if r.f_files == 0:
                percent_used = 100
            else:
                percent_used = used * 100.0 / r.f_files

            self._readq.nput("df.inodes.total %d %s mount=%s fstype=%s"
                             % (ts, r.f_files, fs_file, fs_vfstype))
            self._readq.nput("df.inodes.used %d %s mount=%s fstype=%s"
                             % (ts, used, fs_file, fs_vfstype))
            self._readq.nput("df.inodes.percentused %d %s mount=%s fstype=%s"
                             % (ts, percent_used, fs_file, fs_vfstype))
            self._readq.nput("df.inodes.free %d %s mount=%s fstype=%s"
                             % (ts, r.f_ffree, fs_file, fs_vfstype))

        self._readq.nput("df.state %d %s" % (ts, "0"))
def __call__(self):
    with utils.lower_privileges(self._logger):
        containers = self.get_container_list()
        for containername in containers:
            self.get_container_stats(containername)
def __call__(self):
    init_stats = {
        "read_requests": 0,
        "read_merged": 0,
        "read_sectors": 0,
        "msec_read": 0,
        "write_requests": 0,
        "write_merged": 0,
        "write_sectors": 0,
        "msec_write": 0,
        "ios_in_progress": 0,
        "msec_total": 0,
        "msec_weighted_total": 0,
    }
    prev_stats = dict()
    with utils.lower_privileges(self._logger):
        self.f_diskstats.seek(0)
        ts = int(time.time())
        itv = read_uptime()[0]
        for line in self.f_diskstats:
            # maj, min, devicename, [list of stats, see above]
            values = line.split(None)
            # shortcut the deduper and just skip disks that
            # haven't done a single read.  This eliminates a bunch
            # of loopback, ramdisk, and cdrom devices but still
            # lets us report on the rare case that we actually use
            # a ramdisk.
            if values[3] == "0":
                continue

            if int(values[1]) % 16 == 0 and int(values[0]) > 1:
                metric = "iostat.disk."
            else:
                metric = "iostat.part."

            device = values[2]
            if len(values) == 14:
                # full stats line
                for i in range(11):
                    self._readq.nput("%s%s %d %s dev=%s"
                                     % (metric, FIELDS_DISK[i], ts,
                                        values[i + 3], device))
                ret = is_device(device, 0)
                # if a device or a partition, calculate the svctm/await/util
                if ret:
                    stats = dict(zip(FIELDS_DISK, values[3:]))
                    if device not in prev_stats:
                        prev_stats[device] = init_stats

                    rd_ios = float(stats.get("read_requests"))
                    wr_ios = float(stats.get("write_requests"))
                    nr_ios = rd_ios + wr_ios
                    prev_rd_ios = float(prev_stats[device].get("read_requests"))
                    prev_wr_ios = float(prev_stats[device].get("write_requests"))
                    prev_nr_ios = prev_rd_ios + prev_wr_ios

                    tput = (nr_ios - prev_nr_ios) * float(self.hz) / float(itv)
                    util = ((float(stats.get("msec_total")) -
                             float(prev_stats[device].get("msec_total"))) *
                            float(self.hz) / float(itv))

                    svctm = 0.0
                    await = 0.0
                    r_await = 0.0
                    w_await = 0.0
                    if tput:
                        svctm = util / tput

                    rd_ticks = stats.get("msec_read")
                    wr_ticks = stats.get("msec_write")
                    prev_rd_ticks = prev_stats[device].get("msec_read")
                    prev_wr_ticks = prev_stats[device].get("msec_write")
                    if rd_ios != prev_rd_ios:
                        r_await = (float(rd_ticks) - float(prev_rd_ticks)) / \
                                  float(rd_ios - prev_rd_ios)
                    if wr_ios != prev_wr_ios:
                        w_await = (float(wr_ticks) - float(prev_wr_ticks)) / \
                                  float(wr_ios - prev_wr_ios)
                    if nr_ios != prev_nr_ios:
                        await = (float(rd_ticks) + float(wr_ticks) -
                                 float(prev_rd_ticks) - float(prev_wr_ticks)) / \
                                float(nr_ios - prev_nr_ios)

                    self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "svctm", ts, svctm, device))
                    self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "r_await", ts, r_await, device))
                    self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "w_await", ts, w_await, device))
                    self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "await", ts, await, device))
                    self._readq.nput("%s%s %d %.2f dev=%s" % (metric, "util", ts, float(util / 1000.0), device))

                    prev_stats[device] = copy.deepcopy(stats)
            elif len(values) == 7:
                # partial stats line
                for i in range(4):
                    self._readq.nput("%s%s %d %s dev=%s"
                                     % (metric, FIELDS_PART[i], ts,
                                        values[i + 3], device))
            else:
                self.log_error("Cannot parse /proc/diskstats line: %s", line)
                continue
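The await figures above follow the usual iostat arithmetic: the delta of milliseconds spent on I/O divided by the delta of completed requests between two samples. A tiny worked example with made-up deltas:

# Made-up sample values from two consecutive /proc/diskstats readings.
rd_ios, prev_rd_ios = 150.0, 100.0      # completed reads
wr_ios, prev_wr_ios = 80.0, 50.0        # completed writes
rd_ticks, prev_rd_ticks = 900.0, 500.0  # msec spent reading
wr_ticks, prev_wr_ticks = 700.0, 400.0  # msec spent writing

# await = (delta read msec + delta write msec) / delta completed I/Os
nr_delta = (rd_ios - prev_rd_ios) + (wr_ios - prev_wr_ios)            # 80 I/Os
ticks_delta = (rd_ticks - prev_rd_ticks) + (wr_ticks - prev_wr_ticks)  # 700 msec
print ticks_delta / nr_delta  # -> 8.75 msec average wait per I/O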
def __call__(self):
    with utils.lower_privileges(self._logger):
        # proc.uptime
        self.f_uptime.seek(0)
        ts = int(time.time())
        for line in self.f_uptime:
            m = re.match("(\S+)\s+(\S+)", line)
            if m:
                self._readq.nput("proc.uptime.total %d %s" % (ts, m.group(1)))
                self._readq.nput("proc.uptime.now %d %s" % (ts, m.group(2)))

        # proc.meminfo
        self.f_meminfo.seek(0)
        ts = int(time.time())
        for line in self.f_meminfo:
            m = re.match("(\w+):\s+(\d+)\s+(\w+)", line)
            if m:
                if m.group(3).lower() == 'kb':
                    # convert from kB to B for easier graphing
                    value = str(int(m.group(2)) * 1024)
                else:
                    value = m.group(2)
                self._readq.nput("proc.meminfo.%s %d %s"
                                 % (m.group(1).lower(), ts, value))

        # proc.vmstat
        self.f_vmstat.seek(0)
        ts = int(time.time())
        for line in self.f_vmstat:
            m = re.match("(\w+)\s+(\d+)", line)
            if not m:
                continue
            if m.group(1) in ("pgpgin", "pgpgout", "pswpin",
                              "pswpout", "pgfault", "pgmajfault"):
                self._readq.nput("proc.vmstat.%s %d %s" % (m.group(1), ts, m.group(2)))

        # proc.stat
        self.f_stat.seek(0)
        ts = int(time.time())
        for line in self.f_stat:
            m = re.match("(\w+)\s+(.*)", line)
            if not m:
                continue
            if m.group(1).startswith("cpu"):
                cpu_m = re.match("cpu(\d+)", m.group(1))
                if cpu_m:
                    metric_percpu = '.percpu'
                    tags = ' cpu=%s' % cpu_m.group(1)
                else:
                    metric_percpu = ''
                    tags = ''
                fields = m.group(2).split()
                cpu_types = ['user', 'nice', 'system', 'idle', 'iowait',
                             'irq', 'softirq', 'guest', 'guest_nice']
                # We use zip to ignore fields that don't exist.
                for value, field_name in zip(fields, cpu_types):
                    self._readq.nput("proc.stat.cpu%s %d %s type=%s%s"
                                     % (metric_percpu, ts, value, field_name, tags))
            elif m.group(1) == "intr":
                self._readq.nput("proc.stat.intr %d %s" % (ts, m.group(2).split()[0]))
            elif m.group(1) == "ctxt":
                self._readq.nput("proc.stat.ctxt %d %s" % (ts, m.group(2)))
            elif m.group(1) == "processes":
                self._readq.nput("proc.stat.processes %d %s" % (ts, m.group(2)))
            elif m.group(1) == "procs_blocked":
                self._readq.nput("proc.stat.procs_blocked %d %s" % (ts, m.group(2)))

        # proc.loadavg
        self.f_loadavg.seek(0)
        ts = int(time.time())
        for line in self.f_loadavg:
            m = re.match("(\S+)\s+(\S+)\s+(\S+)\s+(\d+)/(\d+)\s+", line)
            if not m:
                continue
            self._readq.nput("proc.loadavg.1min %d %s" % (ts, m.group(1)))
            self._readq.nput("proc.loadavg.5min %d %s" % (ts, m.group(2)))
            self._readq.nput("proc.loadavg.15min %d %s" % (ts, m.group(3)))
            self._readq.nput("proc.loadavg.runnable %d %s" % (ts, m.group(4)))
            self._readq.nput("proc.loadavg.total_threads %d %s" % (ts, m.group(5)))

        # proc.kernel.entropy_avail
        self.f_entropy_avail.seek(0)
        ts = int(time.time())
        for line in self.f_entropy_avail:
            self._readq.nput("proc.kernel.entropy_avail %d %s" % (ts, line.strip()))

        # proc.interrupts
        self.f_interrupts.seek(0)
        ts = int(time.time())
        # Get number of CPUs from description line.
        num_cpus = len(self.f_interrupts.readline().split())
        for line in self.f_interrupts:
            cols = line.split()
            irq_type = cols[0].rstrip(":")
            if irq_type.isalnum():
                if irq_type.isdigit():
                    if cols[-2] == "PCI-MSI-edge" and "eth" in cols[-1]:
                        irq_type = cols[-1]
                    else:
                        continue  # Interrupt type is just a number, ignore.
                for i, val in enumerate(cols[1:]):
                    if i >= num_cpus:
                        # All values read, remaining cols contain textual
                        # description.
                        break
                    if not val.isdigit():
                        # something is weird, there should only be digit values
                        self.log_error("Unexpected interrupts value %r in %r: ",
                                       val, cols)
                        break
                    self._readq.nput("proc.interrupts %s %s type=%s cpu=%s"
                                     % (ts, val, irq_type, i))

        # proc.softirqs
        self.f_softirqs.seek(0)
        ts = int(time.time())
        # Get number of CPUs from description line.
        num_cpus = len(self.f_softirqs.readline().split())
        for line in self.f_softirqs:
            cols = line.split()
            irq_type = cols[0].rstrip(":")
            for i, val in enumerate(cols[1:]):
                if i >= num_cpus:
                    # All values read, remaining cols contain textual
                    # description.
                    break
                if not val.isdigit():
                    # something is weird, there should only be digit values
                    self.log_error("Unexpected softirq value %r in %r: ",
                                   val, cols)
                    break
                self._readq.nput("proc.softirqs %s %s type=%s cpu=%s"
                                 % (ts, val, irq_type, i))

        self._print_numa_stats(self.numastats)

        # Print scaling stats
        ts = int(time.time())
        for cpu_no in self.f_scaling_min.keys():
            f = self.f_scaling_min[cpu_no]
            f.seek(0)
            for line in f:
                self._readq.nput("proc.scaling.min %d %s cpu=%s"
                                 % (ts, line.rstrip('\n'), cpu_no))
        ts = int(time.time())
        for cpu_no in self.f_scaling_max.keys():
            f = self.f_scaling_max[cpu_no]
            f.seek(0)
            for line in f:
                self._readq.nput("proc.scaling.max %d %s cpu=%s"
                                 % (ts, line.rstrip('\n'), cpu_no))
        ts = int(time.time())
        for cpu_no in self.f_scaling_cur.keys():
            f = self.f_scaling_cur[cpu_no]
            f.seek(0)
            for line in f:
                self._readq.nput("proc.scaling.cur %d %s cpu=%s"
                                 % (ts, line.rstrip('\n'), cpu_no))

        self._readq.nput("procstats.state %s %s" % (int(time.time()), '0'))
def __init__(self, config, logger, readq):
    super(DockerAlauda, self).__init__(config, logger, readq)
    with utils.lower_privileges(self._logger):
        self._init_alauda_session()
def main():
    if not (tcp_bridge_conf and tcp_bridge_conf.enabled()):
        print >>sys.stderr, 'not enabled, or tcp_bridge_conf unavailable'
        sys.exit(13)

    # LOGGER: assumed module-level logger (`self' is undefined in main()).
    with utils.lower_privileges(LOGGER):
        def printm(string, time, value):
            out.write(m_namespace + string + ' ' + str(time) + ' ' + str(value) + '\n')

        def printmetrics():
            global m_delay
            global m_last
            ts = int(time.time())
            if ts > m_last + m_delay:
                printm('lines_read', ts, m_lines)
                printm('connections_processed', ts, m_connections)
                printm('processing_time', ts, m_ptime)
                printm('active', ts, 1)
                m_last = ts

        def clientthread(connection):
            global m_lines
            global m_connections
            global m_ptime
            start = time.time()
            f = connection.makefile()
            while True:
                data = f.readline()
                if not data:
                    break
                data = removePut(data)
                out.write(data)
                m_lines += 1
            f.close()
            connection.close()
            end = time.time()
            m_ptime += (end - start)
            m_connections += 1
            printmetrics()

        def removePut(line):
            if line.startswith('put '):
                return line[4:]
            else:
                return line

        try:
            if tcp_bridge_conf.port():
                PORT = tcp_bridge_conf.port()
            if tcp_bridge_conf.host():
                HOST = tcp_bridge_conf.host()
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind((HOST, PORT))
            sock.listen(1)
        except socket.error, msg:
            utils.err('could not open socket: %s' % msg)
            sys.exit(1)

        try:
            flush_delay = tcp_bridge_conf.flush_delay()
        except AttributeError:
            flush_delay = 60

        try:
            try:
                while 1:
                    connection, address = sock.accept()
                    start_new_thread(clientthread, (connection,))
            except KeyboardInterrupt:
                utils.err("keyboard interrupt, exiting")
        finally:
            sock.close()