Example #1
def main():
  """dfstats main loop"""
  try:
    f_mounts = open("/proc/mounts", "r")
  except IOError as e:
    utils.err("error: can't open /proc/mounts: %s" % e)
    return 13 # Ask tcollector to not respawn us
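The snippet above stops right after opening /proc/mounts. As a minimal sketch of the continuation (illustrative only, not the collector's actual loop), each line of /proc/mounts splits into device, mount point, and filesystem type:

# Illustrative sketch, not the collector's real loop: each /proc/mounts
# line is "device mountpoint fstype options dump pass".
with open("/proc/mounts") as f_mounts:
    for line in f_mounts:
        fields = line.split()
        if len(fields) < 3:
            continue
        device, mount_point, fs_type = fields[:3]
        # a real dfstats collector would call os.statvfs(mount_point) here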
Example #2
def find_conf_file(pid):
    """Returns config file for couchbase-server."""
    try:
        fd = open('/proc/%s/cmdline' % pid)
    except IOError as e:
        utils.err("Couchbase (pid %s) went away? %s" % (pid, e))
        return
Example #3
def find_databases(dbs=None):
  """Returns a map of dbname (string) to DB instances to monitor.

  Args:
    dbs: A map of dbname (string) to DB instances already monitored.
      This map will be modified in place if it's not None.
  """
  sockfiles = find_sockfiles()
  if dbs is None:
    dbs = {}
  for sockfile in sockfiles:
    dbname = get_dbname(sockfile)
    if dbname in dbs:
      continue
    if not dbname:
      continue
    try:
      db = mysql_connect(sockfile)
      cursor = db.cursor()
      cursor.execute("SELECT VERSION()")
    except (EnvironmentError, EOFError, RuntimeError, socket.error,
            MySQLdb.MySQLError) as e:
      utils.err("Couldn't connect to %s: %s" % (sockfile, e))
      continue
    version = cursor.fetchone()[0]
    dbs[dbname] = DB(sockfile, dbname, db, cursor, version)
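find_databases constructs DB records whose fields match the constructor call above; the real class in the collector may carry extra behavior, but a minimal stand-in could be:

import collections

# Minimal stand-in for the DB record used above (assumption: a plain
# value object; the real class may add reconnect helpers).
DB = collections.namedtuple("DB", "sockfile dbname db cursor version")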
Example #4
def read_socket(sock):
    """
    Connect to the HAProxy stats socket and read the data from the show stat
    command, allowing up to three retries before aborting. This setup assumes
    that the socket will be closed and doesn't try to keep it open, reconnecting
    on each attempt to fetch the statistics. (Should better handle restarts
    and reloads of the monitored process.)
    """

    stats = ''

    # Establish a socket to connect to the unix socket on HAProxy
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(DEFAULT_SOCKET)

    for attempt in range(3):
        try:
            sock.send("show stat\n")
            data = sock.recv(4096)
            while data:
                stats += data
                data = sock.recv(4096)
            return stats.split("\n")
        except IOError as error:
            utils.err("Error: Connection to HAProxy socket lost: %s (%d)" %
                      (error, attempt))
            sock.close()
            # A closed socket cannot be reconnected; create a fresh one
            sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            sock.connect(DEFAULT_SOCKET)
            # Reset stats in case it was broken mid-stream
            stats = ''
Example #5
def main(args):
    """Collects and dumps stats from a MySQL server."""
    if not find_sockfiles():  # Nothing to monitor.
        return 13  # Ask tcollector to not respawn us.
    if MySQLdb is None:
        utils.err("error: Python module `MySQLdb' is missing")
        return 1

    last_db_refresh = now()
    dbs = find_databases()
    while True:
        ts = now()
        if ts - last_db_refresh >= DB_REFRESH_INTERVAL:
            find_databases(dbs)
            last_db_refresh = ts

        errs = []
        for dbname, db in dbs.iteritems():
            try:
                collect(db)
            except (EnvironmentError, EOFError, RuntimeError, socket.error,
                    MySQLdb.MySQLError) as e:
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # Exit on a broken pipe.  There's no point in continuing
                    # because no one will read our stdout anyway.
                    return 2
                utils.err("error: failed to collect data from %s: %s" %
                          (db, e))
                errs.append(dbname)

        for dbname in errs:
            del dbs[dbname]

        sys.stdout.flush()
        time.sleep(COLLECTION_INTERVAL)
Example #6
def main():
    pid = haproxy_pid()
    if not pid:
        utils.err("Error: HAProxy is not running")
        return 13  # Ask tcollector to not respawn us.

    conf_file = find_conf_file(pid)
    if not conf_file:
        return 13

    sock_file = find_sock_file(conf_file)
    if sock_file is None:
        utils.err("Error: HAProxy is not listening on any unix domain socket")
        return 13

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(sock_file)

    # put haproxy to interactive mode, otherwise haproxy closes
    # connection after first command.
    # See haproxy documentation section 9.2. Unix Socket commands.
    sock.send("prompt\n")

    while True:
        collect_stats(sock)
        time.sleep(COLLECTION_INTERVAL)
Example #7
def main():
    if json is None:
        utils.err("This collector requires the 'json' Python module.")
        return 13
    while True:
        read_impala_log()
        time.sleep(1)
Example #8
def cloudwatch_query_metric(cloudwatch, region, metric):
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(seconds=COLLECTION_INTERVAL)
    global STATISTICS
    # TODO: statistics no longer need to be one at a time so refactor that
    response = cloudwatch.get_metric_statistics(
        Namespace=metric["Namespace"],
        MetricName=metric["MetricName"],
        Dimensions=metric["Dimensions"],
        StartTime=start,
        EndTime=end,
        Period=300,
        Statistics=list(STATISTICS),
        Unit='Count'
    )

    for datapoint in response['Datapoints']:
        for statistic in STATISTICS:
            timestamp = format_timestamp(str(datapoint['Timestamp']))
            value = int(datapoint[statistic])
            metric_name, tags = build_tag_list(metric['MetricName'].lower(), region, metric['Dimensions'])
            namespace = metric["Namespace"].lower().replace('/', '.')
            output = "%s.%s.%s %s %s %s" % (
                namespace, metric_name, statistic.lower(), str(timestamp),
                str(value),
                tags)
            #sys.stderr.write('output: %s\n' % (output))
            if validate_line_parses(output):
                sendQueue.put({'timestamp': timestamp, 'output': output})
            else:
                utils.err("Invalid Line: %s" % output)
Example #9
def read_socket(sock):
    """
    Connect to the HAProxy stats socket and read the data from the show stat
    command, allowing up to three retries before aborting. This setup assumes
    that the socket will be closed and doesn't try to keep it open, reconnecting
    on each attempt to fetch the statistics. (Should better handle restarts
    and reloads of the monitored process.)
    """

    stats = ''

    # Establish a socket to connect to the unix socket on HAProxy
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(DEFAULT_SOCKET)

    for attempt in range(3):
        try:
            sock.send("show stat\n")
            data = sock.recv(4096)
            while data:
                stats += data
                data = sock.recv(4096)
            return stats.split("\n")
        except IOError as error:
            utils.err("Error: Connection to HAProxy socket lost: %s (%d)" %
                      (error, attempt))
            sock.close()
            # A closed socket cannot be reconnected; create a fresh one
            sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            sock.connect(DEFAULT_SOCKET)
            # Reset stats in case it was broken mid-stream
            stats = ''
Example #10
def main(argv):
  with utils.lower_privileges(self._logger):
      socket.setdefaulttimeout(DEFAULT_TIMEOUT)
      servers = []

      if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 1

      for conf in elasticsearch_conf.get_servers():
        server = httplib.HTTPConnection(*conf)
        try:
          server.connect()
        except socket.error as exc:
          if exc.errno == errno.ECONNREFUSED:
            continue
          raise
        servers.append(server)

      if len(servers) == 0:
        return 13  # No ES running, ask tcollector to not respawn us.

      status = node_status(server)
      version = status["version"]["number"]

      while True:
        for server in servers:
          _collect_server(server, version)
        time.sleep(COLLECTION_INTERVAL)
Example #11
def main(argv):
    utils.drop_privileges()
    socket.setdefaulttimeout(DEFAULT_TIMEOUT)
    servers = []

    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 1

    for conf in elasticsearch_conf.get_servers():
        server = HTTPConnection(*conf)
        try:
            server.connect()
        except socket.error as exc:
            if exc.errno == errno.ECONNREFUSED:
                continue
            raise
        servers.append(server)

    if len(servers) == 0:
        return 13  # No ES running, ask tcollector to not respawn us.

    lock = threading.Lock()
    while True:
        threads = []
        for server in servers:
            status = node_status(server)
            version = status["version"]["number"]
            t = threading.Thread(target=_collect_server,
                                 args=(server, version, lock))
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()
        time.sleep(COLLECTION_INTERVAL)
Example #12
    def process_metric(self, timestamp, metric, tags, value, mbean_domain, mbean_properties):
        if not mbean_domain.startswith("kafka") and not mbean_domain == "java.lang":
            utils.err("Unexpected mbean domain = %r" % mbean_domain)
            return

        if mbean_domain == "java.lang":
            jmx_service = mbean_properties.pop("type", "jvm")
        # Kafka producer metrics
        elif mbean_domain == "kafka.producer":
            self._process_kafka_producer_metric(timestamp, metric, tags, value, mbean_domain, mbean_properties)
            return
        # Kafka consumer metrics
        elif mbean_domain == "kafka.consumer":
            self._process_kafka_consumer_metric(timestamp, metric, tags, value, mbean_domain, mbean_properties)
            return
        # Kafka broker metrics
        elif mbean_domain.startswith("kafka."):
            domain_parts = mbean_domain.split(".")
            # drop the kafka prefix
            mbean_domain = mbean_domain[len("kafka."):]
            jmx_service = mbean_properties.get("type", domain_parts[-1])
        else:
            return

        if mbean_properties:
            tags += " " + " ".join(k + "=" + v for k, v in
                                   mbean_properties.iteritems())

        jmx_service = JmxMonitor.SHORT_SERVICE_NAMES.get(jmx_service, jmx_service)
        metric = mbean_domain + "." + jmx_service.lower() + "." + metric

        self.emit(metric, timestamp, value, tags)
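SHORT_SERVICE_NAMES is a class-level lookup that shortens verbose JMX service names before they are embedded in the metric name; its real contents are not shown above, so the entries below are purely illustrative:

class JmxMonitor(object):
    # Illustrative entries only; the real mapping ships with the monitor.
    SHORT_SERVICE_NAMES = {
        "GarbageCollector": "gc",
        "OperatingSystem": "os",
        "Threading": "threads",
    }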
Example #13
def main(argv):
    with utils.lower_privileges(self._logger):
        socket.setdefaulttimeout(DEFAULT_TIMEOUT)
        servers = []

        if json is None:
            utils.err("This collector requires the `json' Python module.")
            return 1

        for conf in elasticsearch_conf.get_servers():
            server = httplib.HTTPConnection(*conf)
            try:
                server.connect()
            except socket.error as exc:
                if exc.errno == errno.ECONNREFUSED:
                    continue
                raise
            servers.append(server)

        if len(servers) == 0:
            return 13  # No ES running, ask tcollector to not respawn us.

        status = node_status(server)
        version = status["version"]["number"]

        while True:
            for server in servers:
                _collect_server(server, version)
            time.sleep(COLLECTION_INTERVAL)
Example #14
def main():
    """dfstats main loop"""
    try:
        f_mounts = open("/proc/mounts", "r")
    except IOError as e:
        utils.err("error: can't open /proc/mounts: %s" % e)
        return 13  # Ask tcollector to not respawn us
Example #15
def get_metrics(webserver_url, username, password, params):
    try:
        r = requests.get(webserver_url,
                         auth=(username, password),
                         verify=False,
                         params=params)
    except requests.exceptions.ConnectionError as error:
        print >> sys.stderr, "Error connecting: %s" % error
        utils.err("Connection error: %s" % error)
        raise

    try:
        r.raise_for_status()
    except requests.exceptions.HTTPError as error:
        print >> sys.stderr, "Request was not successful: %s" % error
        utils.err("HTTP error getting metrics from '%s' - %s" %
                  (webserver_url, error))
        return 13  # tell tcollector to not respawn

    response = r.json()
    try:
        data = response['data']
    except KeyError as e:
        print >> sys.stderr, "Did not get a 'data' key in the response."
        print >> sys.stderr, response
        raise
    return data
Example #16
def find_sock_file(conf_file):
  """Returns the unix socket file of haproxy."""
  try:
    fd = open(conf_file)
  except IOError as e:
    utils.err("Error: %s. Config file path is relative: %s" % (e, conf_file))
    return None
Example #17
def find_bindir_path(config_file):
  """Returns the bin directory path"""
  try:
    fd = open(config_file)
  except IOError as e:
    utils.err("Error for Config file (%s): %s" % (config_file, e))
    return None
Example #18
def main():
  pid = haproxy_pid()
  if not pid:
    utils.err("Error: HAProxy is not running")
    return 13  # Ask tcollector to not respawn us.

  conf_file = find_conf_file(pid)
  if not conf_file:
    return 13

  sock_file = find_sock_file(conf_file)
  if sock_file is None:
    utils.err("Error: HAProxy is not listening on any unix domain socket")
    return 13

  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.connect(sock_file)

  # put haproxy to interactive mode, otherwise haproxy closes
  # connection after first command.
  # See haproxy documentation section 9.2. Unix Socket commands.
  sock.send("prompt\n")

  while True:
    collect_stats(sock)
    time.sleep(COLLECTION_INTERVAL)
Example #19
def find_conf_file(pid):
  """Returns config file for couchbase-server."""
  try:
    fd = open('/proc/%s/cmdline' % pid)
  except IOError as e:
    utils.err("Couchbase (pid %s) went away? %s" % (pid, e))
    return
Example #20
def main():
    try:
        check_imports()

        conn = libvirt.openReadOnly(LIBVIRT_URI)
        if conn is None:
            utils.err("Failed to open connection to the hypervisor")
            return ERROR_CODE_DONT_RETRY

        while True:
            domains = conn.listAllDomains()
            random.shuffle(domains)
            pids = get_pids()

            count = 0
            for domain in domains:
                if process_domain(domain, pids.get(domain.UUIDString())):
                    count += 1  # count only successfully processed VMs

            # write libvirt.vm.count metric
            print("%s %d %s" % (FIELDS["count"], int(time.time()), count))

            sys.stdout.flush()
            time.sleep(INTERVAL)

    except LibvirtVmProcessingError as err:
        utils.err(err.value)
        return ERROR_CODE_DONT_RETRY
Example #21
def main(args):
    """ Calls HadoopYarnNodeManager at interval secs
      and emits metrics to stdout for TCollector """
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    utils.drop_privileges()
    parser = argparse.ArgumentParser()
    parser.add_argument('-H',
                        '--host',
                        default='localhost',
                        help='Host to connect to (default: localhost)')
    parser.add_argument('-P',
                        '--port',
                        default=8042,
                        type=int,
                        help='Port to connect to (default: 8042)')
    parser.add_argument('-i',
                        '--interval',
                        default=90,
                        type=int,
                        help='Interval at which to emit metrics')
    args = parser.parse_args(args[1:])
    host = args.host
    port = args.port
    interval = args.interval
    yarn_service = HadoopYarnNodeManager(host=host, port=port)
    while True:
        yarn_service.emit()
        time.sleep(interval)
    return 0
Example #22
def find_conf_file(pid):
  """Returns the conf file of haproxy."""
  try:
    output = subprocess.check_output(["ps", "--no-headers", "-o", "cmd", pid])
  except subprocess.CalledProcessError as e:
    utils.err("HAProxy (pid %s) went away? %s" % (pid, e))
    return None
Example #23
def find_databases(dbs=None):
    """Returns a map of dbname (string) to DB instances to monitor.

  Args:
    dbs: A map of dbname (string) to DB instances already monitored.
      This map will be modified in place if it's not None.
  """
    sockfiles = find_sockfiles()
    if dbs is None:
        dbs = {}
    for sockfile in sockfiles:
        dbname = get_dbname(sockfile)
        if dbname in dbs:
            continue
        if not dbname:
            continue
        try:
            db = mysql_connect(sockfile)
            cursor = db.cursor()
            cursor.execute("SELECT VERSION()")
        except (EnvironmentError, EOFError, RuntimeError, socket.error,
                MySQLdb.MySQLError) as e:
            utils.err("Couldn't connect to %s: %s" % (sockfile, e))
            continue
        version = cursor.fetchone()[0]
        dbs[dbname] = DB(sockfile, dbname, db, cursor, version)
Example #24
def main(args):
  """Collects and dumps stats from a MySQL server."""
  if not find_sockfiles():  # Nothing to monitor.
    return 13               # Ask tcollector to not respawn us.
  if MySQLdb is None:
    utils.err("error: Python module `MySQLdb' is missing")
    return 1

  last_db_refresh = now()
  dbs = find_databases()
  while True:
    ts = now()
    if ts - last_db_refresh >= DB_REFRESH_INTERVAL:
      find_databases(dbs)
      last_db_refresh = ts

    errs = []
    for dbname, db in dbs.iteritems():
      try:
        collect(db)
      except (EnvironmentError, EOFError, RuntimeError, socket.error,
              MySQLdb.MySQLError) as e:
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Exit on a broken pipe.  There's no point in continuing
          # because no one will read our stdout anyway.
          return 2
        utils.err("error: failed to collect data from %s: %s" % (db, e))
        errs.append(dbname)

    for dbname in errs:
      del dbs[dbname]

    sys.stdout.flush()
    time.sleep(COLLECTION_INTERVAL)
Example #25
def main(argv):
  utils.drop_privileges()
  socket.setdefaulttimeout(DEFAULT_TIMEOUT)
  servers = []

  if json is None:
    utils.err("This collector requires the `json' Python module.")
    return 1

  for conf in elasticsearch_conf.get_servers():
    server = HTTPConnection(*conf)
    try:
      server.connect()
    except socket.error as exc:
      if exc.errno == errno.ECONNREFUSED:
        continue
      raise
    servers.append(server)

  if len(servers) == 0:
    return 13  # No ES running, ask tcollector to not respawn us.

  lock = threading.Lock()
  while True:
    threads = []
    for server in servers:
      status = node_status(server)
      version = status["version"]["number"]
      t = threading.Thread(target=_collect_server, args=(server, version, lock))
      t.start()
      threads.append(t)
    for thread in threads:
      thread.join()
    time.sleep(COLLECTION_INTERVAL)
Example #26
def find_bindir_path(config_file):
    """Returns the bin directory path"""
    try:
        fd = open(config_file)
    except IOError as e:
        utils.err("Error for Config file (%s): %s" % (config_file, e))
        return None
Example #27
def process_gc_log(collector):

    prefix = collector['prefix']
    # get latest gc log to process
    gc_log = get_latest_gc_log(collector['log_dir'],
                               collector['log_name_pattern'])

    # update current_file and current_file_pos if this is the first time to
    # process the gc log
    if collector['current_file'] != gc_log:
        collector['current_file'] = gc_log
        with open(gc_log, 'rb') as file_handler:
            collector['current_file_pos'] = get_file_end(file_handler)
        return
    try:
        with open(gc_log, 'rb') as file_handler:

            pos = collector['current_file_pos']
            collector['current_file_pos'] = get_file_end(file_handler)
            file_handler.seek(pos)

            # Do not use foreach loop because inside function process_gc_record
            # will call file_handler.readline(). The reason is that some GC
            # event are multiline and need to be processed as a whole
            while True:
                line = file_handler.readline()
                if len(line) == 0:
                    break
                pattern_name, matcher = match_pattern(line)
                if pattern_name == GC_START_TIME_PATTERN:
                    year, month, day, hour, minute, second, timezone = [
                        int(matcher.group(i)) for i in range(1, 8)
                    ]
                    cause = matcher.group(8)
                    timestamp = true_unix_timestamp(year, month, day, hour,
                                                    minute, second, timezone)
                    process_gc_record(prefix, file_handler, timestamp, cause,
                                      collector)
                else:
                    unmatched_gc_log(line)

        current_timestamp_in_sec = int(time.time())

        if collector['timestamp'] is not None:
            for gen, value in collector['gensize'].items():
                print("%s.gc.g1.gensize %s %s gen=%s" % (
                    prefix, current_timestamp_in_sec, value, gen))

        # publish gc event count metrics
        for event, value in collector['count'].items():
            print "%s.gc.g1.event.count %s %s event=%s" % (
                prefix, current_timestamp_in_sec, value, event)

    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        utils.err(''.join(
            traceback.format_exception(exc_type, exc_value, exc_traceback)))

    return 0
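match_pattern and GC_START_TIME_PATTERN are defined elsewhere in the collector. Since groups 1-7 are parsed as integers (year through numeric timezone offset) and group 8 is the GC cause, the pattern presumably resembles this hypothetical sketch for G1 timestamps such as 2015-06-17T21:04:18.995-0700: ... (G1 Evacuation Pause):

import re

# Hypothetical approximation of GC_START_TIME_PATTERN; the real regex
# ships with the collector. int() copes with offsets like "-0700".
GC_START_TIME_RE = re.compile(
    r"^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.\d+"
    r"([+-]\d{4}).*\(([^)]+)\)")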
Example #28
def main():
    utils.drop_privileges()
    if BinLogStreamReader is None:
        utils.err("error: Python module `pymysqlreplication' is missing")
        return 1
    settings = zabbix_bridge_conf.get_settings()

    # Set blocking to True if you want to block and wait for the next event at
    # the end of the stream
    stream = BinLogStreamReader(connection_settings=settings['mysql'],
                                server_id=settings['slaveid'],
                                only_events=[WriteRowsEvent],
                                resume_stream=True,
                                blocking=True)

    db_filename = settings['sqlitedb']
    dbcache = sqlite3.connect(':memory:')
    cachecur = dbcache.cursor()
    cachecur.execute("ATTACH DATABASE '%s' as 'dbfile'" % (db_filename,))
    cachecur.execute('CREATE TABLE zabbix_cache AS SELECT * FROM dbfile.zabbix_cache')
    cachecur.execute('CREATE UNIQUE INDEX uniq_zid on zabbix_cache (id)')

    # tcollector.zabbix_bridge namespace for internal Zabbix bridge metrics.
    log_pos = 0
    key_lookup_miss = 0
    sample_last_ts = int(time.time())
    last_key_lookup_miss = 0

    for binlogevent in stream:
        if binlogevent.schema == settings['mysql']['db']:
            table = binlogevent.table
            log_pos = binlogevent.packet.log_pos
            if table == 'history' or table == 'history_uint':
                for row in binlogevent.rows:
                    r = row['values']
                    itemid = r['itemid']
                    cachecur.execute('SELECT id, key, host, proxy FROM zabbix_cache WHERE id=?', (itemid,))
                    row = cachecur.fetchone()
                    if (row is not None):
                        print("zbx.%s %d %s host=%s proxy=%s" % (row[1], r['clock'], r['value'], row[2], row[3]))
                        if ((int(time.time()) - sample_last_ts) > settings['internal_metric_interval']): # Sample internal metrics @ 10s intervals
                            sample_last_ts = int(time.time())
                            print("tcollector.zabbix_bridge.log_pos %d %s" % (sample_last_ts, log_pos))
                            print("tcollector.zabbix_bridge.key_lookup_miss %d %s" % (sample_last_ts, key_lookup_miss))
                            print("tcollector.zabbix_bridge.timestamp_drift %d %s" % (sample_last_ts, (sample_last_ts - r['clock'])))
                            if ((key_lookup_miss - last_key_lookup_miss) > settings['dbrefresh']):
                                print("tcollector.zabbix_bridge.key_lookup_miss_reload %d %s" % (sample_last_ts, (key_lookup_miss - last_key_lookup_miss)))
                                cachecur.execute('DROP TABLE zabbix_cache')
                                cachecur.execute('CREATE TABLE zabbix_cache AS SELECT * FROM dbfile.zabbix_cache')
                                cachecur.execute('CREATE UNIQUE INDEX uniq_zid on zabbix_cache (id)')
                                last_key_lookup_miss = key_lookup_miss
                    else:
                        # TODO: Consider https://wiki.python.org/moin/PythonDecoratorLibrary#Retry
                        utils.err("error: Key lookup miss for %s" % (itemid))
                        key_lookup_miss += 1
                sys.stdout.flush()

    dbcache.close()
    stream.close()
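The bridge assumes a pre-built zabbix_cache table in the SQLite file named by settings['sqlitedb']; from the SELECT above it needs at least the four columns below (a hedged sketch, the real schema may carry more):

import sqlite3

# Assumed minimal schema for dbfile.zabbix_cache; the real table is
# produced by a separate cache-building step. The path is illustrative.
conn = sqlite3.connect("zabbix_cache.db")
conn.execute("CREATE TABLE IF NOT EXISTS zabbix_cache ("
             "id INTEGER PRIMARY KEY, key TEXT, host TEXT, proxy TEXT)")
conn.commit()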
Example #29
def find_sock_file(conf_file):
    """Returns the unix socket file of haproxy."""
    try:
        fd = open(conf_file)
    except IOError as e:
        utils.err("Error: %s. Config file path is relative: %s" %
                  (e, conf_file))
        return None
Example #30
def find_conf_file(pid):
    """Returns the conf file of haproxy."""
    try:
        output = subprocess.check_output(
            ["ps", "--no-headers", "-o", "cmd", pid])
    except subprocess.CalledProcessError as e:
        utils.err("HAProxy (pid %s) went away? %s" % (pid, e))
        return None
Example #31
def main(args):
    """Collects and dumps stats from a PostgreSQL server."""

    try:
        db = postgresqlutils.connect()
    except Exception as e:
        utils.err("error: Could not initialize collector: %s" % (e))
        return 13  # Ask tcollector to not respawn us
Example #32
def main():
    """ifstat main loop"""

    try:
        f_netdev = open("/proc/net/dev")
    except IOError as e:
        utils.err("error: can't open /proc/net/dev: %s" % e)
        return 13 # Ask tcollector to not respawn us
Example #33
def main():
    """ifstat main loop"""

    try:
        f_netdev = open("/proc/net/dev")
    except IOError as e:
        utils.err("error: can't open /proc/net/dev: %s" % e)
        return 13  # Ask tcollector to not respawn us
Example #34
def main():
    utils.drop_privileges()
    if BinLogStreamReader is None:
        utils.err("error: Python module `pymysqlreplication' is missing")
        return 1
    settings = zabbix_bridge_conf.get_settings()

    # Set blocking to True if you want to block and wait for the next event at
    # the end of the stream
    stream = BinLogStreamReader(connection_settings=settings['mysql'],
                                server_id=settings['slaveid'],
                                only_events=[WriteRowsEvent],
                                resume_stream=True,
                                blocking=True)

    db_filename = settings['sqlitedb']
    dbcache = sqlite3.connect(':memory:')
    cachecur = dbcache.cursor()
    cachecur.execute("ATTACH DATABASE '%s' as 'dbfile'" % (db_filename,))
    cachecur.execute('CREATE TABLE zabbix_cache AS SELECT * FROM dbfile.zabbix_cache')
    cachecur.execute('CREATE UNIQUE INDEX uniq_zid on zabbix_cache (id)')

    # tcollector.zabbix_bridge namespace for internal Zabbix bridge metrics.
    log_pos = 0
    key_lookup_miss = 0
    sample_last_ts = int(time.time())
    last_key_lookup_miss = 0

    for binlogevent in stream:
        if binlogevent.schema == settings['mysql']['db']:
            table = binlogevent.table
            log_pos = binlogevent.packet.log_pos
            if table == 'history' or table == 'history_uint':
                for row in binlogevent.rows:
                    r = row['values']
                    itemid = r['itemid']
                    cachecur.execute('SELECT id, key, host, proxy FROM zabbix_cache WHERE id=?', (itemid,))
                    row = cachecur.fetchone()
                    if (row is not None):
                        print "zbx.%s %d %s host=%s proxy=%s" % (row[1], r['clock'], r['value'], row[2], row[3])
                        if ((int(time.time()) - sample_last_ts) > settings['internal_metric_interval']): # Sample internal metrics @ 10s intervals
                            sample_last_ts = int(time.time())
                            print "tcollector.zabbix_bridge.log_pos %d %s" % (sample_last_ts, log_pos)
                            print "tcollector.zabbix_bridge.key_lookup_miss %d %s" % (sample_last_ts, key_lookup_miss)
                            print "tcollector.zabbix_bridge.timestamp_drift %d %s" % (sample_last_ts, (sample_last_ts - r['clock']))
                            if ((key_lookup_miss - last_key_lookup_miss) > settings['dbrefresh']):
                                print "tcollector.zabbix_bridge.key_lookup_miss_reload %d %s" % (sample_last_ts, (key_lookup_miss - last_key_lookup_miss))
                                cachecur.execute('DROP TABLE zabbix_cache')
                                cachecur.execute('CREATE TABLE zabbix_cache AS SELECT * FROM dbfile.zabbix_cache')
                                last_key_lookup_miss = key_lookup_miss
                    else:
                        # TODO: Consider https://wiki.python.org/moin/PythonDecoratorLibrary#Retry
                        utils.err("error: Key lookup miss for %s" % (itemid))
                        key_lookup_miss += 1
                sys.stdout.flush()

    dbcache.close()
    stream.close()
Example #35
def get_dbname(sockfile):
  """Returns the name of the DB based on the path to the socket file."""
  if sockfile in DEFAULT_SOCKFILES:
    return "default"
  m = re.search("/mysql-(.+)/[^.]+\.sock$", sockfile)
  if not m:
    utils.err("error: couldn't guess the name of the DB for " + sockfile)
    return None
  return m.group(1)
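For instance, under the path convention the regex encodes (paths here are illustrative):

# Illustrative paths; DEFAULT_SOCKFILES comes from the collector's config.
print(get_dbname("/var/run/mysql-ecommerce/mysqld.sock"))  # -> "ecommerce"
print(get_dbname("/tmp/mysqld.sock"))  # no match: error logged, returns None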
Example #36
def loop(self):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    while True:
        self.emit()
        time.sleep(self.delay)
    return 0
Example #37
def get_dbname(sockfile):
    """Returns the name of the DB based on the path to the socket file."""
    if sockfile in DEFAULT_SOCKFILES:
        return "default"
    m = re.search("/mysql-(.+)/[^.]+\.sock$", sockfile)
    if not m:
        utils.err("error: couldn't guess the name of the DB for " + sockfile)
        return None
    return m.group(1)
Example #38
def validate_config():
    aws_profile = aws_cloudwatch_conf.get_aws_profile()
    access_key, secret_access_key = aws_cloudwatch_conf.get_accesskey_secretkey()
    if (access_key == '<access_key_id>' or secret_access_key == '<secret_access_key>') and aws_profile is None:
        utils.err("Cloudwatch Collector is not configured\n")
        sys.exit(13)
    if not aws_cloudwatch_conf.enabled:
        utils.err("Cloudwatch Collector is not enabled\n")
        sys.exit(13)
Example #39
def collect(db):
    """
  Collects and prints stats.

  Here we collect only general info, for full list of data for collection
  see http://www.postgresql.org/docs/9.2/static/monitoring-stats.html
  """

    try:
        cursor = db.cursor()

        # general statistics
        cursor.execute(
            "SELECT pg_stat_database.*, pg_database_size"
            " (pg_database.datname) AS size FROM pg_database JOIN"
            " pg_stat_database ON pg_database.datname ="
            " pg_stat_database.datname WHERE pg_stat_database.datname"
            " NOT IN ('template0', 'template1', 'postgres')")
        ts = time.time()
        stats = cursor.fetchall()

        #  datid |  datname   | numbackends | xact_commit | xact_rollback | blks_read  |  blks_hit   | tup_returned | tup_fetched | tup_inserted | tup_updated | tup_deleted | conflicts | temp_files |  temp_bytes  | deadlocks | blk_read_time | blk_write_time |          stats_reset          |     size
        result = {}
        for stat in stats:
            database = stat[1]
            result[database] = stat

        for database in result:
            for i in range(2, len(cursor.description)):
                metric = cursor.description[i].name
                value = result[database][i]
                try:
                    if metric in ("stats_reset"):
                        continue
                    print("postgresql.%s %i %s database=%s" %
                          (metric, ts, value, database))
                except Exception:
                    utils.err("got here")
                    continue

        # connections
        cursor.execute("SELECT datname, count(datname) FROM pg_stat_activity"
                       " GROUP BY pg_stat_activity.datname")
        ts = time.time()
        connections = cursor.fetchall()

        for database, connection in connections:
            print("postgresql.connections %i %s database=%s" %
                  (ts, connection, database))

    except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
            # exit on a broken pipe. There is no point in continuing
            # because no one will read our stdout anyway.
            return 2
        utils.err("error: failed to collect data: %s" % e)
Example #40
def main(args):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    hbase_service = HBaseMaster()
    while True:
        hbase_service.emit()
        time.sleep(90)
    return 0
Example #41
def main(args):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    rm_node_service = HadoopResourceManager()
    while True:
        rm_node_service.emit()
        time.sleep(90)
    return 0
Example #42
def validate_line_parses(line):
    parsed = re.match(r'^([-_./a-zA-Z0-9]+)\s+'  # Metric name.
                      r'(\d+\.?\d+)\s+'  # Timestamp.
                      r'(\S+?)'  # Value (int or float).
                      r'((?:\s+[-_./a-zA-Z0-9]+=[-_./a-zA-Z0-9]+)*)$',  # Tags
                      line)
    if parsed is None:
        utils.err("invalid data: %s\n" % (line))
        return False
    return True
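A quick check with illustrative lines shows what passes and what does not:

# Illustrative inputs only.
validate_line_parses("proc.loadavg.1min 1434567890 0.5 host=web1")  # True
validate_line_parses("bad line with spaces in the metric name")     # False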
Example #43
def main(args):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    name_node_service = HadoopNameNode()
    while True:
        name_node_service.emit()
        time.sleep(90)
    return 0
Example #44
def collect():
  """Collects HTTP latencies in milliseconds from a list of ports in configuration"""
  ts = time.time()
  try:
    for metric, url in httpconf.urls().iteritems():
      response = requests.get(url)
      latency = response.elapsed.total_seconds() * 1000
      print("%s %i %f" % (metric, ts, latency))
  except Exception as e:
    utils.err("error: something wrong happened in http: %s" % e)
Example #45
def main(args):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    hbase_service = HBaseMaster()
    while True:
        hbase_service.emit()
        time.sleep(90)
    return 0
Example #46
def postgres_connect(sockdir):
    """Connects to the PostgreSQL server using the specified socket file."""
    user, password = postgresqlconf.get_user_password()

    try:
        return psycopg2.connect("host='%s' user='******' password='******' "
                                "connect_timeout='%s' dbname=postgres" %
                                (sockdir, user, password, CONNECT_TIMEOUT))
    except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
        utils.err("Couldn't connect to DB: %s" % (e))
Example #47
def main(args):
    utils.drop_privileges()
    if json is None:
        utils.err("This collector requires the `json' Python module.")
        return 13  # Ask tcollector not to respawn us
    datanode_service = HadoopDataNode()
    while True:
        datanode_service.emit()
        time.sleep(15)
    return 0
Example #48
def collect(db):
  """
  Collects and prints stats.

  Here we collect only general info, for full list of data for collection
  see http://www.postgresql.org/docs/9.2/static/monitoring-stats.html
  """

  try:
    cursor = db.cursor()

    # general statistics
    cursor.execute("SELECT pg_stat_database.*, pg_database_size"
                   " (pg_database.datname) AS size FROM pg_database JOIN"
                   " pg_stat_database ON pg_database.datname ="
                   " pg_stat_database.datname WHERE pg_stat_database.datname"
                   " NOT IN ('template0', 'template1', 'postgres')")
    ts = time.time()
    stats = cursor.fetchall()

#  datid |  datname   | numbackends | xact_commit | xact_rollback | blks_read  |  blks_hit   | tup_returned | tup_fetched | tup_inserted | tup_updated | tup_deleted | conflicts | temp_files |  temp_bytes  | deadlocks | blk_read_time | blk_write_time |          stats_reset          |     size     
    result = {}
    for stat in stats:
      database = stat[1]
      result[database] = stat

    for database in result:
      for i in range(2,len(cursor.description)):
        metric = cursor.description[i].name
        value = result[database][i]
        try:
          if metric in ("stats_reset"):
            continue
          print ("postgresql.%s %i %s database=%s"
                 % (metric, ts, value, database))
        except Exception:
          utils.err("got here")
          continue

    # connections
    cursor.execute("SELECT datname, count(datname) FROM pg_stat_activity"
                   " GROUP BY pg_stat_activity.datname")
    ts = time.time()
    connections = cursor.fetchall()

    for database, connection in connections:
      print ("postgresql.connections %i %s database=%s"
             % (ts, connection, database))

  except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
    if isinstance(e, IOError) and e.errno == errno.EPIPE:
      # exit on a broken pipe. There is no point in continuing
      # because no one will read our stdout anyway.
      return 2
    utils.err("error: failed to collect data: %s" % e)
Example #49
def postgres_connect(sockdir):
  """Connects to the PostgreSQL server using the specified socket file."""
  user, password = postgresqlconf.get_user_password()

  try:
    return psycopg2.connect("host='%s' user='******' password='******' "
                            "connect_timeout='%s' dbname=postgres"
                            % (sockdir, user, password,
                            CONNECT_TIMEOUT))
  except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
    utils.err("Couldn't connect to DB: %s" % (e))
Example #50
def scan_zk_instances():
    """ 
    Finding out all the running instances of zookeeper
    - Using netstat, finds out all listening java processes.	 
    - Figures out ZK instances among java processes by looking for the 
      string "org.apache.zookeeper.server.quorum.QuorumPeerMain" in cmdline.
    """

    instances = []
    try:
        listen_sock = subprocess.check_output(["netstat", "-lnpt"],
                                              stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        utils.err("netstat directory doesn't exist in PATH variable")
        return instances

    for line in listen_sock.split("\n"):
        if not "java" in line:
            continue
        listen_sock = line.split()[3]
        tcp_version = line.split()[0]

        m = re.match("(.+):(\d+)", listen_sock)
        ip = m.group(1)
        port = int(m.group(2))

        pid = int(line.split()[6].split("/")[0])
        try:
            fd = open("/proc/%d/cmdline" % pid)
            cmdline = fd.readline()
            if "org.apache.zookeeper.server.quorum.QuorumPeerMain" in cmdline:
                data = ""
                try:
                    if tcp_version == "tcp6":
                        sock = socket.socket(socket.AF_INET6,
                                             socket.SOCK_STREAM)
                    else:
                        sock = socket.socket(socket.AF_INET,
                                             socket.SOCK_STREAM)
                    sock.settimeout(0.5)
                    sock.connect((ip, port))
                    sock.send("ruok\n")
                    data = sock.recv(1024)
                except:
                    pass
                finally:
                    sock.close()
                if data == "imok":
                    instances.append([ip, port, tcp_version])
                    data = ""
        except:
            continue
        finally:
            fd.close()
    return instances
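The liveness probe in the middle of the loop is ZooKeeper's four-letter-word protocol: send "ruok" and a healthy server answers "imok". In isolation (address and port are illustrative):

import socket

# Standalone version of the "ruok" probe used above.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(0.5)
sock.connect(("127.0.0.1", 2181))
sock.send("ruok\n")
print(sock.recv(1024))  # a healthy ZooKeeper replies "imok"
sock.close()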
Example #51
def find_conf_file(pid):
  """Returns config file for couchbase-server."""
  try:
    fd = open('/proc/%s/cmdline' % pid)
  except IOError as e:
    utils.err("Couchbase (pid %s) went away ? %s" % (pid, e))
    return
  try:
    config = fd.read().split("config_path")[1].split("\"")[1]
    return config
  finally:
    fd.close()
Example #52
def connect_socket(tcp_version, port):
    sock = None
    if tcp_version == "tcp6":
        sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        ipaddr = '::1'
    else:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ipaddr = '127.0.0.1'
    try:
        sock.connect((ipaddr, port))
    except Exception as err:
        utils.err(err)
        return None
    return sock
Example #53
def get_role_status():
    ms_checker_host = "localhost:3300"
    command_is_slave = "curl " + ms_checker_host + "/checkSlave"
    s, o = commands.getstatusoutput(command_is_slave)
    if o == "" or s != 0:
        utils.err("Error checking mysql role, status %s" % s)
    elif s == 0:
        utils.err("INFO: status msg: %s" % o)
        if "not" not in o.lower():
            return 1

    return 0
Example #54
def find_conf_file(pid):
    """Returns config file for couchbase-server."""
    try:
        fd = open('/proc/%s/cmdline' % pid)
    except IOError as e:
        utils.err("Couchbase (pid %s) went away ? %s" % (pid, e))
        return
    try:
        config = fd.read().split("config_path")[1].split("\"")[1]
        return config
    finally:
        fd.close()
Example #55
def scan_zk_instances():
    """ 
    Finding out all the running instances of zookeeper
    - Using netstat, finds out all listening java processes.	 
    - Figures out ZK instances among java processes by looking for the 
      string "org.apache.zookeeper.server.quorum.QuorumPeerMain" in cmdline.
    """

    instances = []
    try:
        listen_sock = subprocess.check_output(["netstat", "-lnpt"], stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        utils.err("netstat directory doesn't exist in PATH variable")
        return instances

    for line in listen_sock.split("\n"):
        if not "java" in line:
            continue
        listen_sock = line.split()[3]
        tcp_version = line.split()[0]

        m = re.match("(.+):(\d+)", listen_sock)
        ip = m.group(1)
        port = int(m.group(2))

        pid = int(line.split()[6].split("/")[0])
        try:
            fd = open("/proc/%d/cmdline" % pid)
            cmdline = fd.readline()
            if "org.apache.zookeeper.server.quorum.QuorumPeerMain" in cmdline:
                data = ""
                try:
                    if tcp_version == "tcp6" or ip == "::":
                        sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
                        ip = "::1"
                    else:
                        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                        ip = "127.0.0.1"
                    sock.settimeout(0.5)
                    sock.connect((ip, port))
                    sock.send("ruok\n")
                    data = sock.recv(1024)
                except:
                    pass
                finally:
                    sock.close()
                if data == "imok":	
                    instances.append([ip, port, tcp_version])
                    data = ""
        except:
            continue
        finally:
            fd.close()
    return instances 
Example #56
def connect_socket(tcp_version, port):
    sock = None
    if tcp_version == "tcp6":
        sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        ipaddr = '::1'
    else:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ipaddr = '127.0.0.1'
    try:
        sock.connect((ipaddr, port))
    except Exception as err:
        utils.err(err)
        return None
    return sock
Example #57
def process_gc_log(collector):

    prefix = collector['prefix']
    # get latest gc log to process
    gc_log = get_latest_gc_log(collector['log_dir'], collector['log_name_pattern'])

    # update current_file and current_file_pos if this is the first time to
    # process the gc log
    if collector['current_file'] != gc_log:
        collector['current_file'] = gc_log
        with open(gc_log, 'rb') as file_handler:
            collector['current_file_pos'] = get_file_end(file_handler)
        return
    try:
        with open(gc_log, 'rb') as file_handler:

            pos = collector['current_file_pos']
            collector['current_file_pos'] = get_file_end(file_handler)
            file_handler.seek(pos)

            # Do not use foreach loop because inside function process_gc_record
            # will call file_handler.readline(). The reason is that some GC
            # event are multiline and need to be processed as a whole
            while True:
                line = file_handler.readline()
                if len(line) == 0:
                    break
                pattern_name, matcher = match_pattern(line)
                if pattern_name == GC_START_TIME_PATTERN:
                    year, month, day, hour, minute, second, timezone = [int(matcher.group(i)) for i in range(1, 8)]
                    cause = matcher.group(8)
                    timestamp = true_unix_timestamp(year, month, day, hour, minute, second, timezone)
                    process_gc_record(prefix, file_handler, timestamp, cause, collector)
                else:
                    unmatched_gc_log(line)

        current_timestamp_in_sec = int(time.time())

        if collector['timestamp'] is not None:
            for gen, value in collector['gensize'].items():
                print("%s.gc.g1.gensize %s %s gen=%s" % (prefix, current_timestamp_in_sec, value, gen))

        # publish gc event count metrics
        for event, value in collector['count'].items():
            print "%s.gc.g1.event.count %s %s event=%s" % (prefix, current_timestamp_in_sec, value, event)

    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        utils.err(''.join(
            traceback.format_exception(exc_type, exc_value, exc_traceback)))

    return 0
Example #58
def find_bindir_path(config_file):
  """Returns the bin directory path"""
  try:
    fd = open(config_file)
  except IOError as e:
    utils.err("Error for Config file (%s): %s" % (config_file, e))
    return None
  try:
    for line in fd:
      if line.startswith("{path_config_bindir"):
        return line.split(",")[1].split("\"")[1]
  finally:
    fd.close()
Example #59
def main(args):
  """Collects and dumps stats from a PostgreSQL server."""

  try:
    db = postgresqlutils.connect()
  except Exception as e:
    utils.err("error: Could not initialize collector: %s" % (e))
    return 13 # Ask tcollector to not respawn us

  while True:
    collect(db)
    sys.stdout.flush()
    time.sleep(COLLECTION_INTERVAL)
Example #60
def process_domain(domain, pid):
    """Process one domain (vm)"""
    # skip vms that are not running
    if domain.isActive() != 1:
        utils.err("Domain %s is inactive. Skipping." % domain.name())
        return False
    if not pid:
        utils.err("Cannot find PID for domain %s. Skipping." % domain.name())
        return False
    if not psutil.pid_exists(pid):
        utils.err("PID %d no longer exists for domain %s. Skipping." %
                  (pid, domain.name()))
        return False

    # populate vm structure with metrics
    try:
        vm = {}
        vm[FIELDS["cpu_time"]] = get_cpu_time(pid)
        vm[FIELDS["cpu_load"]] = get_cpu_load(pid)
        vm[FIELDS["memory"]] = get_memory(domain)
        vm[FIELDS["max_memory"]] = domain.maxMemory()
        vm[FIELDS["max_vcpus"]] = domain.maxVcpus()

        xml = BeautifulSoup(domain.XMLDesc())
        vm[TAG_DEPLOY_ID] = domain.name()
        vm[TAG_TYPE] = get_type(domain, xml)

        vm.update(get_network_traffic(domain, xml))
        vm.update(get_disk_io(domain, xml))
    except LibvirtVmDataError as err:
        utils.err(err.value)
        return False

    print_vm(vm)
    return True
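process_domain and main rely on a FIELDS table mapping short keys to full metric names, plus two tag-name constants. The comment in main confirms FIELDS["count"] is libvirt.vm.count; the other entries below follow the same shape but are assumptions:

# Assumed shape of the collector's constants; only "count" is confirmed
# by the "write libvirt.vm.count metric" comment above.
FIELDS = {
    "cpu_time": "libvirt.vm.cpu_time",
    "cpu_load": "libvirt.vm.cpu_load",
    "memory": "libvirt.vm.memory",
    "max_memory": "libvirt.vm.max_memory",
    "max_vcpus": "libvirt.vm.max_vcpus",
    "count": "libvirt.vm.count",
}
TAG_DEPLOY_ID = "deploy_id"
TAG_TYPE = "type"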