Example #1
def process(metric, datapoint):
  increment('datapointsReceived')

  for rule in RewriteRuleManager.preRules:
    metric = rule.apply(metric)

  aggregate_metrics = []

  for rule in RuleManager.rules:
    aggregate_metric = rule.get_aggregate_metric(metric)

    if aggregate_metric is None:
      continue
    else:
      aggregate_metrics.append(aggregate_metric)

    buffer = BufferManager.get_buffer(aggregate_metric)

    if not buffer.configured:
      buffer.configure_aggregation(rule.frequency, rule.aggregation_func)

    buffer.input(datapoint)

  for rule in RewriteRuleManager.postRules:
    metric = rule.apply(metric)

  if metric not in aggregate_metrics:
    events.metricGenerated(metric, datapoint)

  if len(aggregate_metrics) == 0:
    log.msg("Couldn't match metric %s with any aggregation rule. Passing on un-aggregated." % metric)
Example #2
def loadStorageSchemas():
  schemaList = []
  config = OrderedConfigParser()
  config.read(STORAGE_SCHEMAS_CONFIG)

  for section in config.sections():
    options = dict( config.items(section) )
    pattern = options.get('pattern')

    retentions = options['retentions'].split(',')
    archives = [ Archive.fromString(s) for s in retentions ]

    if pattern:
      mySchema = PatternSchema(section, pattern, archives)
    else:
      log.err("Section missing 'pattern': %s" % section)
      continue

    archiveList = [a.getTuple() for a in archives]

    try:
      whisper.validateArchiveList(archiveList)
      schemaList.append(mySchema)
    except whisper.InvalidConfiguration as e:
      log.msg("Invalid schemas found in %s: %s" % (section, e) )
Example #3
 def connectionLost(self, reason):
   log.msg('stdin disconnected')
   def startShutdown(results):
     log.msg("startShutdown(%s)" % str(results))
     allStopped = client_manager.stopAllClients()
     allStopped.addCallback(shutdown)
   firstConnectsAttempted.addCallback(startShutdown)
Example #4
def optimalWriteOrder():
  "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
  global lastCreateInterval
  global createCount
  metrics = [ (metric, len(datapoints)) for metric,datapoints in MetricCache.items() ]

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    dbFilePath = getFilesystemPath(metric)
    dbFileExists = exists(dbFilePath)

    if not dbFileExists:
      createCount += 1
      now = time.time()

      if now - lastCreateInterval >= 60:
        lastCreateInterval = now
        createCount = 1

      elif createCount >= settings.MAX_CREATES_PER_MINUTE:
        continue

    try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue # we simply move on to the next metric when this race condition occurs

    yield (metric, datapoints, dbFilePath, dbFileExists)
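
The create throttle above resets its counter at most once per minute; a standalone sketch of the same windowing logic (the names mirror the snippet, the limit value is made up):

import time

MAX_CREATES_PER_MINUTE = 50   # illustrative value; carbon reads this from settings
lastCreateInterval = 0
createCount = 0

def may_create():
  """Soft rate limit: allow at most MAX_CREATES_PER_MINUTE creates per 60s window."""
  global lastCreateInterval, createCount
  createCount += 1
  now = time.time()
  if now - lastCreateInterval >= 60:
    lastCreateInterval = now  # start a fresh one-minute window
    createCount = 1
    return True
  return createCount < MAX_CREATES_PER_MINUTE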
Example #5
 def _generate_queue():
   while True:
     t = time.time()
     metric_counts = sorted(self.cache.counts, key=lambda x: x[1])
     log.msg("Sorted %d cache queues in %.6f seconds" % (len(metric_counts), time.time() - t))
     while metric_counts:
       yield itemgetter(0)(metric_counts.pop())
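
Because the counts are sorted ascending and popped from the tail, the generator above hands back the fullest queues first; a self-contained illustration of that ordering:

from operator import itemgetter

# (metric, cached datapoint count) pairs, shaped like self.cache.counts
metric_counts = sorted([("a.b", 3), ("c.d", 10), ("e.f", 1)], key=lambda x: x[1])
drained = []
while metric_counts:
  drained.append(itemgetter(0)(metric_counts.pop()))  # pop() returns the largest remaining count
assert drained == ["c.d", "a.b", "e.f"]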
Example #6
def _flush(prefix=None):
    """ Write/create whisped files at maximal speed """
    assert prefix is None or hasattr(prefix, 'startswith')
    log.msg("flush started (prefix: %s)" % prefix)
    started = time.time()
    metrics = MetricCache.counts()
    updates = 0
    write_lock.acquire()
    try:
        for metric, queueSize in metrics:
            if prefix and not metric.startswith(prefix):
                continue
            dbFilePath = getFilesystemPath(metric)
            dbFileExists = exists(dbFilePath)
            try:
                datapoints = MetricCache.pop(metric)
            except KeyError:
                continue
            if not createWhisperFile(metric, dbFilePath, dbFileExists):
                continue
            if not writeWhisperFile(dbFilePath, datapoints):
                continue
            updates += 1
    finally:
        write_lock.release()
    log.msg('flush finished (updates: %d, time: %.5f sec)' % (updates, time.time()-started))
    return updates
Example #7
def reloadAggregationSchemas():
  global AGGREGATION_SCHEMAS
  try:
    AGGREGATION_SCHEMAS = loadAggregationSchemas()
  except Exception:
    log.msg("Failed to reload aggregation SCHEMAS")
    log.err()
Example #8
def reloadAggregationSchemas():
  global agg_schemas
  try:
    agg_schemas = loadAggregationSchemas()
  except:
    log.msg("Failed to reload aggregation schemas")
    log.err()
Example #9
def optimalWriteOrder():
  "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
  global lastCreateInterval
  global createCount
  metrics = MetricCache.counts()

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    # Let our persister do its own check, and ignore the metric if needed.
    if not persister.pre_get_datapoints_check(metric):
        continue

    try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue # we simply move on to the next metric when this race condition occurs

    dbInfo = persister.get_dbinfo(metric)
    dbIdentifier = dbInfo[0]
    dbExists = dbInfo[1]

    yield (metric, datapoints, dbIdentifier, dbExists)
Example #10
def reloadStorageSchemas():
  global schemas
  try:
    schemas = loadStorageSchemas()
  except:
    log.msg("Failed to reload storage schemas")
    log.err()
Example #11
def loadStorageSchemas():
  schemaList = []
  config = OrderedConfigParser()
  config.read(STORAGE_SCHEMAS_CONFIG)

  for section in config.sections():
    options = dict(config.items(section))
    pattern = options.get('pattern')

    try:
      retentions = options['retentions'].split(',')
      archives = [Archive.fromString(s) for s in retentions]
    except KeyError:
      log.err("Schema %s missing 'retentions', skipping" % section)
      continue

    if pattern:
      mySchema = PatternSchema(section, pattern, archives)
    else:
      log.err("Schema %s missing 'pattern', skipping" % section)
      continue

    archiveList = [a.getTuple() for a in archives]

    try:
      if state.database is not None:
        state.database.validateArchiveList(archiveList)
      schemaList.append(mySchema)
    except ValueError as e:
      log.msg("Invalid schemas found in %s: %s" % (section, e))

  schemaList.append(defaultSchema)
  return schemaList
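
A retentions value such as "60s:1d,5m:30d" is split on commas and each piece handed to Archive.fromString; a simplified sketch of that parsing (the real parser accepts more unit spellings and bare-second forms):

UNITS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400, 'w': 604800, 'y': 31536000}

def parse_retention(spec):
  """Turn '60s:1d' into the (secondsPerPoint, points) tuple a whisper archive expects."""
  precision, duration = spec.split(':')
  seconds_per_point = int(precision[:-1]) * UNITS[precision[-1]]
  total_seconds = int(duration[:-1]) * UNITS[duration[-1]]
  return (seconds_per_point, total_seconds // seconds_per_point)

retentions = "60s:1d,5m:30d".split(',')
assert [parse_retention(r) for r in retentions] == [(60, 1440), (300, 8640)]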
Example #12
def reloadStorageSchemas():
  global SCHEMAS
  try:
    SCHEMAS = loadStorageSchemas()
  except Exception:
    log.msg("Failed to reload storage SCHEMAS")
    log.err()
Example #13
def loadStorageSchemas():
  schemaList = []
  config = OrderedConfigParser()
  config.read(STORAGE_SCHEMAS_CONFIG)

  for section in config.sections():
    options = dict(config.items(section))
    matchAll = options.get('match-all')
    pattern = options.get('pattern')
    listName = options.get('list')

    retentions = options['retentions'].split(',')
    archives = [Archive.fromString(s) for s in retentions]

    if matchAll:
      mySchema = DefaultSchema(section, archives)

    elif pattern:
      mySchema = PatternSchema(section, pattern, archives)

    elif listName:
      mySchema = ListSchema(section, listName, archives)

    archiveList = [a.getTuple() for a in archives]

    try:
      whisper.validateArchiveList(archiveList)
      schemaList.append(mySchema)
    except whisper.InvalidConfiguration as e:
      log.msg("Invalid schemas found in %s: %s" % (section, e))
Example #14
  def process(self, metric, datapoint):
    increment('datapointsReceived')

    aggregate_metrics = set()

    for rule in RuleManager.rules:
      aggregate_metric = rule.get_aggregate_metric(metric)

      if aggregate_metric is None:
        continue
      else:
        aggregate_metrics.add(aggregate_metric)

      values_buffer = BufferManager.get_buffer(aggregate_metric)

      if not values_buffer.configured:
        values_buffer.configure_aggregation(rule.frequency, rule.aggregation_func)

      values_buffer.input(datapoint)

    if settings.FORWARD_ALL and metric not in aggregate_metrics:
      if settings.LOG_AGGREGATOR_MISSES and len(aggregate_metrics) == 0:
        log.msg(
          "Couldn't match metric %s with any aggregation rule. Passing on un-aggregated." % metric)
      yield (metric, datapoint)
Example #15
    def processMessage(self, message, channel):
        """Parse a message and post it as a metric."""

        if self.factory.verbose:
            log.listener("Message received: %s" % (message,))

        metric = message.routing_key

        for line in message.content.body.split("\n"):
            line = line.strip()
            if not line:
                continue
            try:
                #log.listener("Trying...")
            #    if settings.get("AMQP_METRIC_NAME_IN_BODY", False):
            #        metric, value, timestamp = line.split()
            #        log.listener("Metric in body")
            #    else:
            #        log.listener("Metric not in body") 
                value, timestamp = line.split()
                #log.listener("Value:%f   Timestamp:%f"%(float(value),float(timestamp))) 
                datapoint = ( float(timestamp), float(value) )
            except ValueError:
                log.listener("invalid message line: %s" % (line,))
                continue

            events.metricReceived(metric, datapoint)

            if self.factory.verbose:
                log.listener("Metric posted: %s %s %s" %
                             (metric, value, timestamp,))
        log.msg("Acking...")  
        channel.basic_ack(delivery_tag = message.delivery_tag, multiple = False)
        log.msg("Ack Done!!")
Example #16
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in AGGREGATION_SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                    (metric, archiveConfig, xFilesFactor, aggregationMethod))
        try:
            state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
            instrumentation.increment('creates')
        except Exception as e:
            log.err()
            log.msg("Error creating %s: %s" % (metric, e))
            instrumentation.increment('errors')
            continue
      # If we've got a rate limit configured, let's make sure we enforce it
      if UPDATE_BUCKET:
        UPDATE_BUCKET.drain(1, blocking=True)
      try:
        t1 = time.time()
        # If we have duplicated points, always pick the last. update_many()
        # has no guaranteed behavior for that, and in fact the current implementation
        # will keep the first point in the list.
        datapoints = dict(datapoints).items()
        state.database.write(metric, datapoints)
        updateTime = time.time() - t1
      except Exception as e:
        log.err()
        log.msg("Error writing to %s: %s" % (metric, e))
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))
Example #17
def createClientConnections(hosts):
  for (server, port, instance) in hosts:
    log.msg("Connecting to destination server %s:%d" % (server, port))
    factory = MetricSenderFactory(server, port)
    clientConnections.append(factory)
    reactor.connectTCP(server, port, factory)

  clientConnections.sort(key=lambda f: f.remoteAddr) # normalize the order
Example #18
def backIntoCache(metricList):
  for (metric, datapoints) in metricList:
    for point in datapoints:
      try:
        MetricCache.store(metric, point)
      except:
        datapoints.append(point)
  log.msg("Failed to publish to RabbitMQ. Pushed the metrics back to cache")
Example #19
 def receive_loop(self):
     queue = yield self.queue(self.consumer_tag)
     channel = yield self.channel(2)         
     while True:
         log.msg("Getting msg from queue") 
         msg = yield queue.get()
         log.msg("Got it!!")
         self.processMessage(msg, channel)
Example #20
def reloadAggregationSchemas(first_run=False):
  global agg_schemas
  try:
    agg_schemas = loadAggregationSchemas()
  except Exception as e:
    if first_run:
      raise e
    log.msg("Failed to reload aggregation schemas")
    log.err()
Example #21
def reloadStorageSchemas(first_run=False):
  global schemas
  try:
    schemas = loadStorageSchemas()
  except Exception as e:
    if first_run:
        raise e
    log.msg("Failed to reload storage schemas")
    log.err()
Example #22
def writeWhisperFile(dbFilePath, datapoints):
  try:
    whisper.update_many(dbFilePath, datapoints)
  except:
    log.msg("Error writing to %s" % (dbFilePath))
    log.err()
    instrumentation.increment('errors')
    return False
  return True
Example #23
 def _generate_queue():
   while True:
     t = time.time()
     metric_counts = sorted(self.cache.counts, key=lambda x: x[1])
     if settings.LOG_CACHE_QUEUE_SORTS:
       log.msg("Sorted %d cache queues in %.6f seconds" % (len(metric_counts), time.time() - t))
     while metric_counts:
       yield itemgetter(0)(metric_counts.pop())
     log.msg("Queue consumed in %.6f seconds" % (time.time() - t))
Example #24
  def process(self, metric, datapoint):
    # normalize metric name (reorder tags)
    try:
      metric = TaggedSeries.parse(metric).path
    except Exception as err:
      log.msg('Error parsing metric %s: %s' % (metric, err))

    self.cache.store(metric, datapoint)
    return Processor.NO_OUTPUT
Example #25
 def startService(self):
     if 'signal' in globals().keys():
       log.msg("Installing SIG_IGN for SIGHUP")
       signal.signal(signal.SIGHUP, signal.SIG_IGN)
     self.storage_reload_task.start(60, False)
     self.aggregation_reload_task.start(60, False)
     reactor.addSystemEventTrigger('before', 'shutdown', shutdownModifyUpdateSpeed)
     reactor.callInThread(writeForever)
     Service.startService(self)
Example #26
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in AGGREGATION_SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        try:
            state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
            instrumentation.increment('creates')
        except Exception:
            log.err("Error creating %s" % (dbFilePath))
            continue
      # If we've got a rate limit configured, let's make sure we enforce it
      if UPDATE_BUCKET:
        UPDATE_BUCKET.drain(1, blocking=True)
      try:
        t1 = time.time()
        state.database.write(metric, datapoints)
        updateTime = time.time() - t1
      except Exception:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
Example #27
 def _generate_queue():
   while True:
     t = time.time()
     metric_lw = sorted(self.cache.watermarks, key=lambda x: x[1], reverse=True)
     if settings.LOG_CACHE_QUEUE_SORTS:
       log.msg("Sorted %d cache queues in %.6f seconds" % (len(metric_lw), time.time() - t))
     while metric_lw:
       yield itemgetter(0)(metric_lw.pop())
     log.msg("Queue consumed in %.6f seconds" % (time.time() - t))
Example #28
def shutdownModifyUpdateSpeed():
    try:
        shut = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        if UPDATE_BUCKET:
          UPDATE_BUCKET.setCapacityAndFillRate(shut,shut)
        if CREATE_BUCKET:
          CREATE_BUCKET.setCapacityAndFillRate(shut,shut)
        log.msg("Carbon shutting down.  Changed the update rate to: " + str(settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN))
    except KeyError:
        log.msg("Carbon shutting down.  Update rate not changed")
Example #29
def publishForever(channel, exchange):
        while reactor.running:
#            msg = yield queue.get()
#            self.processMessage(msg, channel)
            try:
              log.msg("Calling function writeCachedDataPoints")
              writeCachedDataPoints(channel, exchange)
            except: 
              log.err()
            log.msg("Time to sleep")
            time.sleep(10)          
Example #30
  def process(self, metric, datapoint):
    if settings.TAG_RELAY_NORMALIZED:
      # normalize metric name
      try:
        metric = TaggedSeries.parse(metric).path
      except Exception as err:
        log.msg('Error parsing metric %s: %s' % (metric, err))
        # continue anyway with processing the unnormalized metric for robustness

    state.client_manager.sendDatapoint(metric, datapoint)
    return pipeline.Processor.NO_OUTPUT
Example #31
 def store(self, metric, datapoint):
     self.setdefault(metric, {})
     timestamp, value = datapoint
     if timestamp not in self[metric]:
         # Not a duplicate, hence process if cache is not full
         if self.is_full:
             log.msg("MetricCache is full: self.size=%d" % self.size)
             events.cacheFull()
         else:
             self.size += 1
             self[metric][timestamp] = value
     else:
         # Updating a duplicate does not increase the cache size
         self[metric][timestamp] = value
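
The duplicate-timestamp handling above can be reproduced with a plain dict of dicts; a minimal stand-in (not the real MetricCache class) showing that a repeated timestamp overwrites the value without growing the cache:

cache = {}  # metric -> {timestamp: value}
size = 0

def store(metric, datapoint):
  global size
  timestamp, value = datapoint
  series = cache.setdefault(metric, {})
  if timestamp not in series:
    size += 1                # only a new timestamp grows the cache
  series[timestamp] = value  # a duplicate timestamp simply updates the value

store("foo.bar", (1000, 1.0))
store("foo.bar", (1000, 2.0))  # duplicate timestamp
assert size == 1 and cache["foo.bar"][1000] == 2.0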
Example #32
    def __init__(self, settings):
      super(WhisperDatabase, self).__init__(settings)

      self.data_dir = settings.LOCAL_DATA_DIR
      self.sparse_create = settings.WHISPER_SPARSE_CREATE
      self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE
      if settings.WHISPER_AUTOFLUSH:
        log.msg("Enabling Whisper autoflush")
        whisper.AUTOFLUSH = True

      if settings.WHISPER_FALLOCATE_CREATE:
        if whisper.CAN_FALLOCATE:
          log.msg("Enabling Whisper fallocate support")
        else:
          log.err("WHISPER_FALLOCATE_CREATE is enabled but linking failed.")

      if settings.WHISPER_LOCK_WRITES:
        if whisper.CAN_LOCK:
          log.msg("Enabling Whisper file locking")
          whisper.LOCK = True
        else:
          log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.")

      if settings.WHISPER_FADVISE_RANDOM:
        try:
          if whisper.CAN_FADVISE:
            log.msg("Enabling Whisper fadvise_random support")
            whisper.FADVISE_RANDOM = True
          else:
            log.err("WHISPER_FADVISE_RANDOM is enabled but import of ftools module failed.")
        except AttributeError:
          log.err("WHISPER_FADVISE_RANDOM is enabled but skipped because it is not compatible " +
                  "with the version of Whisper.")
Example #33
def shutdownModifyUpdateSpeed():
    try:
        shut = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        if UPDATE_BUCKET:
          UPDATE_BUCKET.setCapacityAndFillRate(shut, shut)
        if CREATE_BUCKET:
          CREATE_BUCKET.setCapacityAndFillRate(shut, shut)
        log.msg("Carbon shutting down.  Changed the update rate to: " +
                str(settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN))
    except KeyError:
        log.msg("Carbon shutting down.  Update rate not changed")

    # Also set MIN_TIMESTAMP_LAG to 0 to avoid waiting for nothing.
    settings.MIN_TIMESTAMP_LAG = 0
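
UPDATE_BUCKET and CREATE_BUCKET here are token buckets (carbon.util.TokenBucket, imported in Example #55); the toy bucket below illustrates what raising the capacity and fill rate at shutdown achieves. It is an illustration under assumed semantics, not carbon's implementation:

import time

class ToyTokenBucket(object):
  def __init__(self, capacity, fill_rate):
    self.capacity = float(capacity)
    self.fill_rate = float(fill_rate)  # tokens added per second
    self.tokens = float(capacity)
    self.last = time.time()

  def setCapacityAndFillRate(self, capacity, fill_rate):
    self.capacity = float(capacity)
    self.fill_rate = float(fill_rate)

  def drain(self, cost):
    """Take cost tokens if available; return True on success."""
    now = time.time()
    self.tokens = min(self.capacity, self.tokens + (now - self.last) * self.fill_rate)
    self.last = now
    if self.tokens >= cost:
      self.tokens -= cost
      return True
    return False

bucket = ToyTokenBucket(capacity=10, fill_rate=10)  # roughly ten updates per second
bucket.setCapacityAndFillRate(1000, 1000)           # at shutdown: let the cache flush quickly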
Example #34
    def tag(self, *metrics):
        from carbon.http import httpRequest

        log.debug("Tagging %s" % ', '.join(metrics), type='tagdb')
        t = time.time()

        try:
            httpRequest(self.graphite_url + '/tags/tagMultiSeries',
                        [('path', metric) for metric in metrics])
            log.debug("Tagged %s in %s" %
                      (', '.join(metrics), time.time() - t),
                      type='tagdb')
        except Exception as err:
            log.msg("Error tagging %s: %s" % (', '.join(metrics), err),
                    type='tagdb')
Example #35
 def _generate_queue():
     while True:
         t = time.time()
         metric_lw = sorted(self.cache.watermarks,
                            key=lambda x: x[1],
                            reverse=True)
         size = len(metric_lw)
         if settings.LOG_CACHE_QUEUE_SORTS and size:
             log.msg("Sorted %d cache queues in %.6f seconds" %
                     (size, time.time() - t))
         while metric_lw:
             yield itemgetter(0)(metric_lw.pop())
         if settings.LOG_CACHE_QUEUE_SORTS and size:
             log.msg("Queue consumed in %.6f seconds" %
                     (time.time() - t))
Example #36
    def tag(self, metric):
        from carbon.http import httpRequest

        log.msg("Tagging %s" % metric)
        t = time.time()

        def successHandler(result, *args, **kw):
            log.msg("Tagged %s: %s in %s" % (metric, result, time.time() - t))

        def errorHandler(err):
            log.msg("Error tagging %s: %s" % (metric, err))

        httpRequest(self.graphite_url + '/tags/tagSeries', {
            'path': metric
        }).addCallback(successHandler).addErrback(errorHandler)
Example #37
 def _generate_queue():
   while True:
     t = time.time()
     metric_lw = sorted(self.cache.watermarks, key=lambda x: x[1], reverse=True)
     if settings.MIN_TIMESTAMP_LAG:
       metric_lw = [x for x in metric_lw if t - x[1] > settings.MIN_TIMESTAMP_LAG]
     size = len(metric_lw)
     if settings.LOG_CACHE_QUEUE_SORTS and size:
       log.msg("Sorted %d cache queues in %.6f seconds" % (size, time.time() - t))
     if not metric_lw:
        # If there is nothing to do, give the reader a chance to sleep.
       yield None
     while metric_lw:
       yield itemgetter(0)(metric_lw.pop())
     if settings.LOG_CACHE_QUEUE_SORTS and size:
       log.msg("Queue consumed in %.6f seconds" % (time.time() - t))
Example #38
    def __init__(self, settings):
      super(CeresDatabase, self).__init__(settings)

      self.data_dir = settings.LOCAL_DATA_DIR
      ceres.setDefaultNodeCachingBehavior(settings.CERES_NODE_CACHING_BEHAVIOR)
      ceres.setDefaultSliceCachingBehavior(settings.CERES_SLICE_CACHING_BEHAVIOR)
      ceres.MAX_SLICE_GAP = int(settings.CERES_MAX_SLICE_GAP)

      if settings.CERES_LOCK_WRITES:
        if ceres.CAN_LOCK:
          log.msg("Enabling Ceres file locking")
          ceres.LOCK_WRITES = True
        else:
          log.err("CERES_LOCK_WRITES is enabled but import of fcntl module failed.")

      self.tree = ceres.CeresTree(self.data_dir)
Example #39
def optimalWriteOrder():
    """Generates metrics with the most cached values first and applies a soft
  rate limit on new metrics"""
    global lastCreateInterval
    global createCount
    metrics = MetricCache.counts()

    t = time.time()
    metrics.sort(key=lambda item: item[1],
                 reverse=True)  # by queue size, descending
    log.debug("Sorted %d cache queues in %.6f seconds" %
              (len(metrics), time.time() - t))

    for metric, queueSize in metrics:
        if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
            events.cacheSpaceAvailable()

        dbFilePath = getFilesystemPath(metric)
        dbFileExists = exists(dbFilePath)

        if not dbFileExists:
            createCount += 1
            now = time.time()

            if now - lastCreateInterval >= 60:
                lastCreateInterval = now
                createCount = 1

            elif createCount >= settings.MAX_CREATES_PER_MINUTE:
                # dropping queued up datapoints for new metrics prevents filling up the entire cache
                # when a bunch of new metrics are received.
                try:
                    MetricCache.pop(metric)
                except KeyError:
                    pass

                continue

        try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
            datapoints = MetricCache.pop(metric)
        except KeyError:
            log.msg("MetricCache contention, skipping %s update for now" %
                    metric)
            continue  # we simply move on to the next metric when this race condition occurs

        yield (metric, datapoints, dbFilePath, dbFileExists)
Example #40
def loadAggregationSchemas():
    # NOTE: This abuses the Schema classes above, and should probably be refactored.
    schemaList = []
    config = OrderedConfigParser()

    try:
        config.read(STORAGE_AGGREGATION_CONFIG)
    except (IOError, CarbonConfigException):
        log.msg("%s not found or wrong perms, ignoring." %
                STORAGE_AGGREGATION_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        matchAll = options.get('match-all')
        pattern = options.get('pattern')
        listName = options.get('list')

        xFilesFactor = options.get('xfilesfactor')
        aggregationMethod = options.get('aggregationmethod')

        try:
            if xFilesFactor is not None:
                xFilesFactor = float(xFilesFactor)
                assert 0 <= xFilesFactor <= 1
            if aggregationMethod is not None:
                assert aggregationMethod in whisper.aggregationMethods
        except:
            log.msg("Invalid schemas found in %s." % section)
            continue

        archives = (xFilesFactor, aggregationMethod)

        if matchAll:
            mySchema = DefaultSchema(section, archives)

        elif pattern:
            mySchema = PatternSchema(section, pattern, archives)

        elif listName:
            mySchema = ListSchema(section, listName, archives)

        schemaList.append(mySchema)

    schemaList.append(defaultAggregation)
    return schemaList
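
The options read above come from storage-aggregation.conf sections; a small sketch of the expected shape, parsed with the standard library (the real loader uses OrderedConfigParser, and the section below is made up):

from configparser import ConfigParser

SAMPLE = r"""
[hypothetical_counts]
pattern = \.count$
xFilesFactor = 0
aggregationMethod = sum
"""

config = ConfigParser()
config.read_string(SAMPLE)
options = dict(config.items('hypothetical_counts'))
# ConfigParser lowercases option names, which is why the loader looks up 'xfilesfactor'
assert 0 <= float(options['xfilesfactor']) <= 1
assert options['aggregationmethod'] in ('average', 'sum', 'min', 'max', 'last')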
Example #41
def loadAggregationSchemas():
    # NOTE: This abuses the Schema classes above, and should probably be refactored.
    schemaList = []
    config = OrderedConfigParser()

    try:
        config.read(STORAGE_AGGREGATION_CONFIG)
    except (IOError, CarbonConfigException):
        log.msg("%s not found or wrong perms, ignoring." %
                STORAGE_AGGREGATION_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        xFilesFactor = options.get('xfilesfactor')
        aggregationMethod = options.get('aggregationmethod')

        try:
            if xFilesFactor is not None:
                xFilesFactor = float(xFilesFactor)
                assert 0 <= xFilesFactor <= 1
            if aggregationMethod is not None:
                if state.database is not None:
                    assert aggregationMethod in state.database.aggregationMethods
        except ValueError:
            log.msg("Invalid schemas found in %s." % section)
            continue

        archives = (xFilesFactor, aggregationMethod)

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Section missing 'pattern': %s" % section)
            continue

        schemaList.append(mySchema)

    schemaList.append(defaultAggregation)
    return schemaList
Example #42
def loadStorageSchemas():
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        try:
            retentions = options['retentions'].split(',')
        except KeyError:
            log.err("Schema %s missing 'retentions', skipping" % section)
            continue

        try:
            archives = [Archive.fromString(s) for s in retentions]
        except ValueError as exc:
            log.err("{msg} in section [{section}] in {fn}".format(
                msg=exc, section=section.title(), fn=STORAGE_SCHEMAS_CONFIG))
            raise SystemExit(1)

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Schema %s missing 'pattern', skipping" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            if state.database is not None:
                state.database.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except ValueError as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    schemaList.append(defaultSchema)
    return schemaList
Example #43
        def __init__(self, settings):
            self.data_dir = settings.LOCAL_DATA_DIR
            self.sparse_create = settings.WHISPER_SPARSE_CREATE
            self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE
            if settings.WHISPER_AUTOFLUSH:
                log.msg("Enabling Whisper autoflush")
                whisper.AUTOFLUSH = True

            if settings.WHISPER_FALLOCATE_CREATE:
                if whisper.CAN_FALLOCATE:
                    log.msg("Enabling Whisper fallocate support")
                else:
                    log.err(
                        "WHISPER_FALLOCATE_CREATE is enabled but linking failed."
                    )

            if settings.WHISPER_LOCK_WRITES:
                if whisper.CAN_LOCK:
                    log.msg("Enabling Whisper file locking")
                    whisper.LOCK = True
                else:
                    log.err(
                        "WHISPER_LOCK_WRITES is enabled but import of fcntl module failed."
                    )
Example #44
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  cache = MetricCache()
  while cache:
    (metric, datapoints) = cache.drain_metric()
    if metric is None:
      # end the loop
      break

    dbFileExists = state.database.exists(metric)

    if not dbFileExists:
      if CREATE_BUCKET and not CREATE_BUCKET.drain(1):
        # If our tokenbucket doesn't have enough tokens available to create a new metric
        # file then we'll just drop the metric on the ground and move on to the next
        # metric.
        # XXX This behavior should probably be configurable to not drop metrics
        # when rate limiting, unless our cache is too big or there is some other
        # legitimate reason.
        instrumentation.increment('droppedCreates')
        continue

      archiveConfig = None
      xFilesFactor, aggregationMethod = None, None

      for schema in SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
          archiveConfig = [archive.getTuple() for archive in schema.archives]
          break

      for schema in AGGREGATION_SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched aggregation schema %s'
                        % (metric, schema.name))
          xFilesFactor, aggregationMethod = schema.archives
          break

      if not archiveConfig:
        raise Exception(("No storage schema matched the metric '%s',"
                         " check your storage-schemas.conf file.") % metric)

      if settings.LOG_CREATES:
        log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                    (metric, archiveConfig, xFilesFactor, aggregationMethod))
      try:
        state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
        if settings.ENABLE_TAGS:
          tagQueue.add(metric)
        instrumentation.increment('creates')
      except Exception as e:
        log.err()
        log.msg("Error creating %s: %s" % (metric, e))
        instrumentation.increment('errors')
        continue

    # If we've got a rate limit configured, let's make sure we enforce it
    waitTime = 0
    if UPDATE_BUCKET:
      t1 = time.time()
      yield UPDATE_BUCKET.drain(1, blocking=True)
      waitTime = time.time() - t1

    try:
      t1 = time.time()
      # If we have duplicated points, always pick the last. update_many()
      # has no guaranteed behavior for that, and in fact the current implementation
      # will keep the first point in the list.
      datapoints = dict(datapoints).items()
      state.database.write(metric, datapoints)
      if settings.ENABLE_TAGS:
        tagQueue.update(metric)
      updateTime = time.time() - t1
    except Exception as e:
      log.err()
      log.msg("Error writing to %s: %s" % (metric, e))
      instrumentation.increment('errors')
    else:
      pointCount = len(datapoints)
      instrumentation.increment('committedPoints', pointCount)
      instrumentation.append('updateTimes', updateTime)
      if settings.LOG_UPDATES:
        if waitTime > 0.001:
          log.updates("wrote %d datapoints for %s in %.5f seconds after waiting %.5f seconds" % (
            pointCount, metric, updateTime, waitTime))
        else:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (
            pointCount, metric, updateTime))
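
The dict(datapoints).items() step above relies on dict construction keeping the last value seen for a repeated key, which is how duplicate timestamps collapse to the most recent value; a quick demonstration:

datapoints = [(1000, 1.0), (1010, 2.0), (1000, 3.0)]  # timestamp 1000 appears twice
deduped = list(dict(datapoints).items())
# dict() keeps the last value for each key, so the stale 1.0 is dropped
assert sorted(deduped) == [(1000, 3.0), (1010, 2.0)]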
Example #45
def reloadAggregationSchemas():
  global AGGREGATION_SCHEMAS
  try:
    AGGREGATION_SCHEMAS = loadAggregationSchemas()
  except Exception as e:
    log.msg("Failed to reload aggregation SCHEMAS: %s" % (e))
Example #46
 def errorHandler(err):
   log.msg("Error tagging %s: %s" % (', '.join(metrics), err.getErrorMessage()), type='tagdb')
   return err
Example #47
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath,
             dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' %
                                    (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates(
                            'new metric %s matched aggregation schema %s' %
                            (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                dbDir = dirname(dbFilePath)
                try:
                    if not exists(dbDir):
                        os.makedirs(dbDir)
                except OSError as e:
                    log.err("%s" % e)
                log.creates(
                    "creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor,
                     aggregationMethod))
                try:
                    whisper.create(dbFilePath, archiveConfig, xFilesFactor,
                                   aggregationMethod,
                                   settings.WHISPER_SPARSE_CREATE,
                                   settings.WHISPER_FALLOCATE_CREATE)
                    instrumentation.increment('creates')
                except:
                    log.err("Error creating %s" % (dbFilePath))
                    continue
            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                updateTime = time.time() - t1
            except Exception:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
Example #48
 def successHandler(result, *args, **kw):
     log.msg("Tagged %s: %s in %s" % (metric, result, time.time() - t))
Example #49
 def _check_available_space(self):
     if state.cacheTooFull and self.size < settings.CACHE_SIZE_LOW_WATERMARK:
         log.msg("MetricCache below watermark: self.size=%d" % self.size)
         events.cacheSpaceAvailable()
Example #50
    def postOptions(self):
        global settings

        program = self.parent.subCommand

        # Use provided pidfile (if any) as default for configuration. If it's
        # set to 'twistd.pid', that means no value was provided and the default
        # was used.
        pidfile = self.parent["pidfile"]
        if pidfile.endswith("twistd.pid"):
            pidfile = None
        self["pidfile"] = pidfile

        # Enforce a default umask of '022' if none was set.
        if "umask" not in self.parent or self.parent["umask"] is None:
            self.parent["umask"] = 0o022

        # Read extra settings from the configuration file.
        program_settings = read_config(program, self)
        settings.update(program_settings)
        settings["program"] = program

        # Set process uid/gid by changing the parent config, if a user was
        # provided in the configuration file.
        if settings.USER:
            self.parent["uid"], self.parent["gid"] = (pwd.getpwnam(
                settings.USER)[2:4])

        # Set the pidfile in parent config to the value that was computed by
        # C{read_config}.
        self.parent["pidfile"] = settings["pidfile"]

        storage_schemas = join(settings["CONF_DIR"], "storage-schemas.conf")
        if not exists(storage_schemas):
            print "Error: missing required config %s" % storage_schemas
            sys.exit(1)

        if settings.WHISPER_AUTOFLUSH:
            log.msg("Enabling Whisper autoflush")
            whisper.AUTOFLUSH = True

        if settings.WHISPER_LOCK_WRITES:
            if whisper.CAN_LOCK:
                log.msg("Enabling Whisper file locking")
                whisper.LOCK = True
            else:
                log.err(
                    "WHISPER_LOCK_WRITES is enabled but import of fcntl module failed."
                )

        if not "action" in self:
            self["action"] = "start"
        self.handleAction()

        # If we are not running in debug mode or non-daemon mode, then log to a
        # directory, otherwise log output will go to stdout. If parent options
        # are set to log to syslog, then use that instead.
        if not self["debug"]:
            if self.parent.get("syslog", None):
                log.logToSyslog(self.parent["prefix"])
            elif not self.parent["nodaemon"]:
                logdir = settings.LOG_DIR
                if not isdir(logdir):
                    os.makedirs(logdir)
                log.logToDir(logdir)

        if self["whitelist"] is None:
            self["whitelist"] = join(settings["CONF_DIR"], "whitelist.conf")
        settings["whitelist"] = self["whitelist"]

        if self["blacklist"] is None:
            self["blacklist"] = join(settings["CONF_DIR"], "blacklist.conf")
        settings["blacklist"] = self["blacklist"]
Example #51
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"

    cache = MetricCache()
    while cache:
        dataWritten = False

        for (metric, datapoints, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in SCHEMAS:
                    if schema.matches(metric):
                        if settings.LOG_CREATES:
                            log.creates('new metric %s matched schema %s' %
                                        (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        if settings.LOG_CREATES:
                            log.creates(
                                'new metric %s matched aggregation schema %s' %
                                (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                if settings.LOG_CREATES:
                    log.creates(
                        "creating database metric %s (archive=%s xff=%s agg=%s)"
                        % (metric, archiveConfig, xFilesFactor,
                           aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor,
                                          aggregationMethod)
                    instrumentation.increment('creates')
                except Exception as e:
                    log.err()
                    log.msg("Error creating %s: %s" % (metric, e))
                    instrumentation.increment('errors')
                    continue
            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                # If we have duplicated points, always pick the last. update_many()
                # has no guaranteed behavior for that, and in fact the current implementation
                # will keep the first point in the list.
                datapoints = dict(datapoints).items()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception as e:
                log.err()
                log.msg("Error writing to %s: %s" % (metric, e))
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))
Example #52
 def errorHandler(err):
     log.msg("Error tagging %s: %s" % (metric, err.getErrorMessage()))
Example #53
 def errorHandler(err):
     log.msg("Error tagging %s: %s" % (metric, err))
Example #54
from threading import Thread
from twisted.internet import reactor
from twisted.internet.task import LoopingCall
import whisper
from carbon.cache import MetricCache
from carbon.storage import getFilesystemPath, loadStorageSchemas
from carbon.conf import settings
from carbon.instrumentation import increment, append
from carbon import log
try:
    import cPickle as pickle
except ImportError:
    import pickle

if settings.WHISPER_AUTOFLUSH:
    log.msg("enabling whisper autoflush")
    whisper.AUTOFLUSH = True

lastCreateInterval = 0
createCount = 0


def optimalWriteOrder():
    "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
    global lastCreateInterval
    global createCount
    metrics = [(metric, len(datapoints))
               for metric, datapoints in MetricCache.items()]

    t = time.time()
    metrics.sort(key=lambda item: item[1],
                 reverse=True)  # by queue size, descending
Example #55
from carbon import state
from carbon.cache import MetricCache
from carbon.storage import getFilesystemPath, loadStorageSchemas,\
    loadAggregationSchemas
from carbon.conf import settings
from carbon import log, events, instrumentation
from carbon.util import TokenBucket

from twisted.internet import reactor
from twisted.internet.task import LoopingCall
from twisted.application.service import Service

try:
    import signal
except ImportError:
    log.msg("Couldn't import signal module")

SCHEMAS = loadStorageSchemas()
AGGREGATION_SCHEMAS = loadAggregationSchemas()
CACHE_SIZE_LOW_WATERMARK = settings.MAX_CACHE_SIZE * 0.95

# Initialize token buckets so that we can enforce rate limits on creates and
# updates if the config wants them.
CREATE_BUCKET = None
UPDATE_BUCKET = None
if settings.MAX_CREATES_PER_MINUTE != float('inf'):
    capacity = settings.MAX_CREATES_PER_MINUTE
    fill_rate = float(settings.MAX_CREATES_PER_MINUTE) / 60
    CREATE_BUCKET = TokenBucket(capacity, fill_rate)

if settings.MAX_UPDATES_PER_SECOND != float('inf'):
    capacity = settings.MAX_UPDATES_PER_SECOND
    fill_rate = settings.MAX_UPDATES_PER_SECOND
    UPDATE_BUCKET = TokenBucket(capacity, fill_rate)
Example #56
File: conf.py Project: vrg0/carbon
    def postOptions(self):
        global settings

        program = self.parent.subCommand

        # Use provided pidfile (if any) as default for configuration. If it's
        # set to 'twistd.pid', that means no value was provided and the default
        # was used.
        pidfile = self.parent["pidfile"]
        if pidfile.endswith("twistd.pid"):
            pidfile = None
        self["pidfile"] = pidfile

        # Enforce a default umask of '022' if none was set.
        if "umask" not in self.parent or self.parent["umask"] is None:
            self.parent["umask"] = 0o022

        # Read extra settings from the configuration file.
        program_settings = read_config(program, self)
        settings.update(program_settings)
        settings["program"] = program

        # Normalize and expand paths
        def cleanpath(path):
            return os.path.normpath(os.path.expanduser(path))

        settings["STORAGE_DIR"] = cleanpath(settings["STORAGE_DIR"])
        settings["LOCAL_DATA_DIR"] = cleanpath(settings["LOCAL_DATA_DIR"])
        settings["WHITELISTS_DIR"] = cleanpath(settings["WHITELISTS_DIR"])
        settings["PID_DIR"] = cleanpath(settings["PID_DIR"])
        settings["LOG_DIR"] = cleanpath(settings["LOG_DIR"])
        settings["pidfile"] = cleanpath(settings["pidfile"])

        # Set process uid/gid by changing the parent config, if a user was
        # provided in the configuration file.
        if settings.USER:
            self.parent["uid"], self.parent["gid"] = (pwd.getpwnam(
                settings.USER)[2:4])

        # Set the pidfile in parent config to the value that was computed by
        # C{read_config}.
        self.parent["pidfile"] = settings["pidfile"]

        storage_schemas = join(settings["CONF_DIR"], "storage-schemas.conf")
        if not exists(storage_schemas):
            print("Error: missing required config %s" % storage_schemas)
            sys.exit(1)

        if settings.CACHE_WRITE_STRATEGY not in ('timesorted', 'sorted', 'max',
                                                 'bucketmax', 'naive'):
            log.err(
                "%s is not a valid value for CACHE_WRITE_STRATEGY, defaulting to %s"
                % (settings.CACHE_WRITE_STRATEGY,
                   defaults['CACHE_WRITE_STRATEGY']))
        else:
            log.msg("Using %s write strategy for cache" %
                    settings.CACHE_WRITE_STRATEGY)

        # Database-specific settings
        database = settings.DATABASE
        if database not in TimeSeriesDatabase.plugins:
            print("No database plugin implemented for '%s'" % database)
            raise SystemExit(1)

        database_class = TimeSeriesDatabase.plugins[database]
        state.database = database_class(settings)

        settings.CACHE_SIZE_LOW_WATERMARK = settings.MAX_CACHE_SIZE * 0.95

        if "action" not in self:
            self["action"] = "start"
        self.handleAction()

        # If we are not running in debug mode or non-daemon mode, then log to a
        # directory, otherwise log output will go to stdout. If parent options
        # are set to log to syslog, then use that instead.
        if not self["debug"]:
            if self.parent.get("syslog", None):
                prefix = "%s-%s[%d]" % (program, self["instance"], os.getpid())
                log.logToSyslog(prefix)
            elif not self.parent["nodaemon"]:
                logdir = settings.LOG_DIR
                if not isdir(logdir):
                    os.makedirs(logdir)
                    if settings.USER:
                        # We have not yet switched to the specified user,
                        # but that user must be able to create files in this
                        # directory.
                        os.chown(logdir, self.parent["uid"],
                                 self.parent["gid"])
                log.logToDir(logdir)

        if self["whitelist"] is None:
            self["whitelist"] = join(settings["CONF_DIR"], "whitelist.conf")
        settings["whitelist"] = self["whitelist"]

        if self["blacklist"] is None:
            self["blacklist"] = join(settings["CONF_DIR"], "blacklist.conf")
        settings["blacklist"] = self["blacklist"]
Example #57
def reloadStorageSchemas():
  global SCHEMAS
  try:
    SCHEMAS = loadStorageSchemas()
  except Exception as e:
    log.msg("Failed to reload storage SCHEMAS: %s" % (e))
Example #58
def shutdownModifyUpdateSpeed():
    try:
        settings.MAX_UPDATES_PER_SECOND = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        log.msg("Carbon shutting down.  Changed the update rate to: " + str(settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN))
    except KeyError:
        log.msg("Carbon shutting down.  Update rate not changed")
Example #59
 def startShutdown(results):
     log.msg("startShutdown(%s)" % str(results))
     allStopped = client_manager.stopAllClients()
     allStopped.addCallback(shutdown)
Example #60
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"
    updates = 0
    lastSecond = 0

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath,
             dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in schemas:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' %
                                    (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                for schema in agg_schemas:
                    if schema.matches(metric):
                        log.creates(
                            'new metric %s matched aggregation schema %s' %
                            (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                dbDir = dirname(dbFilePath)
                try:
                    os.makedirs(dbDir, 0o755)
                except OSError as e:
                    log.err("%s" % e)
                log.creates(
                    "creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor,
                     aggregationMethod))
                whisper.create(dbFilePath, archiveConfig, xFilesFactor,
                               aggregationMethod,
                               settings.WHISPER_SPARSE_CREATE,
                               settings.WHISPER_FALLOCATE_CREATE)
                instrumentation.increment('creates')

            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                t2 = time.time()
                updateTime = t2 - t1
            except:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)

                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))

                # Rate limit update operations
                thisSecond = int(t2)

                if thisSecond != lastSecond:
                    lastSecond = thisSecond
                    updates = 0
                else:
                    updates += 1
                    if updates >= settings.MAX_UPDATES_PER_SECOND:
                        time.sleep(int(t2 + 1) - t2)

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)