def process(metric, datapoint):
    """Run one datapoint through the rewrite and aggregation rules.

    The pre-rewrite rules are applied to the metric name first. The datapoint
    is then fed into the buffer of every aggregate metric that an aggregation
    rule produces for that name. Afterwards the post-rewrite rules are applied
    and the rewritten metric is emitted through ``events.metricGenerated``
    unless its name equals one of the aggregate metrics.
    """
    increment('datapointsReceived')

    for rewrite in RewriteRuleManager.preRules:
        metric = rewrite.apply(metric)

    matched = []
    for rule in RuleManager.rules:
        target = rule.get_aggregate_metric(metric)
        if target is None:
            continue
        matched.append(target)

        values = BufferManager.get_buffer(target)
        if not values.configured:
            values.configure_aggregation(rule.frequency, rule.aggregation_func)
        values.input(datapoint)

    for rewrite in RewriteRuleManager.postRules:
        metric = rewrite.apply(metric)

    if metric not in matched:
        events.metricGenerated(metric, datapoint)

    # The message reports the name as it looks after the post-rewrite rules.
    if not matched:
        log.msg("Couldn't match metric %s with any aggregation rule. Passing on un-aggregated." % metric)
def loadStorageSchemas():
    """Parse STORAGE_SCHEMAS_CONFIG into a list of pattern schemas.

    Every config section must provide a ``retentions`` option (a KeyError is
    raised otherwise) and a ``pattern`` option (the section is logged and
    skipped otherwise). Sections whose archive list is rejected by
    ``whisper.validateArchiveList`` are logged and left out.

    Returns:
        The list of accepted schemas, in config-file order.
    """
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        retentions = options['retentions'].split(',')
        archives = [Archive.fromString(s) for s in retentions]

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Section missing 'pattern': %s" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            whisper.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except whisper.InvalidConfiguration as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e) )

    # The list was previously built and then discarded; hand it back.
    return schemaList
def connectionLost(self, reason):
    """Log the stdin disconnect and arrange for the clients to be stopped.

    A callback is attached to ``firstConnectsAttempted``. When it fires, all
    clients are asked to stop and ``shutdown`` is chained onto that result.
    """
    log.msg('stdin disconnected')

    def beginShutdown(results):
        # str() keeps the formatting safe if results happens to be a tuple.
        log.msg("startShutdown(%s)" % str(results))
        stopping = client_manager.stopAllClients()
        stopping.addCallback(shutdown)

    firstConnectsAttempted.addCallback(beginShutdown)
def optimalWriteOrder():
    """Generates metrics with the most cached values first and applies a soft
    rate limit on new metrics.

    Yields (metric, datapoints, dbFilePath, dbFileExists) tuples. The datapoints
    are popped from MetricCache just before each tuple is yielded.
    """
    global lastCreateInterval
    global createCount
    # Snapshot of (metric, number of cached datapoints) pairs.
    metrics = [ (metric, len(datapoints)) for metric,datapoints in MetricCache.items() ]

    t = time.time()
    metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
    log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

    for metric, queueSize in metrics:
        dbFilePath = getFilesystemPath(metric)
        dbFileExists = exists(dbFilePath)

        if not dbFileExists:
            # Count this would-be create against the current window.
            createCount += 1
            now = time.time()

            if now - lastCreateInterval >= 60:
                # A new 60 second window starts; this metric is its first create.
                lastCreateInterval = now
                createCount = 1

            elif createCount >= settings.MAX_CREATES_PER_MINUTE:
                # Over the limit: skip without popping, so the datapoints stay cached.
                continue

        try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
            datapoints = MetricCache.pop(metric)
        except KeyError:
            log.msg("MetricCache contention, skipping %s update for now" % metric)
            continue # we simply move on to the next metric when this race condition occurs

        yield (metric, datapoints, dbFilePath, dbFileExists)
def _generate_queue():
    """Endlessly yield the first element of each ``self.cache.counts`` entry.

    The entries are re-sorted on every pass and handed out starting with the
    one whose second element is largest.
    """
    while True:
        started = time.time()
        by_count = sorted(self.cache.counts, key=lambda entry: entry[1])
        log.msg("Sorted %d cache queues in %.6f seconds" % (len(by_count), time.time() - started))
        # Ascending sort + pop from the tail == largest count first.
        while by_count:
            entry = by_count.pop()
            yield entry[0]
def _flush(prefix=None):
    """Write/create whisper files at maximal speed.

    Args:
        prefix: optional string; when given, only metrics whose name starts
            with it are flushed.

    Returns:
        The number of metrics that were both created (if needed) and written
        successfully.
    """
    # Identity test instead of ``== None`` so a custom __eq__ cannot fool it.
    assert prefix is None or hasattr(prefix, 'startswith')
    log.msg("flush started (prefix: %s)" % prefix)

    started = time.time()
    metrics = MetricCache.counts()
    updates = 0

    write_lock.acquire()
    try:
        for metric, _queueSize in metrics:
            if prefix and not metric.startswith(prefix):
                continue
            dbFilePath = getFilesystemPath(metric)
            dbFileExists = exists(dbFilePath)
            try:
                datapoints = MetricCache.pop(metric)
            except KeyError:
                # The metric left the cache since counts() was taken.
                continue
            if not createWhisperFile(metric, dbFilePath, dbFileExists):
                continue
            if not writeWhisperFile(dbFilePath, datapoints):
                continue
            updates += 1
    finally:
        write_lock.release()

    log.msg('flush finished (updates: %d, time: %.5f sec)' % (updates, time.time()-started))
    return updates
def reloadAggregationSchemas():
    """Replace AGGREGATION_SCHEMAS with a freshly loaded list.

    If loading fails the previous value is kept and the failure is logged.
    """
    global AGGREGATION_SCHEMAS
    try:
        reloaded = loadAggregationSchemas()
    except Exception:
        log.msg("Failed to reload aggregation SCHEMAS")
        log.err()
    else:
        AGGREGATION_SCHEMAS = reloaded
def reloadAggregationSchemas():
    """Replace ``agg_schemas`` with a freshly loaded list.

    If loading fails the previous value is kept and the failure is logged.
    """
    global agg_schemas
    try:
        agg_schemas = loadAggregationSchemas()
    except Exception:
        # Not a bare except: SystemExit / KeyboardInterrupt must propagate.
        log.msg("Failed to reload aggregation schemas")
        log.err()
def optimalWriteOrder():
    """Generates metrics with the most cached values first.

    Yields (metric, datapoints, dbIdentifier, dbExists) tuples. A metric is
    skipped when the persister's pre-check rejects it or when it is no longer
    in the MetricCache.
    """
    global lastCreateInterval
    global createCount
    queues = MetricCache.counts()

    sort_started = time.time()
    # by queue size, descending
    queues.sort(key=lambda pair: pair[1], reverse=True)
    log.msg("Sorted %d cache queues in %.6f seconds" % (len(queues), time.time() - sort_started))

    for metric, _size in queues:
        if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
            events.cacheSpaceAvailable()

        # Let our persister do its own check, and ignore the metric if needed.
        if not persister.pre_get_datapoints_check(metric):
            continue

        # metrics can momentarily disappear from the MetricCache due to the
        # implementation of MetricCache.store()
        try:
            points = MetricCache.pop(metric)
        except KeyError:
            log.msg("MetricCache contention, skipping %s update for now" % metric)
            # we simply move on to the next metric when this race condition occurs
            continue

        info = persister.get_dbinfo(metric)
        yield (metric, points, info[0], info[1])
def reloadStorageSchemas():
    """Replace ``schemas`` with a freshly loaded list.

    If loading fails the previous value is kept and the failure is logged.
    """
    global schemas
    try:
        schemas = loadStorageSchemas()
    except Exception:
        # Not a bare except: SystemExit / KeyboardInterrupt must propagate.
        log.msg("Failed to reload storage schemas")
        log.err()
def loadStorageSchemas():
    """Build the storage schema list from STORAGE_SCHEMAS_CONFIG.

    Sections that lack 'retentions' or 'pattern', or whose archive list is
    rejected by the configured database, are logged and skipped.
    ``defaultSchema`` is always appended last.
    """
    parser = OrderedConfigParser()
    parser.read(STORAGE_SCHEMAS_CONFIG)

    loaded = []
    for section in parser.sections():
        opts = dict(parser.items(section))
        pattern = opts.get('pattern')

        # A KeyError raised while parsing the archives is reported the same
        # way as a missing option, so both steps share the try block.
        try:
            archives = [Archive.fromString(spec) for spec in opts['retentions'].split(',')]
        except KeyError:
            log.err("Schema %s missing 'retentions', skipping" % section)
            continue

        if not pattern:
            log.err("Schema %s missing 'pattern', skipping" % section)
            continue
        schema = PatternSchema(section, pattern, archives)

        archive_tuples = [archive.getTuple() for archive in archives]
        try:
            if state.database is not None:
                state.database.validateArchiveList(archive_tuples)
        except ValueError as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))
        else:
            loaded.append(schema)

    loaded.append(defaultSchema)
    return loaded
def reloadStorageSchemas():
    """Replace SCHEMAS with a freshly loaded list.

    If loading fails the previous value is kept and the failure is logged.
    """
    global SCHEMAS
    try:
        reloaded = loadStorageSchemas()
    except Exception:
        log.msg("Failed to reload storage SCHEMAS")
        log.err()
    else:
        SCHEMAS = reloaded
def loadStorageSchemas():
    """Parse STORAGE_SCHEMAS_CONFIG into a list of schemas.

    Each section must provide ``retentions`` (a KeyError is raised otherwise)
    and exactly one way to match metrics: ``match-all``, ``pattern`` or
    ``list``, checked in that order. Sections with none of these are logged
    and skipped, as are sections whose archive list is rejected by
    ``whisper.validateArchiveList``.

    Returns:
        The list of accepted schemas, in config-file order.
    """
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        matchAll = options.get('match-all')
        pattern = options.get('pattern')
        listName = options.get('list')

        retentions = options['retentions'].split(',')
        archives = [Archive.fromString(s) for s in retentions]

        if matchAll:
            mySchema = DefaultSchema(section, archives)
        elif pattern:
            mySchema = PatternSchema(section, pattern, archives)
        elif listName:
            mySchema = ListSchema(section, listName, archives)
        else:
            # Without this branch mySchema was either unbound (NameError) or
            # still the previous section's schema, which got appended twice.
            log.err("Section missing 'match-all', 'pattern' or 'list': %s" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            whisper.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except whisper.InvalidConfiguration as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    # The list was previously built and then discarded; hand it back.
    return schemaList
def process(self, metric, datapoint):
    """Aggregate one datapoint and optionally forward the raw metric.

    The datapoint is fed into the buffer of every aggregate metric produced
    by the aggregation rules. This is a generator: it yields
    ``(metric, datapoint)`` once when ``settings.FORWARD_ALL`` is set and the
    metric is not itself one of the aggregate metrics, and nothing otherwise.
    """
    increment('datapointsReceived')

    matched = set()
    for rule in RuleManager.rules:
        target = rule.get_aggregate_metric(metric)
        if target is None:
            continue
        matched.add(target)

        pending = BufferManager.get_buffer(target)
        if not pending.configured:
            pending.configure_aggregation(rule.frequency, rule.aggregation_func)
        pending.input(datapoint)

    if not settings.FORWARD_ALL or metric in matched:
        return

    if settings.LOG_AGGREGATOR_MISSES and not matched:
        log.msg(
            "Couldn't match metric %s with any aggregation rule. Passing on un-aggregated." % metric)
    yield (metric, datapoint)
def processMessage(self, message, channel):
    """Parse a message and post it as a metric.

    The metric name is the message routing key. Each non-blank body line must
    hold "<value> <timestamp>"; malformed lines are logged and skipped. The
    message is acknowledged once all of its lines have been handled.
    """
    verbose = self.factory.verbose
    if verbose:
        log.listener("Message received: %s" % (message,))

    metric = message.routing_key

    for raw_line in message.content.body.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        try:
            value, timestamp = line.split()
            datapoint = (float(timestamp), float(value))
        except ValueError:
            log.listener("invalid message line: %s" % (line,))
            continue

        events.metricReceived(metric, datapoint)

        if self.factory.verbose:
            log.listener("Metric posted: %s %s %s" % (metric, value, timestamp,))

    log.msg("Acking...")
    channel.basic_ack(delivery_tag=message.delivery_tag, multiple=False)
    log.msg("Ack Done!!")
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    For every metric handed out by optimalWriteOrder() the database entry is
    created first if it does not exist yet (using the first matching storage
    and aggregation schemas), then the datapoints are written.

    Raises:
        Exception: if a new metric matches no storage schema.
    """

    while MetricCache:
        # NOTE(review): dataWritten is set but never read in the visible code.
        dataWritten = False

        for (metric, datapoints, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                # First matching storage schema decides the archive layout.
                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                # First matching aggregation schema decides xff / aggregation method.
                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" % (metric, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                    instrumentation.increment('creates')
                except Exception, e:
                    log.err()
                    log.msg("Error creating %s: %s" % (metric, e))
                    instrumentation.increment('errors')
                    # The already-popped datapoints of this metric are not written.
                    continue

            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                # If we have duplicated points, always pick the last. update_many()
                # has no guaranteed behavior for that, and in fact the current implementation
                # will keep the first point in the list.
                datapoints = dict(datapoints).items()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception, e:
                log.err()
                log.msg("Error writing to %s: %s" % (metric, e))
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))
def createClientConnections(hosts):
    """Open a TCP connection to every destination in ``hosts``.

    ``hosts`` holds (server, port, instance) triples; the instance is not
    used here. Each factory is recorded in ``clientConnections``, which is
    then sorted by remote address.
    """
    for host in hosts:
        server, port, _instance = host
        log.msg("Connecting to destination server %s:%d" % (server, port))
        sender = MetricSenderFactory(server, port)
        clientConnections.append(sender)
        reactor.connectTCP(server, port, sender)

    # normalize the order
    clientConnections.sort(key=lambda factory: factory.remoteAddr)
def backIntoCache(metricList):
    """Put datapoints that could not be published back into the MetricCache.

    Args:
        metricList: iterable of (metric, datapoints) pairs.

    A point whose store fails is retried once after the rest of its metric's
    points have been stored. If the retry fails too, the error is logged and
    the point is dropped. The caller's datapoint lists are never modified.
    """
    for metric, datapoints in metricList:
        # Failed points used to be appended to the list being iterated, so a
        # store that kept failing made this loop run forever.
        retry = []
        for point in datapoints:
            try:
                MetricCache.store(metric, point)
            except Exception:
                retry.append(point)

        for point in retry:
            try:
                MetricCache.store(metric, point)
            except Exception:
                log.err()

    log.msg("Failed to publish to RabbitMQ. Pushed the metrics back to cache")
def receive_loop(self):
    """Fetch messages from the consumer queue forever and process each one.

    Written as a generator: every ``yield`` hands an operation's result back
    to whatever drives this generator before the loop continues.
    """
    incoming = yield self.queue(self.consumer_tag)
    ack_channel = yield self.channel(2)

    while True:
        log.msg("Getting msg from queue")
        message = yield incoming.get()
        log.msg("Got it!!")
        self.processMessage(message, ack_channel)
def reloadAggregationSchemas(first_run=False):
    """Replace ``agg_schemas`` with a freshly loaded list.

    Args:
        first_run: when true, a loading failure is re-raised to the caller
            instead of being logged.

    On later runs a failure is logged and the previous schemas are kept.
    """
    global agg_schemas
    try:
        agg_schemas = loadAggregationSchemas()
    except Exception:
        if first_run:
            # Bare raise keeps the original traceback intact.
            raise
        log.msg("Failed to reload aggregation schemas")
        log.err()
def reloadStorageSchemas(first_run=False):
    """Replace ``schemas`` with a freshly loaded list.

    Args:
        first_run: when true, a loading failure is re-raised to the caller
            instead of being logged.

    On later runs a failure is logged and the previous schemas are kept.
    """
    global schemas
    try:
        schemas = loadStorageSchemas()
    except Exception:
        if first_run:
            # Bare raise keeps the original traceback intact.
            raise
        log.msg("Failed to reload storage schemas")
        log.err()
def writeWhisperFile(dbFilePath, datapoints):
    """Write ``datapoints`` to the whisper file at ``dbFilePath``.

    Returns:
        True on success. False if the update raised; the error is logged and
        the 'errors' counter is incremented.
    """
    try:
        whisper.update_many(dbFilePath, datapoints)
    except Exception:
        # Not a bare except: SystemExit / KeyboardInterrupt must propagate.
        log.msg("Error writing to %s" % (dbFilePath,))
        log.err()
        instrumentation.increment('errors')
        return False
    return True
def _generate_queue():
    """Endlessly yield the first element of each ``self.cache.counts`` entry.

    The entries are re-sorted on every pass and handed out starting with the
    one whose second element is largest. The time taken to consume a pass is
    logged when it ends.
    """
    while True:
        started = time.time()
        by_count = sorted(self.cache.counts, key=lambda entry: entry[1])
        if settings.LOG_CACHE_QUEUE_SORTS:
            log.msg("Sorted %d cache queues in %.6f seconds" % (len(by_count), time.time() - started))
        # Ascending sort + pop from the tail == largest count first.
        while by_count:
            entry = by_count.pop()
            yield entry[0]
        log.msg("Queue consumed in %.6f seconds" % (time.time() - started))
def process(self, metric, datapoint):
    """Store a datapoint in the cache under its normalized metric name.

    If the name cannot be parsed, the error is logged and the datapoint is
    stored under the name as received. Always returns Processor.NO_OUTPUT.
    """
    # normalize metric name (reorder tags)
    try:
        normalized = TaggedSeries.parse(metric).path
    except Exception as err:
        log.msg('Error parsing metric %s: %s' % (metric, err))
    else:
        metric = normalized

    self.cache.store(metric, datapoint)
    return Processor.NO_OUTPUT
def startService(self):
    """Start the schema reload tasks and the writer thread.

    SIGHUP is ignored when a ``signal`` name exists in this module's globals.
    Both reload tasks are started with an interval of 60 and ``False`` as the
    second argument, and a 'before shutdown' trigger is registered for
    ``shutdownModifyUpdateSpeed``.
    """
    if 'signal' in globals():
        log.msg("Installing SIG_IGN for SIGHUP")
        signal.signal(signal.SIGHUP, signal.SIG_IGN)

    reload_interval = 60
    self.storage_reload_task.start(reload_interval, False)
    self.aggregation_reload_task.start(reload_interval, False)

    reactor.addSystemEventTrigger('before', 'shutdown', shutdownModifyUpdateSpeed)
    reactor.callInThread(writeForever)
    Service.startService(self)
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    For every metric handed out by optimalWriteOrder() the database entry is
    created first if it does not exist yet (using the first matching storage
    and aggregation schemas), then the datapoints are written.

    Raises:
        Exception: if a new metric matches no storage schema.
    """
    while MetricCache:
        # Stays False when optimalWriteOrder() yields nothing for this pass.
        dataWritten = False

        for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                # First matching storage schema decides the archive layout.
                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                # First matching aggregation schema decides xff / aggregation method.
                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                log.creates("creating database file %s (archive=%s xff=%s agg=%s)" % (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                    instrumentation.increment('creates')
                except Exception:
                    log.err("Error creating %s" % (dbFilePath))
                    # The already-popped datapoints of this metric are not written.
                    continue

            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
def _generate_queue():
    """Endlessly yield the first element of each ``self.cache.watermarks`` entry.

    The entries are re-sorted on every pass and handed out starting with the
    one whose second element is smallest. The time taken to consume a pass is
    logged when it ends.
    """
    while True:
        started = time.time()
        by_watermark = sorted(self.cache.watermarks, key=lambda entry: entry[1], reverse=True)
        if settings.LOG_CACHE_QUEUE_SORTS:
            log.msg("Sorted %d cache queues in %.6f seconds" % (len(by_watermark), time.time() - started))
        # Descending sort + pop from the tail == smallest watermark first.
        while by_watermark:
            entry = by_watermark.pop()
            yield entry[0]
        log.msg("Queue consumed in %.6f seconds" % (time.time() - started))
def shutdownModifyUpdateSpeed():
    """Switch the rate-limit buckets to the shutdown update rate.

    Both the capacity and the fill rate of UPDATE_BUCKET and CREATE_BUCKET
    (whichever are truthy) are set to
    ``settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN``. A KeyError raised along
    the way is reported as "Update rate not changed".
    """
    try:
        rate = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        if UPDATE_BUCKET:
            UPDATE_BUCKET.setCapacityAndFillRate(rate, rate)
        if CREATE_BUCKET:
            CREATE_BUCKET.setCapacityAndFillRate(rate, rate)
        message = "Carbon shutting down. Changed the update rate to: " + str(
            settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN)
        log.msg(message)
    except KeyError:
        log.msg("Carbon shutting down. Update rate not changed")
def publishForever(channel, exchange):
    """Publish cached datapoints in a loop while the reactor is running.

    Args:
        channel: passed through to writeCachedDataPoints.
        exchange: passed through to writeCachedDataPoints.

    Each pass calls writeCachedDataPoints, logs any error it raises, and then
    sleeps for 10 seconds.
    """
    while reactor.running:
        try:
            log.msg("Calling function writeCachedDataPoints")
            writeCachedDataPoints(channel, exchange)
        except Exception:
            # Not a bare except: SystemExit / KeyboardInterrupt must be able
            # to stop this loop.
            log.err()
        log.msg("Time to sleep")
        time.sleep(10)
def process(self, metric, datapoint):
    """Send a datapoint on through the client manager.

    With ``settings.TAG_RELAY_NORMALIZED`` set, the metric name is normalized
    first. Always returns pipeline.Processor.NO_OUTPUT.
    """
    if settings.TAG_RELAY_NORMALIZED:
        # normalize metric name
        try:
            normalized = TaggedSeries.parse(metric).path
        except Exception as err:
            log.msg('Error parsing metric %s: %s' % (metric, err))
            # continue anyway with processing the unnormalized metric for robustness
        else:
            metric = normalized

    state.client_manager.sendDatapoint(metric, datapoint)
    return pipeline.Processor.NO_OUTPUT
def store(self, metric, datapoint):
    """Record ``datapoint`` (a ``(timestamp, value)`` pair) for ``metric``.

    A new timestamp is stored and counted in ``self.size`` unless the cache is
    full, in which case the point is dropped, the condition is logged and
    ``events.cacheFull()`` is fired. A timestamp that is already present has
    its value overwritten without touching ``self.size``.
    """
    points = self.setdefault(metric, {})
    timestamp, value = datapoint

    is_new = timestamp not in points
    # Fullness is only checked for new timestamps; duplicates always update.
    if is_new and self.is_full:
        log.msg("MetricCache is full: self.size=%d" % self.size)
        events.cacheFull()
        return

    if is_new:
        self.size += 1
    points[timestamp] = value
def __init__(self, settings):
    """Copy the whisper-related settings and switch on optional whisper features.

    Autoflush, file locking and fadvise-random are enabled on the ``whisper``
    module when the matching setting is on and whisper reports support for
    it. A requested but unsupported feature is logged as an error.
    """
    super(WhisperDatabase, self).__init__(settings)

    self.data_dir = settings.LOCAL_DATA_DIR
    self.sparse_create = settings.WHISPER_SPARSE_CREATE
    self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE

    if settings.WHISPER_AUTOFLUSH:
        log.msg("Enabling Whisper autoflush")
        whisper.AUTOFLUSH = True

    if settings.WHISPER_FALLOCATE_CREATE:
        if not whisper.CAN_FALLOCATE:
            log.err("WHISPER_FALLOCATE_CREATE is enabled but linking failed.")
        else:
            log.msg("Enabling Whisper fallocate support")

    if settings.WHISPER_LOCK_WRITES:
        if not whisper.CAN_LOCK:
            log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.")
        else:
            log.msg("Enabling Whisper file locking")
            whisper.LOCK = True

    if settings.WHISPER_FADVISE_RANDOM:
        # AttributeError here means this whisper module has no CAN_FADVISE.
        try:
            if not whisper.CAN_FADVISE:
                log.err("WHISPER_FADVISE_RANDOM is enabled but import of ftools module failed.")
            else:
                log.msg("Enabling Whisper fadvise_random support")
                whisper.FADVISE_RANDOM = True
        except AttributeError:
            log.err("WHISPER_FADVISE_RANDOM is enabled but skipped because it is not compatible "
                    "with the version of Whisper.")
def shutdownModifyUpdateSpeed():
    """Switch the rate-limit buckets to the shutdown rate and drop the timestamp lag.

    Both the capacity and the fill rate of UPDATE_BUCKET and CREATE_BUCKET
    (whichever are truthy) are set to
    ``settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN``. A KeyError raised along
    the way is reported as "Update rate not changed".
    """
    try:
        rate = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        if UPDATE_BUCKET:
            UPDATE_BUCKET.setCapacityAndFillRate(rate, rate)
        if CREATE_BUCKET:
            CREATE_BUCKET.setCapacityAndFillRate(rate, rate)
        message = "Carbon shutting down. Changed the update rate to: " + str(
            settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN)
        log.msg(message)
    except KeyError:
        log.msg("Carbon shutting down. Update rate not changed")

    # Also set MIN_TIMESTAMP_LAG to 0 to avoid waiting for nothing.
    settings.MIN_TIMESTAMP_LAG = 0
def tag(self, *metrics):
    """Post the given metric paths to the ``/tags/tagMultiSeries`` endpoint.

    Any exception raised by the request is logged, not propagated.
    """
    from carbon.http import httpRequest

    joined = ', '.join(metrics)
    log.debug("Tagging %s" % joined, type='tagdb')
    started = time.time()

    url = self.graphite_url + '/tags/tagMultiSeries'
    try:
        httpRequest(url, [('path', name) for name in metrics])
        log.debug("Tagged %s in %s" % (joined, time.time() - started), type='tagdb')
    except Exception as err:
        log.msg("Error tagging %s: %s" % (', '.join(metrics), err), type='tagdb')
def _generate_queue():
    """Endlessly yield the first element of each ``self.cache.watermarks`` entry.

    The entries are re-sorted on every pass and handed out starting with the
    one whose second element is smallest. Timing is only logged for
    non-empty passes, and only when LOG_CACHE_QUEUE_SORTS is on.
    """
    while True:
        started = time.time()
        by_watermark = sorted(self.cache.watermarks, key=lambda entry: entry[1], reverse=True)
        size = len(by_watermark)
        if settings.LOG_CACHE_QUEUE_SORTS and size:
            log.msg("Sorted %d cache queues in %.6f seconds" % (size, time.time() - started))
        # Descending sort + pop from the tail == smallest watermark first.
        while by_watermark:
            entry = by_watermark.pop()
            yield entry[0]
        if settings.LOG_CACHE_QUEUE_SORTS and size:
            log.msg("Queue consumed in %.6f seconds" % (time.time() - started))
def tag(self, metric):
    """Post ``metric`` to the ``/tags/tagSeries`` endpoint and log the outcome.

    The result of the request is reported through a success callback and an
    error callback attached to the object ``httpRequest`` returns.
    """
    from carbon.http import httpRequest

    log.msg("Tagging %s" % metric)
    t = time.time()

    def successHandler(result, *args, **kw):
        elapsed = time.time() - t
        log.msg("Tagged %s: %s in %s" % (metric, result, elapsed))

    def errorHandler(err):
        log.msg("Error tagging %s: %s" % (metric, err))

    request = httpRequest(self.graphite_url + '/tags/tagSeries', { 'path': metric })
    # Attach the errback to whatever addCallback returns, as the chained form did.
    request = request.addCallback(successHandler)
    request.addErrback(errorHandler)
def _generate_queue():
    """Endlessly yield the first element of each ``self.cache.watermarks`` entry.

    The entries are re-sorted on every pass and handed out starting with the
    one whose second element is smallest. With ``settings.MIN_TIMESTAMP_LAG``
    set, an entry is only kept if its second element lies more than that lag
    before the pass start time. A pass with nothing to hand out yields
    ``None`` once.
    """
    while True:
        started = time.time()
        candidates = sorted(self.cache.watermarks, key=lambda entry: entry[1], reverse=True)
        if settings.MIN_TIMESTAMP_LAG:
            candidates = [
                entry for entry in candidates
                if started - entry[1] > settings.MIN_TIMESTAMP_LAG
            ]
        size = len(candidates)
        if settings.LOG_CACHE_QUEUE_SORTS and size:
            log.msg("Sorted %d cache queues in %.6f seconds" % (size, time.time() - started))
        if not size:
            # If there is nothing to do give a chance to sleep to the reader.
            yield None
        # Descending sort + pop from the tail == smallest watermark first.
        while candidates:
            entry = candidates.pop()
            yield entry[0]
        if settings.LOG_CACHE_QUEUE_SORTS and size:
            log.msg("Queue consumed in %.6f seconds" % (time.time() - started))
def __init__(self, settings):
    """Configure the ceres module from ``settings`` and open the tree.

    Node and slice caching behaviour and the maximum slice gap are applied to
    the ``ceres`` module. Write locking is switched on when requested and
    supported. The tree is rooted at ``settings.LOCAL_DATA_DIR``.
    """
    super(CeresDatabase, self).__init__(settings)

    self.data_dir = settings.LOCAL_DATA_DIR

    ceres.setDefaultNodeCachingBehavior(settings.CERES_NODE_CACHING_BEHAVIOR)
    ceres.setDefaultSliceCachingBehavior(settings.CERES_SLICE_CACHING_BEHAVIOR)
    ceres.MAX_SLICE_GAP = int(settings.CERES_MAX_SLICE_GAP)

    if settings.CERES_LOCK_WRITES:
        if not ceres.CAN_LOCK:
            log.err("CERES_LOCK_WRITES is enabled but import of fcntl module failed.")
        else:
            log.msg("Enabling Ceres file locking")
            ceres.LOCK_WRITES = True

    self.tree = ceres.CeresTree(self.data_dir)
def optimalWriteOrder():
    """Generates metrics with the most cached values first and applies a soft
    rate limit on new metrics.

    Yields (metric, datapoints, dbFilePath, dbFileExists) tuples. The datapoints
    are popped from MetricCache just before each tuple is yielded.
    """
    global lastCreateInterval
    global createCount
    metrics = MetricCache.counts()

    t = time.time()
    metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
    log.debug("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

    for metric, queueSize in metrics:
        # Announce free space once the cache has dropped below the low watermark.
        if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
            events.cacheSpaceAvailable()

        dbFilePath = getFilesystemPath(metric)
        dbFileExists = exists(dbFilePath)

        if not dbFileExists:
            # Count this would-be create against the current window.
            createCount += 1
            now = time.time()

            if now - lastCreateInterval >= 60:
                # A new 60 second window starts; this metric is its first create.
                lastCreateInterval = now
                createCount = 1

            elif createCount >= settings.MAX_CREATES_PER_MINUTE:
                # dropping queued up datapoints for new metrics prevents filling up the entire cache
                # when a bunch of new metrics are received.
                try:
                    MetricCache.pop(metric)
                except KeyError:
                    pass

                continue

        try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
            datapoints = MetricCache.pop(metric)
        except KeyError:
            log.msg("MetricCache contention, skipping %s update for now" % metric)
            continue # we simply move on to the next metric when this race condition occurs

        yield (metric, datapoints, dbFilePath, dbFileExists)
def loadAggregationSchemas():
    """Build the aggregation schema list from STORAGE_AGGREGATION_CONFIG.

    A config file that cannot be read is logged and treated as empty. Each
    section may set ``xfilesfactor`` (a float in [0, 1]) and
    ``aggregationmethod`` (one of ``whisper.aggregationMethods``), and must
    select metrics through ``match-all``, ``pattern`` or ``list``. Sections
    that fail validation or have no selector are logged and skipped.

    Returns:
        The accepted schemas in config-file order, followed by
        ``defaultAggregation``.
    """
    # NOTE: This abuses the Schema classes above, and should probably be refactored.
    schemaList = []
    config = OrderedConfigParser()

    try:
        config.read(STORAGE_AGGREGATION_CONFIG)
    except (IOError, CarbonConfigException):
        log.msg("%s not found or wrong perms, ignoring." % STORAGE_AGGREGATION_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        matchAll = options.get('match-all')
        pattern = options.get('pattern')
        listName = options.get('list')

        xFilesFactor = options.get('xfilesfactor')
        aggregationMethod = options.get('aggregationmethod')

        # Explicit checks rather than assert, which is stripped under -O.
        try:
            if xFilesFactor is not None:
                xFilesFactor = float(xFilesFactor)
                if not 0 <= xFilesFactor <= 1:
                    raise ValueError("xFilesFactor out of range: %s" % xFilesFactor)
            if aggregationMethod is not None:
                if aggregationMethod not in whisper.aggregationMethods:
                    raise ValueError("unknown aggregationMethod: %s" % aggregationMethod)
        except Exception:
            log.msg("Invalid schemas found in %s." % section)
            continue

        archives = (xFilesFactor, aggregationMethod)

        if matchAll:
            mySchema = DefaultSchema(section, archives)
        elif pattern:
            mySchema = PatternSchema(section, pattern, archives)
        elif listName:
            mySchema = ListSchema(section, listName, archives)
        else:
            # Without this branch mySchema was either unbound (NameError) or
            # still the previous section's schema, which got appended twice.
            log.err("Section missing 'match-all', 'pattern' or 'list': %s" % section)
            continue

        schemaList.append(mySchema)

    schemaList.append(defaultAggregation)
    return schemaList
def loadAggregationSchemas():
    """Build the aggregation schema list from STORAGE_AGGREGATION_CONFIG.

    A config file that cannot be read is logged and treated as empty. Each
    section may set ``xfilesfactor`` (a float in [0, 1]) and
    ``aggregationmethod``, which is checked against
    ``state.database.aggregationMethods`` when a database is configured.
    Sections that fail validation or have no ``pattern`` are logged and
    skipped.

    Returns:
        The accepted schemas in config-file order, followed by
        ``defaultAggregation``.
    """
    # NOTE: This abuses the Schema classes above, and should probably be refactored.
    schemaList = []
    config = OrderedConfigParser()

    try:
        config.read(STORAGE_AGGREGATION_CONFIG)
    except (IOError, CarbonConfigException):
        log.msg("%s not found or wrong perms, ignoring." % STORAGE_AGGREGATION_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        xFilesFactor = options.get('xfilesfactor')
        aggregationMethod = options.get('aggregationmethod')

        # These checks used to be asserts, whose AssertionError escaped the
        # ``except ValueError`` below instead of skipping the section.
        try:
            if xFilesFactor is not None:
                xFilesFactor = float(xFilesFactor)
                if not 0 <= xFilesFactor <= 1:
                    raise ValueError("xFilesFactor out of range: %s" % xFilesFactor)
            if aggregationMethod is not None and state.database is not None:
                if aggregationMethod not in state.database.aggregationMethods:
                    raise ValueError("unknown aggregationMethod: %s" % aggregationMethod)
        except ValueError:
            log.msg("Invalid schemas found in %s." % section)
            continue

        archives = (xFilesFactor, aggregationMethod)

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Section missing 'pattern': %s" % section)
            continue

        schemaList.append(mySchema)

    schemaList.append(defaultAggregation)
    return schemaList
def loadStorageSchemas():
    """Build the storage schema list from STORAGE_SCHEMAS_CONFIG.

    Sections that lack 'retentions' or 'pattern', or whose archive list is
    rejected by the configured database, are logged and skipped. A retention
    string that cannot be parsed is fatal: it is logged and SystemExit(1) is
    raised. ``defaultSchema`` is always appended last.
    """
    parser = OrderedConfigParser()
    parser.read(STORAGE_SCHEMAS_CONFIG)

    loaded = []
    for section in parser.sections():
        opts = dict(parser.items(section))
        pattern = opts.get('pattern')

        if 'retentions' not in opts:
            log.err("Schema %s missing 'retentions', skipping" % section)
            continue
        specs = opts['retentions'].split(',')

        try:
            archives = [Archive.fromString(spec) for spec in specs]
        except ValueError as exc:
            problem = "{msg} in section [{section}] in {fn}".format(
                msg=exc, section=section.title(), fn=STORAGE_SCHEMAS_CONFIG)
            log.err(problem)
            raise SystemExit(1)

        if not pattern:
            log.err("Schema %s missing 'pattern', skipping" % section)
            continue
        schema = PatternSchema(section, pattern, archives)

        archive_tuples = [archive.getTuple() for archive in archives]
        try:
            if state.database is not None:
                state.database.validateArchiveList(archive_tuples)
        except ValueError as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))
        else:
            loaded.append(schema)

    loaded.append(defaultSchema)
    return loaded
def __init__(self, settings):
    """Copy the whisper-related settings and switch on optional whisper features.

    Autoflush and file locking are enabled on the ``whisper`` module when the
    matching setting is on and whisper reports support for it. A requested
    but unsupported feature is logged as an error.
    """
    self.data_dir = settings.LOCAL_DATA_DIR
    self.sparse_create = settings.WHISPER_SPARSE_CREATE
    self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE

    if settings.WHISPER_AUTOFLUSH:
        log.msg("Enabling Whisper autoflush")
        whisper.AUTOFLUSH = True

    if settings.WHISPER_FALLOCATE_CREATE:
        if not whisper.CAN_FALLOCATE:
            log.err("WHISPER_FALLOCATE_CREATE is enabled but linking failed.")
        else:
            log.msg("Enabling Whisper fallocate support")

    if settings.WHISPER_LOCK_WRITES:
        if not whisper.CAN_LOCK:
            log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.")
        else:
            log.msg("Enabling Whisper file locking")
            whisper.LOCK = True
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    This is a generator: when UPDATE_BUCKET is set it yields the result of
    ``UPDATE_BUCKET.drain(1, blocking=True)`` before each write.

    Raises:
        Exception: if a new metric matches no storage schema.
    """

    cache = MetricCache()
    while cache:
        (metric, datapoints) = cache.drain_metric()
        if metric is None:
            # end the loop
            break

        dbFileExists = state.database.exists(metric)

        if not dbFileExists:
            if CREATE_BUCKET and not CREATE_BUCKET.drain(1):
                # If our tokenbucket doesn't have enough tokens available to create a new metric
                # file then we'll just drop the metric on the ground and move on to the next
                # metric.
                # XXX This behavior should probably be configurable to not drop metrics
                # when rate limiting unless our cache is too big or some other legit
                # reason.
                instrumentation.increment('droppedCreates')
                continue

            archiveConfig = None
            xFilesFactor, aggregationMethod = None, None

            # First matching storage schema decides the archive layout.
            for schema in SCHEMAS:
                if schema.matches(metric):
                    if settings.LOG_CREATES:
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                    archiveConfig = [archive.getTuple() for archive in schema.archives]
                    break

            # First matching aggregation schema decides xff / aggregation method.
            for schema in AGGREGATION_SCHEMAS:
                if schema.matches(metric):
                    if settings.LOG_CREATES:
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                    xFilesFactor, aggregationMethod = schema.archives
                    break

            if not archiveConfig:
                raise Exception(("No storage schema matched the metric '%s',"
                                 " check your storage-schemas.conf file.") % metric)

            if settings.LOG_CREATES:
                log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" % (metric, archiveConfig, xFilesFactor, aggregationMethod))
            try:
                state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                if settings.ENABLE_TAGS:
                    tagQueue.add(metric)
                instrumentation.increment('creates')
            except Exception as e:
                log.err()
                log.msg("Error creating %s: %s" % (metric, e))
                instrumentation.increment('errors')
                # The already-drained datapoints of this metric are not written.
                continue

        # If we've got a rate limit configured, let's make sure we enforce it
        waitTime = 0
        if UPDATE_BUCKET:
            t1 = time.time()
            yield UPDATE_BUCKET.drain(1, blocking=True)
            # Time between yielding the drain result and being resumed.
            waitTime = time.time() - t1

        try:
            t1 = time.time()
            # If we have duplicated points, always pick the last. update_many()
            # has no guaranteed behavior for that, and in fact the current implementation
            # will keep the first point in the list.
            datapoints = dict(datapoints).items()
            state.database.write(metric, datapoints)
            if settings.ENABLE_TAGS:
                tagQueue.update(metric)
            updateTime = time.time() - t1
        except Exception as e:
            log.err()
            log.msg("Error writing to %s: %s" % (metric, e))
            instrumentation.increment('errors')
        else:
            pointCount = len(datapoints)
            instrumentation.increment('committedPoints', pointCount)
            instrumentation.append('updateTimes', updateTime)
            if settings.LOG_UPDATES:
                # Only mention the wait when it was longer than a millisecond.
                if waitTime > 0.001:
                    log.updates("wrote %d datapoints for %s in %.5f seconds after waiting %.5f seconds" % (
                        pointCount, metric, updateTime, waitTime))
                else:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (
                        pointCount, metric, updateTime))
def reloadAggregationSchemas():
    """Replace AGGREGATION_SCHEMAS with a freshly loaded list.

    If loading fails the previous value is kept and the error is logged.
    """
    global AGGREGATION_SCHEMAS
    try:
        reloaded = loadAggregationSchemas()
    except Exception as e:
        log.msg("Failed to reload aggregation SCHEMAS: %s" % e)
    else:
        AGGREGATION_SCHEMAS = reloaded
def errorHandler(err):
    """Log a tagging failure and return ``err`` unchanged.

    ``metrics`` comes from the enclosing scope.
    """
    names = ', '.join(metrics)
    reason = err.getErrorMessage()
    log.msg("Error tagging %s: %s" % (names, reason), type='tagdb')
    return err
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    For every metric handed out by optimalWriteOrder() the whisper file is
    created first if it does not exist yet (using the first matching storage
    and aggregation schemas), then the datapoints are written.

    Raises:
        Exception: if a new metric matches no storage schema.
    """
    while MetricCache:
        # Stays False when optimalWriteOrder() yields nothing for this pass.
        dataWritten = False

        for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                # First matching storage schema decides the archive layout.
                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [ archive.getTuple() for archive in schema.archives ]
                        break

                # First matching aggregation schema decides xff / aggregation method.
                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates( 'new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception( "No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                dbDir = dirname(dbFilePath)
                try:
                    if not exists(dbDir):
                        os.makedirs(dbDir)
                except OSError, e:
                    # Only logged; the create below is still attempted.
                    log.err("%s" % e)
                log.creates( "creating database file %s (archive=%s xff=%s agg=%s)" % (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod, settings.WHISPER_SPARSE_CREATE, settings.WHISPER_FALLOCATE_CREATE)
                    instrumentation.increment('creates')
                except:
                    log.err("Error creating %s" % (dbFilePath))
                    # The already-popped datapoints of this metric are not written.
                    continue

            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                updateTime = time.time() - t1
            except Exception:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
def successHandler(result, *args, **kw):
    """Log a successful tagging call together with the time it took.

    ``metric`` and the start time ``t`` come from the enclosing scope. Extra
    positional and keyword arguments are accepted and ignored.
    """
    elapsed = time.time() - t
    log.msg("Tagged %s: %s in %s" % (metric, result, elapsed))
def _check_available_space(self):
    """Fire ``events.cacheSpaceAvailable()`` once a full cache has drained.

    Nothing happens unless ``state.cacheTooFull`` is set and ``self.size`` is
    below ``settings.CACHE_SIZE_LOW_WATERMARK``.
    """
    if not state.cacheTooFull:
        return
    if not (self.size < settings.CACHE_SIZE_LOW_WATERMARK):
        return

    log.msg("MetricCache below watermark: self.size=%d" % self.size)
    events.cacheSpaceAvailable()
def postOptions(self):
    """Finalize options after parsing and apply them to the global settings.

    Reads the program configuration via read_config() and merges it into
    `settings`, propagates uid/gid and pidfile to the parent options, checks
    that storage-schemas.conf exists (exits with status 1 otherwise),
    applies the whisper autoflush/locking flags, dispatches the requested
    action through self.handleAction(), sets up logging, and fills in the
    default whitelist/blacklist paths.
    """
    global settings

    program = self.parent.subCommand

    # Use provided pidfile (if any) as default for configuration. If it's
    # set to 'twistd.pid', that means no value was provided and the default
    # was used.
    pidfile = self.parent["pidfile"]
    if pidfile.endswith("twistd.pid"):
        pidfile = None
    self["pidfile"] = pidfile

    # Enforce a default umask of '022' if none was set.
    if "umask" not in self.parent or self.parent["umask"] is None:
        self.parent["umask"] = 0o022

    # Read extra settings from the configuration file.
    program_settings = read_config(program, self)
    settings.update(program_settings)
    settings["program"] = program

    # Set process uid/gid by changing the parent config, if a user was
    # provided in the configuration file.
    if settings.USER:
        self.parent["uid"], self.parent["gid"] = (
            pwd.getpwnam(settings.USER)[2:4])

    # Set the pidfile in parent config to the value that was computed by
    # C{read_config}.
    self.parent["pidfile"] = settings["pidfile"]

    storage_schemas = join(settings["CONF_DIR"], "storage-schemas.conf")
    if not exists(storage_schemas):
        print("Error: missing required config %s" % storage_schemas)
        sys.exit(1)

    if settings.WHISPER_AUTOFLUSH:
        log.msg("Enabling Whisper autoflush")
        whisper.AUTOFLUSH = True

    if settings.WHISPER_LOCK_WRITES:
        if whisper.CAN_LOCK:
            log.msg("Enabling Whisper file locking")
            whisper.LOCK = True
        else:
            log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.")

    if "action" not in self:
        self["action"] = "start"
    self.handleAction()

    # If we are not running in debug mode or non-daemon mode, then log to a
    # directory, otherwise log output will go to stdout. If parent options
    # are set to log to syslog, then use that instead.
    if not self["debug"]:
        if self.parent.get("syslog", None):
            log.logToSyslog(self.parent["prefix"])
        elif not self.parent["nodaemon"]:
            logdir = settings.LOG_DIR
            if not isdir(logdir):
                os.makedirs(logdir)
            log.logToDir(logdir)

    if self["whitelist"] is None:
        self["whitelist"] = join(settings["CONF_DIR"], "whitelist.conf")
    settings["whitelist"] = self["whitelist"]

    if self["blacklist"] is None:
        self["blacklist"] = join(settings["CONF_DIR"], "blacklist.conf")
    settings["blacklist"] = self["blacklist"]
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    Each pass walks optimalWriteOrder(). A metric that does not exist in the
    database yet is created through state.database.create() using the first
    matching entry in SCHEMAS (and, if any matches, AGGREGATION_SCHEMAS);
    its datapoints are then written with state.database.write().

    Raises:
        Exception: if a new metric matches no entry in SCHEMAS.
    """
    cache = MetricCache()
    while cache:
        dataWritten = False

        for (metric, datapoints, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                # First matching storage schema wins.
                for schema in SCHEMAS:
                    if schema.matches(metric):
                        if settings.LOG_CREATES:
                            log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                # First matching aggregation schema wins; both stay None otherwise.
                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        if settings.LOG_CREATES:
                            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                if settings.LOG_CREATES:
                    log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                                (metric, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                    instrumentation.increment('creates')
                except Exception as e:
                    log.err()
                    log.msg("Error creating %s: %s" % (metric, e))
                    instrumentation.increment('errors')
                    continue

            # If we've got a rate limit configured lets makes sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)

            try:
                t1 = time.time()
                # If we have duplicated points, always pick the last. update_many()
                # has no guaranteed behavior for that, and in fact the current implementation
                # will keep the first point in the list.
                datapoints = dict(datapoints).items()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception as e:
                log.err()
                log.msg("Error writing to %s: %s" % (metric, e))
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Avoid churning CPU when the cache is non-empty but nothing was
        # yielded on this pass; without this pause the outer loop busy-spins.
        if not dataWritten:
            time.sleep(0.1)
def errorHandler(err):
    """Log a tagging failure for `metric` using err.getErrorMessage().

    `metric` is not a parameter; it is resolved from the surrounding scope.
    """
    details = (metric, err.getErrorMessage())
    log.msg("Error tagging %s: %s" % details)
def errorHandler(err):
    """Log a tagging failure for `metric`, formatting `err` with %s.

    `metric` is not a parameter; it is resolved from the surrounding scope.
    """
    message = "Error tagging %s: %s" % (metric, err)
    log.msg(message)
from threading import Thread from twisted.internet import reactor from twisted.internet.task import LoopingCall import whisper from carbon.cache import MetricCache from carbon.storage import getFilesystemPath, loadStorageSchemas from carbon.conf import settings from carbon.instrumentation import increment, append from carbon import log try: import cPickle as pickle except ImportError: import pickle if settings.WHISPER_AUTOFLUSH: log.msg("enabling whisper autoflush") whisper.AUTOFLUSH = True lastCreateInterval = 0 createCount = 0 def optimalWriteOrder(): "Generates metrics with the most cached values first and applies a soft rate limit on new metrics" global lastCreateInterval global createCount metrics = [(metric, len(datapoints)) for metric, datapoints in MetricCache.items()] t = time.time() metrics.sort(key=lambda item: item[1],
from carbon import state from carbon.cache import MetricCache from carbon.storage import getFilesystemPath, loadStorageSchemas,\ loadAggregationSchemas from carbon.conf import settings from carbon import log, events, instrumentation from carbon.util import TokenBucket from twisted.internet import reactor from twisted.internet.task import LoopingCall from twisted.application.service import Service try: import signal except ImportError: log.msg("Couldn't import signal module") SCHEMAS = loadStorageSchemas() AGGREGATION_SCHEMAS = loadAggregationSchemas() CACHE_SIZE_LOW_WATERMARK = settings.MAX_CACHE_SIZE * 0.95 # Inititalize token buckets so that we can enforce rate limits on creates and # updates if the config wants them. CREATE_BUCKET = None UPDATE_BUCKET = None if settings.MAX_CREATES_PER_MINUTE != float('inf'): capacity = settings.MAX_CREATES_PER_MINUTE fill_rate = float(settings.MAX_CREATES_PER_MINUTE) / 60 CREATE_BUCKET = TokenBucket(capacity, fill_rate) if settings.MAX_UPDATES_PER_SECOND != float('inf'):
def postOptions(self):
    """Finalize options after parsing and apply them to the global settings.

    Reads the program configuration via read_config() and merges it into
    `settings`, normalizes the configured paths, propagates uid/gid and
    pidfile to the parent options, checks that storage-schemas.conf exists
    (exits with status 1 otherwise), validates CACHE_WRITE_STRATEGY,
    instantiates the configured database plugin into state.database,
    dispatches the requested action through self.handleAction(), sets up
    logging, and fills in the default whitelist/blacklist paths.

    Raises:
        SystemExit: if storage-schemas.conf is missing or no plugin is
            registered for settings.DATABASE.
    """
    global settings

    program = self.parent.subCommand

    # Use provided pidfile (if any) as default for configuration. If it's
    # set to 'twistd.pid', that means no value was provided and the default
    # was used.
    pidfile = self.parent["pidfile"]
    if pidfile.endswith("twistd.pid"):
        pidfile = None
    self["pidfile"] = pidfile

    # Enforce a default umask of '022' if none was set.
    if "umask" not in self.parent or self.parent["umask"] is None:
        self.parent["umask"] = 0o022

    # Read extra settings from the configuration file.
    program_settings = read_config(program, self)
    settings.update(program_settings)
    settings["program"] = program

    # Normalize and expand paths
    def cleanpath(path):
        return os.path.normpath(os.path.expanduser(path))

    for key in ("STORAGE_DIR", "LOCAL_DATA_DIR", "WHITELISTS_DIR",
                "PID_DIR", "LOG_DIR", "pidfile"):
        settings[key] = cleanpath(settings[key])

    # Set process uid/gid by changing the parent config, if a user was
    # provided in the configuration file.
    if settings.USER:
        self.parent["uid"], self.parent["gid"] = (
            pwd.getpwnam(settings.USER)[2:4])

    # Set the pidfile in parent config to the value that was computed by
    # C{read_config}.
    self.parent["pidfile"] = settings["pidfile"]

    storage_schemas = join(settings["CONF_DIR"], "storage-schemas.conf")
    if not exists(storage_schemas):
        print("Error: missing required config %s" % storage_schemas)
        sys.exit(1)

    if settings.CACHE_WRITE_STRATEGY not in ('timesorted', 'sorted', 'max', 'bucketmax', 'naive'):
        log.err("%s is not a valid value for CACHE_WRITE_STRATEGY, defaulting to %s" %
                (settings.CACHE_WRITE_STRATEGY, defaults['CACHE_WRITE_STRATEGY']))
        # Actually apply the fallback the message announces; previously the
        # invalid value was left in place.
        settings["CACHE_WRITE_STRATEGY"] = defaults['CACHE_WRITE_STRATEGY']
    else:
        log.msg("Using %s write strategy for cache" % settings.CACHE_WRITE_STRATEGY)

    # Database-specific settings
    database = settings.DATABASE
    if database not in TimeSeriesDatabase.plugins:
        print("No database plugin implemented for '%s'" % database)
        raise SystemExit(1)

    database_class = TimeSeriesDatabase.plugins[database]
    state.database = database_class(settings)

    settings.CACHE_SIZE_LOW_WATERMARK = settings.MAX_CACHE_SIZE * 0.95

    if "action" not in self:
        self["action"] = "start"
    self.handleAction()

    # If we are not running in debug mode or non-daemon mode, then log to a
    # directory, otherwise log output will go to stdout. If parent options
    # are set to log to syslog, then use that instead.
    if not self["debug"]:
        if self.parent.get("syslog", None):
            prefix = "%s-%s[%d]" % (program, self["instance"], os.getpid())
            log.logToSyslog(prefix)
        elif not self.parent["nodaemon"]:
            logdir = settings.LOG_DIR
            if not isdir(logdir):
                os.makedirs(logdir)
                if settings.USER:
                    # We have not yet switched to the specified user,
                    # but that user must be able to create files in this
                    # directory.
                    os.chown(logdir, self.parent["uid"], self.parent["gid"])
            log.logToDir(logdir)

    if self["whitelist"] is None:
        self["whitelist"] = join(settings["CONF_DIR"], "whitelist.conf")
    settings["whitelist"] = self["whitelist"]

    if self["blacklist"] is None:
        self["blacklist"] = join(settings["CONF_DIR"], "blacklist.conf")
    settings["blacklist"] = self["blacklist"]
def reloadStorageSchemas():
    """Replace the module-level SCHEMAS with a fresh loadStorageSchemas() result.

    If loading raises, the failure is logged and SCHEMAS keeps its previous
    value.
    """
    global SCHEMAS
    try:
        reloaded = loadStorageSchemas()
    except Exception as exc:
        log.msg("Failed to reload storage SCHEMAS: %s" % (exc))
    else:
        SCHEMAS = reloaded
def shutdownModifyUpdateSpeed():
    """Switch MAX_UPDATES_PER_SECOND to its on-shutdown value and log it.

    If a KeyError is raised while doing so, the rate is reported as
    unchanged instead.
    """
    try:
        shutdown_rate = settings.MAX_UPDATES_PER_SECOND_ON_SHUTDOWN
        settings.MAX_UPDATES_PER_SECOND = shutdown_rate
        notice = "Carbon shutting down. Changed the update rate to: " + str(shutdown_rate)
        log.msg(notice)
    except KeyError:
        log.msg("Carbon shutting down. Update rate not changed")
def startShutdown(results):
    """Log the call, stop all clients, then chain `shutdown` onto the result.

    `shutdown` is added as a callback on whatever
    client_manager.stopAllClients() returns.
    """
    described = str(results)
    log.msg("startShutdown(%s)" % described)
    client_manager.stopAllClients().addCallback(shutdown)
def writeCachedDataPoints():
    """Write datapoints until the MetricCache is completely empty.

    Each pass walks optimalWriteOrder(). A metric whose database file does
    not exist yet gets one created from the first matching entry in
    `schemas` (and, if any matches, `agg_schemas`) before its datapoints are
    written with whisper.update_many(). Update attempts are counted per
    wall-clock second; once the count reaches
    settings.MAX_UPDATES_PER_SECOND the loop sleeps until the next second.

    Raises:
        Exception: if a new metric matches no entry in `schemas`. Errors
            from whisper.create() also propagate.
    """
    updates = 0
    lastSecond = 0

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                # First matching storage schema wins.
                for schema in schemas:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                # First matching aggregation schema wins; both stay None otherwise.
                for schema in agg_schemas:
                    if schema.matches(metric):
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                dbDir = dirname(dbFilePath)
                try:
                    # Skip makedirs for an existing directory so the common
                    # case does not log a spurious "File exists" error.
                    if not os.path.isdir(dbDir):
                        os.makedirs(dbDir, 0o755)
                except OSError as e:
                    log.err("%s" % e)

                log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                            (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
                whisper.create(
                    dbFilePath,
                    archiveConfig,
                    xFilesFactor,
                    aggregationMethod,
                    settings.WHISPER_SPARSE_CREATE,
                    settings.WHISPER_FALLOCATE_CREATE)
                instrumentation.increment('creates')

            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                t2 = time.time()
                updateTime = t2 - t1
            except Exception:
                # The rate limiter below reads t2; take a timestamp here so it
                # is never unbound (first iteration) or stale (later ones).
                t2 = time.time()
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

            # Rate limit update operations
            thisSecond = int(t2)
            if thisSecond != lastSecond:
                lastSecond = thisSecond
                updates = 0
            else:
                updates += 1
                if updates >= settings.MAX_UPDATES_PER_SECOND:
                    # Sleep for the remainder of the current second.
                    time.sleep(int(t2 + 1) - t2)

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)