def getMetrics(self):
  metrics = MetricCache.counts()

  for metric, queueSize in metrics:
    datapoints = MetricCache.pop(metric)

    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    yield (metric, datapoints)

def optimalWriteOrder():
  "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
  global lastCreateInterval
  global createCount
  metrics = MetricCache.counts()

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    # Let our persister do its own check, and ignore the metric if needed.
    if not persister.pre_get_datapoints_check(metric):
      continue

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    dbIdentifier, dbExists = persister.get_dbinfo(metric)

    yield (metric, datapoints, dbIdentifier, dbExists)

def optimalWriteOrder():
  "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
  global lastCreateInterval
  global createCount
  metrics = [(metric, len(datapoints)) for metric, datapoints in MetricCache.items()]

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    dbFilePath = getFilesystemPath(metric)
    dbFileExists = exists(dbFilePath)

    if not dbFileExists:
      createCount += 1
      now = time.time()

      if now - lastCreateInterval >= 60:
        lastCreateInterval = now
        createCount = 1
      elif createCount >= settings.MAX_CREATES_PER_MINUTE:
        continue

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    yield (metric, datapoints, dbFilePath, dbFileExists)

def _flush(prefix=None):
  """ Write/create whisper files at maximal speed """
  assert prefix is None or hasattr(prefix, 'startswith')
  log.msg("flush started (prefix: %s)" % prefix)
  started = time.time()
  metrics = MetricCache.counts()
  updates = 0
  write_lock.acquire()
  try:
    for metric, queueSize in metrics:
      if prefix and not metric.startswith(prefix):
        continue
      dbFilePath = getFilesystemPath(metric)
      dbFileExists = exists(dbFilePath)
      try:
        datapoints = MetricCache.pop(metric)
      except KeyError:
        continue
      if not createWhisperFile(metric, dbFilePath, dbFileExists):
        continue
      if not writeWhisperFile(dbFilePath, datapoints):
        continue
      updates += 1
  finally:
    write_lock.release()
  log.msg('flush finished (updates: %d, time: %.5f sec)' % (updates, time.time() - started))
  return updates

def backIntoCache(metricList):
  for (metric, datapoints) in metricList:
    # Iterate over a copy: failed points are re-appended to the original list
    # below, and appending to a list while iterating it would revisit them.
    for point in list(datapoints):
      try:
        MetricCache.store(metric, point)
      except Exception:
        datapoints.append(point)  # keep the point so it is not lost
  log.msg("Failed to publish to RabbitMQ. Pushed the metrics back to cache")

def optimalWriteOrder():
  "Generates metrics with the most cached values first and applies a soft rate limit on new metrics"
  global lastCreateInterval
  global createCount
  metrics = MetricCache.counts()

  t = time.time()
  # metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
    events.cacheSpaceAvailable()

  for metric, queueSize in metrics:
    # Handle special characters in metric names: replace anything outside
    # the printable ASCII range (0x20-0x7e) with an underscore.
    metric_sanit = ''.join(c if '\x20' <= c <= '\x7e' else '_' for c in metric)

    try:
      dbFilePath = getFilesystemPath(metric_sanit)
      dbFileExists = exists(dbFilePath)
    except Exception:
      log.err()
      log.msg("getFilesystemPath failed for metric: %s" % metric_sanit)
      continue

    if not dbFileExists:
      createCount += 1
      now = time.time()

      if now - lastCreateInterval >= 60:
        lastCreateInterval = now
        createCount = 1
      elif createCount >= settings.MAX_CREATES_PER_MINUTE:
        # dropping queued up datapoints for new metrics prevents filling up the entire cache
        # when a bunch of new metrics are received.
        try:
          MetricCache.pop(metric)
        except KeyError:
          pass
        continue

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    yield (metric_sanit, datapoints, dbFilePath, dbFileExists)

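# The character-range sanitization above can also be expressed with a
# precompiled regex. A minimal illustrative sketch follows; the helper name
# is hypothetical, not part of the original module.
import re

_NON_PRINTABLE = re.compile(r'[^\x20-\x7e]')

def sanitizeMetricName(metric):
  # Replace every character outside printable ASCII (0x20-0x7e) with '_'
  return _NON_PRINTABLE.sub('_', metric)
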
def optimalWriteOrder():
  log.msg("Entered optimalWriteOrder")
  metrics = MetricCache.counts()

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    yield (metric, datapoints)

def stringReceived(self, metric):
  values = MetricCache.get(metric, [])
  log.query('cache query for %s returned %d values' % (metric, len(values)))
  response = pickle.dumps(values, protocol=-1)
  self.sendString(response)
  increment('cacheQueries')

def optimalWriteOrder():
  """Generates metrics with the most cached values first and applies a soft rate limit on new metrics"""
  global lastCreateInterval
  global createCount
  metrics = MetricCache.counts()

  t = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.debug("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

  for metric, queueSize in metrics:
    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    dbFilePath = getFilesystemPath(metric)
    dbFileExists = exists(dbFilePath)

    if not dbFileExists:
      createCount += 1
      now = time.time()

      if now - lastCreateInterval >= 60:
        lastCreateInterval = now
        createCount = 1
      elif createCount >= settings.MAX_CREATES_PER_MINUTE:
        # dropping queued up datapoints for new metrics prevents filling up the entire cache
        # when a bunch of new metrics are received.
        try:
          MetricCache.pop(metric)
        except KeyError:
          pass
        continue

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    yield (metric, datapoints, dbFilePath, dbFileExists)

def stringReceived(self, rawRequest):
  request = self.unpickler.loads(rawRequest)
  cache = MetricCache()
  if request['type'] == 'cache-query':
    metric = request['metric']
    datapoints = list(cache.get(metric, {}).items())
    result = dict(datapoints=datapoints)
    if settings.LOG_CACHE_HITS:
      log.query('[%s] cache query for \"%s\" returned %d values' % (
        self.peerAddr, metric, len(datapoints)))
    instrumentation.increment('cacheQueries')
  elif request['type'] == 'cache-query-bulk':
    datapointsByMetric = {}
    metrics = request['metrics']
    for metric in metrics:
      datapointsByMetric[metric] = list(cache.get(metric, {}).items())
    result = dict(datapointsByMetric=datapointsByMetric)
    if settings.LOG_CACHE_HITS:
      log.query('[%s] cache query bulk for \"%d\" metrics returned %d values' % (
        self.peerAddr, len(metrics),
        sum([len(datapoints) for datapoints in datapointsByMetric.values()])))
    instrumentation.increment('cacheBulkQueries')
    instrumentation.append('cacheBulkQuerySize', len(metrics))
  elif request['type'] == 'get-metadata':
    result = management.getMetadata(request['metric'], request['key'])
  elif request['type'] == 'set-metadata':
    result = management.setMetadata(request['metric'], request['key'], request['value'])
  else:
    result = dict(error="Invalid request type \"%s\"" % request['type'])
  response = pickle.dumps(result, protocol=2)
  self.sendString(response)

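# Request/response shapes handled above, for reference (hypothetical metric
# name; both sides are pickled on the wire, responses with protocol 2):
#   {'type': 'cache-query', 'metric': 'carbon.agents.foo.cpuUsage'}
#     -> {'datapoints': [(timestamp, value), ...]}
#   {'type': 'cache-query-bulk', 'metrics': ['a.b.c', ...]}
#     -> {'datapointsByMetric': {'a.b.c': [(timestamp, value), ...], ...}}
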
def test_constructor(self):
  settings = {
    'CACHE_WRITE_STRATEGY': 'max',
  }
  settings_patch = patch.dict('carbon.conf.settings', settings)
  settings_patch.start()
  self.addCleanup(settings_patch.stop)  # undo the patch even if the test fails
  cache = MetricCache()
  self.assertNotEqual(cache, None)
  self.assertTrue(isinstance(cache.strategy, MaxStrategy))

def optimalWriteOrder():
  """Generates metrics with the most cached values first and applies a soft rate limit on new metrics"""
  global lastCreateInterval
  global createCount
  metrics = MetricCache.counts()

  time_ = time.time()
  metrics.sort(key=lambda item: item[1], reverse=True)  # by queue size, descending
  log.debug("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - time_))

  for metric, queueSize in metrics:
    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    dbFileExists = APP_DB.exists(metric)

    if not dbFileExists:
      createCount += 1
      now = time.time()

      if now - lastCreateInterval >= 60:
        lastCreateInterval = now
        createCount = 1
      elif createCount >= settings.MAX_CREATES_PER_MINUTE:
        # dropping queued up datapoints for new metrics prevents filling up the entire cache
        # when a bunch of new metrics are received.
        try:
          MetricCache.pop(metric)
        except KeyError:
          pass
        continue

    try:  # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
      datapoints = MetricCache.pop(metric)
    except KeyError:
      log.msg("MetricCache contention, skipping %s update for now" % metric)
      continue  # we simply move on to the next metric when this race condition occurs

    yield (metric, datapoints, dbFileExists)

def optimalWriteOrder():
  """Generates metrics with the most cached values first and applies a soft rate limit on new metrics"""
  cache = MetricCache()
  while cache:
    (metric, datapoints) = cache.drain_metric()
    dbFileExists = state.database.exists(metric)

    if not dbFileExists and CREATE_BUCKET:
      # If our tokenbucket has enough tokens available to create a new metric
      # file then yield the metric data to complete that operation. Otherwise
      # we'll just drop the metric on the ground and move on to the next
      # metric.
      # XXX This behavior should probably be configurable to not drop metrics
      # when rate limiting unless our cache is too big or some other legit
      # reason.
      if CREATE_BUCKET.drain(1):
        yield (metric, datapoints, dbFileExists)
      continue

    yield (metric, datapoints, dbFileExists)

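# Minimal token-bucket sketch illustrating the drain() semantics assumed by
# CREATE_BUCKET above. This is an illustrative simplification, not carbon's
# actual TokenBucket; the class name is hypothetical.
import time

class SimpleTokenBucket(object):
  def __init__(self, capacity, fill_rate):
    self.capacity = capacity    # maximum tokens the bucket can hold
    self.fill_rate = fill_rate  # tokens replenished per second
    self._tokens = capacity
    self._last = time.time()

  def drain(self, cost):
    # Refill according to elapsed time, then try to take `cost` tokens.
    now = time.time()
    self._tokens = min(self.capacity,
                       self._tokens + (now - self._last) * self.fill_rate)
    self._last = now
    if self._tokens >= cost:
      self._tokens -= cost
      return True
    return False  # not enough tokens; the caller drops the create
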
def test_write_strategy_sorted(self):
  """Create a metric cache, insert metrics, ensure sorted writes"""
  self.assertEqual("sorted", MetricCache.method)

  now = time.time()
  datapoint1 = (now - 10, float(1))
  datapoint2 = (now, float(2))
  MetricCache.store("d.e.f", datapoint1)
  MetricCache.store("a.b.c", datapoint1)
  MetricCache.store("a.b.c", datapoint2)

  (m, d) = MetricCache.pop()
  self.assertEqual(("a.b.c", deque([datapoint1, datapoint2])), (m, d))
  (m, d) = MetricCache.pop()
  self.assertEqual(("d.e.f", deque([datapoint1])), (m, d))
  self.assertEqual(0, MetricCache.size)

def pre_retrieve_metric_check(self, metric):
  global lastCreateInterval
  global createCount
  dbInfo = self.get_dbinfo(metric)
  dbFilePath = dbInfo[0]
  dbFileExists = dbInfo[1]

  if not dbFileExists:
    createCount += 1
    now = time.time()

    if now - lastCreateInterval >= 60:
      lastCreateInterval = now
      createCount = 1
    elif createCount >= settings.MAX_CREATES_PER_MINUTE:
      # dropping queued up datapoints for new metrics prevents filling up the entire cache
      # when a bunch of new metrics are received.
      try:
        MetricCache.pop(metric)
      except KeyError:
        pass
      return False

  return True

def writeCachedDataPoints(channel, exchange):
  log.msg("Entered function writeCachedDataPoints")
  log.msg("MetricCache count: %d" % (len(MetricCache.counts())))
  while MetricCache:
    dataWritten = False
    log.msg("Calling optimalWriteOrder")

    for (metric, datapoints) in optimalWriteOrder():
      dataWritten = True
      # Serialize datapoints as "value timestamp" lines, one per point.
      body = ""
      for point in datapoints:
        body += "%f %d\n" % (point[1], point[0])
      message = Content(body)
      message["delivery mode"] = 2  # persistent delivery
      channel.basic_publish(exchange=exchange, content=message, routing_key=metric)
      log.updates("Published %d datapoints of metric %s" % (len(datapoints), metric))

def optimalWriteOrder():
  """Generates metrics with the most cached values first and applies a soft rate limit on new metrics"""
  while MetricCache:
    (metric, datapoints) = MetricCache.pop()

    if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
      events.cacheSpaceAvailable()

    dbFilePath = getFilesystemPath(metric)
    dbFileExists = exists(dbFilePath)

    if not dbFileExists and CREATE_BUCKET:
      # If our tokenbucket has enough tokens available to create a new metric
      # file then yield the metric data to complete that operation. Otherwise
      # we'll just drop the metric on the ground and move on to the next
      # metric.
      # XXX This behavior should probably be configurable to not drop metrics
      # when rate limiting unless our cache is too big or some other legit
      # reason.
      if CREATE_BUCKET.drain(1):
        yield (metric, datapoints, dbFilePath, dbFileExists)
      continue

    yield (metric, datapoints, dbFilePath, dbFileExists)

def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  cache = MetricCache()
  while cache:
    (metric, datapoints) = cache.drain_metric()
    if metric is None:
      # end the loop
      break

    dbFileExists = state.database.exists(metric)

    if not dbFileExists:
      if CREATE_BUCKET and not CREATE_BUCKET.drain(1):
        # If our tokenbucket doesn't have enough tokens available to create a new metric
        # file then we'll just drop the metric on the ground and move on to the next
        # metric.
        # XXX This behavior should probably be configurable to not drop metrics
        # when rate limiting unless our cache is too big or some other legit
        # reason.
        instrumentation.increment('droppedCreates')
        continue

      archiveConfig = None
      xFilesFactor, aggregationMethod = None, None

      for schema in SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
          archiveConfig = [archive.getTuple() for archive in schema.archives]
          break

      for schema in AGGREGATION_SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
          xFilesFactor, aggregationMethod = schema.archives
          break

      if not archiveConfig:
        raise Exception(("No storage schema matched the metric '%s',"
                         " check your storage-schemas.conf file.") % metric)

      if settings.LOG_CREATES:
        log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                    (metric, archiveConfig, xFilesFactor, aggregationMethod))
      try:
        state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
        if settings.ENABLE_TAGS:
          tagQueue.add(metric)
        instrumentation.increment('creates')
      except Exception as e:
        log.err()
        log.msg("Error creating %s: %s" % (metric, e))
        instrumentation.increment('errors')
        continue

    # If we've got a rate limit configured let's make sure we enforce it
    waitTime = 0
    if UPDATE_BUCKET:
      t1 = time.time()
      yield UPDATE_BUCKET.drain(1, blocking=True)
      waitTime = time.time() - t1

    try:
      t1 = time.time()
      # If we have duplicated points, always pick the last. update_many()
      # has no guaranteed behavior for that, and in fact the current implementation
      # will keep the first point in the list.
      datapoints = dict(datapoints).items()
      state.database.write(metric, datapoints)
      if settings.ENABLE_TAGS:
        tagQueue.update(metric)
      updateTime = time.time() - t1
    except Exception as e:
      log.err()
      log.msg("Error writing to %s: %s" % (metric, e))
      instrumentation.increment('errors')
    else:
      pointCount = len(datapoints)
      instrumentation.increment('committedPoints', pointCount)
      instrumentation.append('updateTimes', updateTime)
      if settings.LOG_UPDATES:
        if waitTime > 0.001:
          log.updates("wrote %d datapoints for %s in %.5f seconds after waiting %.5f seconds" % (
            pointCount, metric, updateTime, waitTime))
        else:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (
            pointCount, metric, updateTime))

def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  cache = MetricCache()
  while cache:
    (metric, datapoints) = cache.drain_metric()
    if metric is None:
      # end the loop
      break

    dbFileExists = state.database.exists(metric)

    if not dbFileExists:
      if CREATE_BUCKET and not CREATE_BUCKET.drain(1):
        # If our tokenbucket doesn't have enough tokens available to create a new metric
        # file then we'll just drop the metric on the ground and move on to the next
        # metric.
        # XXX This behavior should probably be configurable to not drop metrics
        # when rate limiting unless our cache is too big or some other legit
        # reason.
        instrumentation.increment('droppedCreates')
        continue

      archiveConfig = None
      xFilesFactor, aggregationMethod = None, None

      for schema in SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
          archiveConfig = [archive.getTuple() for archive in schema.archives]
          break

      for schema in AGGREGATION_SCHEMAS:
        if schema.matches(metric):
          if settings.LOG_CREATES:
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
          xFilesFactor, aggregationMethod = schema.archives
          break

      if not archiveConfig:
        raise Exception(("No storage schema matched the metric '%s',"
                         " check your storage-schemas.conf file.") % metric)

      if settings.LOG_CREATES:
        log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                    (metric, archiveConfig, xFilesFactor, aggregationMethod))
      try:
        state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
        if settings.ENABLE_TAGS:
          tagQueue.add(metric)
        instrumentation.increment('creates')
      except Exception as e:
        log.err()
        log.msg("Error creating %s: %s" % (metric, e))
        instrumentation.increment('errors')
        continue

    # If we've got a rate limit configured let's make sure we enforce it
    waitTime = 0
    if UPDATE_BUCKET:
      t1 = time.time()
      UPDATE_BUCKET.drain(1, blocking=True)
      waitTime = time.time() - t1

    try:
      t1 = time.time()
      # If we have duplicated points, always pick the last. update_many()
      # has no guaranteed behavior for that, and in fact the current implementation
      # will keep the first point in the list.
      datapoints = dict(datapoints).items()
      state.database.write(metric, datapoints)
      if settings.ENABLE_TAGS:
        tagQueue.update(metric)
      updateTime = time.time() - t1
    except Exception as e:
      log.err()
      log.msg("Error writing to %s: %s" % (metric, e))
      instrumentation.increment('errors')
    else:
      pointCount = len(datapoints)
      instrumentation.increment('committedPoints', pointCount)
      instrumentation.append('updateTimes', updateTime)
      if settings.LOG_UPDATES:
        if waitTime > 0.001:
          log.updates("wrote %d datapoints for %s in %.5f seconds after waiting %.5f seconds" % (
            pointCount, metric, updateTime, waitTime))
        else:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (
            pointCount, metric, updateTime))

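# For reference, SCHEMAS above is built from storage-schemas.conf. A typical
# entry looks like the following (illustrative values):
#
#   [default]
#   pattern = .*
#   retentions = 10s:6h,1m:7d,10m:5y
#
# A metric whose name matches `pattern` is created with the corresponding
# `retentions` as its archiveConfig; AGGREGATION_SCHEMAS is built the same
# way from storage-aggregation.conf (xFilesFactor and aggregationMethod).
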
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  cache = MetricCache()
  while cache:
    dataWritten = False

    for (metric, datapoints, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in SCHEMAS:
          if schema.matches(metric):
            if settings.LOG_CREATES:
              log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in AGGREGATION_SCHEMAS:
          if schema.matches(metric):
            if settings.LOG_CREATES:
              log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        if settings.LOG_CREATES:
          log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                      (metric, archiveConfig, xFilesFactor, aggregationMethod))
        try:
          state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
          instrumentation.increment('creates')
        except Exception as e:
          log.err()
          log.msg("Error creating %s: %s" % (metric, e))
          instrumentation.increment('errors')
          continue

      # If we've got a rate limit configured let's make sure we enforce it
      if UPDATE_BUCKET:
        UPDATE_BUCKET.drain(1, blocking=True)

      try:
        t1 = time.time()
        # If we have duplicated points, always pick the last. update_many()
        # has no guaranteed behavior for that, and in fact the current implementation
        # will keep the first point in the list.
        datapoints = dict(datapoints).items()
        state.database.write(metric, datapoints)
        updateTime = time.time() - t1
      except Exception as e:
        log.err()
        log.msg("Error writing to %s: %s" % (metric, e))
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

def store(metric, value):
  fullMetric = 'carbon.agents.%s.%s' % (HOSTNAME, metric)
  datapoint = (time.time(), value)
  MetricCache.store(fullMetric, datapoint)
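
# Usage sketch (hypothetical values): on a host named "foo",
#   store('cache.queries', 42)
# caches the datapoint (time.time(), 42) under the metric name
# 'carbon.agents.foo.cache.queries'.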