def loadStorageSchemas():
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')
        retentions = options['retentions'].split(',')
        archives = [Archive.fromString(s) for s in retentions]

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Section missing 'pattern': %s" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            whisper.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except whisper.InvalidConfiguration as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    return schemaList
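For context, a hedged sketch of the input loadStorageSchemas() reads: each storage-schemas.conf section provides a regex 'pattern' and a comma-separated 'retentions' string, and Archive.fromString accepts the individual retention definitions. The section contents below are illustrative, not from any real configuration; whisper.parseRetentionDef is used here only to show what each definition expands to.

import whisper

# Illustrative section contents (made up for the example).
sample_options = {
    'pattern': r'^carbon\.',
    'retentions': '60s:1d,5m:30d',
}

for definition in sample_options['retentions'].split(','):
    # parseRetentionDef("60s:1d") -> (secondsPerPoint, points), e.g. (60, 1440)
    print(definition, whisper.parseRetentionDef(definition))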
def read_rules(self):
    if not exists(self.rules_file):
        self.clear()
        return

    # Only read if the rules file has been modified
    try:
        mtime = getmtime(self.rules_file)
    except OSError:
        log.err("Failed to get mtime of %s" % self.rules_file)
        return
    if mtime <= self.rules_last_read:
        return

    # Read new rules
    log.aggregator("reading new aggregation rules from %s" % self.rules_file)
    new_rules = []
    for line in open(self.rules_file):
        line = line.strip()
        if line.startswith('#') or not line:
            continue

        rule = self.parse_definition(line)
        new_rules.append(rule)

    log.aggregator("clearing aggregation buffers")
    BufferManager.clear()
    self.rules = new_rules
    self.rules_last_read = mtime
def writeForever():
    while reactor.running:
        try:
            writeCachedDataPoints()
        except:
            log.err()
        time.sleep(1)  # The writer thread only sleeps when the cache is empty or an error occurs
def reloadStorageSchemas():
    global SCHEMAS
    try:
        SCHEMAS = loadStorageSchemas()
    except Exception:
        log.msg("Failed to reload storage SCHEMAS")
        log.err()
def read_list(self):
    # Clear rules and move on if file isn't there
    if not os.path.exists(self.list_file):
        self.regex_list = []
        return

    try:
        mtime = os.path.getmtime(self.list_file)
    except:
        log.err("Failed to get mtime of %s" % self.list_file)
        return

    if mtime <= self.rules_last_read:
        return

    # Begin read
    new_regex_list = []
    for line in open(self.list_file):
        pattern = line.strip()
        if line.startswith('#') or not pattern:
            continue
        try:
            new_regex_list.append(re.compile(pattern))
        except:
            log.err("Failed to parse '%s' in '%s'. Ignoring line" % (pattern, self.list_file))

    self.regex_list = new_regex_list
    self.rules_last_read = mtime
def write(self, metric_name, datapoints):
    metric_name = self.encode(metric_name)
    # Get a Metric object from the metric name.
    metric = self.cache.get_metric(metric_name=metric_name)
    if not metric:
        # Metric was not found, but most likely it was cached; force-drop the cache entry.
        self.cache.cache_drop(metric_name)
        # Because we have multiple layers of cache (one in carbon, one in biggraphite),
        # it can take up to 2 steps for the metric to be recreated before it gets filled again.
        log.err(
            "Could not find %s; cleaning cache to recreate the metric in database."
            % (metric_name))
        return

    # Round down timestamps because inner functions expect integers.
    datapoints = [(int(timestamp), value) for timestamp, value in datapoints]

    # Writing every point synchronously increases CPU usage by ~300% as per https://goo.gl/xP5fD9
    if self._sync_countdown < 1:
        self.accessor.insert_points(metric=metric, datapoints=datapoints)
        self._sync_countdown = self._sync_every_n_writes
    else:
        self._sync_countdown -= 1
        self.accessor.insert_points_async(metric=metric, datapoints=datapoints)
def getMetadata(metric, key):
    try:
        value = state.database.getMetadata(metric, key)
        return dict(value=value)
    except Exception:
        log.err()
        return dict(error=traceback.format_exc())
def loadStorageSchemas():
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')
        retentions = options['retentions'].split(',')
        archives = [Archive.fromString(s) for s in retentions]

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Section missing 'pattern': %s" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            whisper.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except whisper.InvalidConfiguration as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    return schemaList
def createWhisperFile(metric, dbFilePath, dbFileExists):
    if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in schemas:
            if schema.matches(metric):
                log.creates('new metric %s matched schema %s' % (metric, schema.name))
                archiveConfig = [archive.getTuple() for archive in schema.archives]
                break

        for schema in agg_schemas:
            if schema.matches(metric):
                log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                xFilesFactor, aggregationMethod = schema.archives
                break

        if not archiveConfig:
            raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        try:
            os.makedirs(dbDir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                log.err("%s" % e)

        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        try:
            whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod,
                           settings.WHISPER_SPARSE_CREATE, settings.WHISPER_FALLOCATE_CREATE)
            instrumentation.increment('creates')
        except Exception as e:
            log.err("Error creating %s: %s" % (dbFilePath, e))
            return False
    return True
def writeForever():
    while reactor.running:
        try:
            writeCachedDataPoints()
        except Exception:
            log.err()
        time.sleep(1)  # The writer thread only sleeps when the cache is empty or an error occurs
def loadStorageSchemas():
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        try:
            retentions = options['retentions'].split(',')
            archives = [Archive.fromString(s) for s in retentions]
        except KeyError:
            log.err("Schema %s missing 'retentions', skipping" % section)
            continue

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Schema %s missing 'pattern', skipping" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            if state.database is not None:
                state.database.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except ValueError as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    schemaList.append(defaultSchema)
    return schemaList
def reloadStorageSchemas():
    global schemas
    try:
        schemas = loadStorageSchemas()
    except:
        log.msg("Failed to reload storage schemas")
        log.err()
def setMetadata(metric, key, value):
    try:
        old_value = state.database.setMetadata(metric, key, value)
        return dict(old_value=old_value, new_value=value)
    except Exception:
        log.err()
        return dict(error=traceback.format_exc())
def read_list(self):
    # Clear rules and move on if file isn't there
    if not os.path.exists(self.list_file):
        self.regex_list = []
        return

    try:
        mtime = os.path.getmtime(self.list_file)
    except OSError:
        log.err("Failed to get mtime of %s" % self.list_file)
        return

    if mtime <= self.rules_last_read:
        return

    # Begin read
    new_regex_list = []
    for line in open(self.list_file):
        pattern = line.strip()
        if line.startswith('#') or not pattern:
            continue
        try:
            new_regex_list.append(re.compile(pattern))
        except re.error:
            log.err("Failed to parse '%s' in '%s'. Ignoring line" % (pattern, self.list_file))

    self.regex_list = new_regex_list
    self.rules_last_read = mtime
def reloadAggregationSchemas():
    global agg_schemas
    try:
        agg_schemas = loadAggregationSchemas()
    except:
        log.msg("Failed to reload aggregation schemas")
        log.err()
def loadStorageSchemas():
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        pattern = options.get('pattern')

        try:
            retentions = options['retentions'].split(',')
            archives = [Archive.fromString(s) for s in retentions]
        except KeyError:
            log.err("Schema %s missing 'retentions', skipping" % section)
            continue

        if pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            log.err("Schema %s missing 'pattern', skipping" % section)
            continue

        archiveList = [a.getTuple() for a in archives]

        try:
            if state.database is not None:
                state.database.validateArchiveList(archiveList)
            schemaList.append(mySchema)
        except ValueError as e:
            log.msg("Invalid schemas found in %s: %s" % (section, e))

    schemaList.append(defaultSchema)
    return schemaList
def reloadAggregationSchemas():
    global AGGREGATION_SCHEMAS
    try:
        AGGREGATION_SCHEMAS = loadAggregationSchemas()
    except Exception:
        log.msg("Failed to reload aggregation SCHEMAS")
        log.err()
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                log.creates("creating database metric %s (archive=%s xff=%s agg=%s)" %
                            (metric, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                    instrumentation.increment('creates')
                except Exception as e:
                    log.err()
                    log.msg("Error creating %s: %s" % (metric, e))
                    instrumentation.increment('errors')
                    continue

            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)

            try:
                t1 = time.time()
                # If we have duplicated points, always pick the last. update_many()
                # has no guaranteed behavior for that, and in fact the current implementation
                # will keep the first point in the list.
                datapoints = dict(datapoints).items()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception as e:
                log.err()
                log.msg("Error writing to %s: %s" % (metric, e))
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))
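A quick illustration of the dict() de-duplication step above; the timestamps and values are made up. Building a dict keeps only the last value seen for a duplicated timestamp, which is exactly the behavior the in-code comment asks for.

points = [(1700000000, 1.0), (1700000060, 2.0), (1700000000, 3.0)]
# The duplicated timestamp 1700000000 keeps its last value, 3.0.
print(sorted(dict(points).items()))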
def create(self, metric, retentions, xfilesfactor, aggregation_method):
    path = self.getFilesystemPath(metric)
    directory = dirname(path)
    try:
        if not exists(directory):
            os.makedirs(directory)
    except OSError as e:
        log.err("%s" % e)
def flushCache():
    global flush
    flush = 1
    try:
        writeCachedDataPoints()
    except:
        log.err()
    flush = 0
def writeWhisperFile(dbFilePath, datapoints):
    try:
        whisper.update_many(dbFilePath, datapoints)
    except:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
        return False
    return True
def reloadStorageSchemas(first_run=False):
    global schemas
    try:
        schemas = loadStorageSchemas()
    except Exception as e:
        if first_run:
            raise e
        log.msg("Failed to reload storage schemas")
        log.err()
def reloadAggregationSchemas(first_run=False):
    global agg_schemas
    try:
        agg_schemas = loadAggregationSchemas()
    except Exception as e:
        if first_run:
            raise e
        log.msg("Failed to reload aggregation schemas")
        log.err()
def setMetadata(metric, key, value):
    if key != 'aggregationMethod':
        return dict(error="Unsupported metadata key \"%s\"" % key)
    try:
        old_value = APP_DB.setAggregationMethod(metric, value)
        return dict(old_value=old_value, new_value=value)
    except:
        log.err()
        return dict(error=traceback.format_exc())
def lineReceived(self, line):
    # log.msg("[DEBUG] lineReceived(): %s" % line)
    try:
        (metric, value, timestamp) = line.split()
        datapoint = (float(timestamp), float(value))
        assert datapoint[1] == datapoint[1]  # filter out NaNs
        client_manager.sendDatapoint(metric, datapoint)
    except ValueError:
        log.err(None, 'Dropping invalid line: %s' % line)
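The plaintext protocol handled above carries one "metric value timestamp" triple per line. A minimal sketch of the same parsing and NaN guard, using a made-up sample line:

line = "servers.web01.cpu.load 0.42 1700000000"
metric, value, timestamp = line.split()
datapoint = (float(timestamp), float(value))
assert datapoint[1] == datapoint[1]  # NaN != NaN, so NaN values fail this check
print(metric, datapoint)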
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' % (metric, schema.name))
                        archiveConfig = [archive.getTuple() for archive in schema.archives]
                        break

                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

                log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                            (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
                try:
                    state.database.create(metric, archiveConfig, xFilesFactor, aggregationMethod)
                    instrumentation.increment('creates')
                except Exception:
                    log.err("Error creating %s" % (dbFilePath))
                    continue

            # If we've got a rate limit configured, let's make sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)

            try:
                t1 = time.time()
                state.database.write(metric, datapoints)
                updateTime = time.time() - t1
            except Exception:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
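UPDATE_BUCKET above is carbon's write rate limiter, drained by one token per update. The sketch below is not carbon's implementation; it is a minimal token bucket, assuming only the drain(cost, blocking=...) idea seen above, to show how a blocking drain enforces a maximum update rate.

import time


class SimpleTokenBucket(object):
    """Illustrative token bucket; not carbon's TokenBucket."""

    def __init__(self, capacity, fill_rate):
        self.capacity = float(capacity)    # maximum tokens the bucket can hold
        self.fill_rate = float(fill_rate)  # tokens added per second
        self._tokens = float(capacity)
        self._last = time.time()

    def _refill(self):
        now = time.time()
        self._tokens = min(self.capacity,
                           self._tokens + (now - self._last) * self.fill_rate)
        self._last = now

    def drain(self, cost, blocking=False):
        # Remove `cost` tokens; optionally wait until enough have accumulated.
        self._refill()
        while blocking and self._tokens < cost:
            time.sleep((cost - self._tokens) / self.fill_rate)
            self._refill()
        if self._tokens >= cost:
            self._tokens -= cost
            return True
        return False


# Hypothetical usage: allow roughly 500 updates per second.
# UPDATE_BUCKET = SimpleTokenBucket(capacity=500, fill_rate=500)
# UPDATE_BUCKET.drain(1, blocking=True)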
def writeForever():
    global CACHE_SIZE_LOW_WATERMARK
    global CACHE_SIZE_ALERT
    while reactor.running:
        try:
            writeCachedDataPoints()
        except:
            log.err()
        time.sleep(5)
        flushCache()
def getMetadata(metric, key):
    if key != 'aggregationMethod':
        return dict(error="Unsupported metadata key \"%s\"" % key)
    try:
        value = APP_DB.info(metric)['aggregationMethod']
        return dict(value=value)
    except:
        log.err()
        return dict(error=traceback.format_exc())
def writeForever():
    while reactor.running:
        try:
            writeCachedDataPoints()
        except Exception:
            log.err()
            # Back-off on error to give the backend time to recover.
            time.sleep(0.1)
        else:
            # Avoid churning CPU when there are no metrics in the cache
            time.sleep(1)
def writeTagsForever():
    while reactor.running:
        try:
            writeTags()
        except Exception:
            log.err()
            # Back-off on error to give the backend time to recover.
            time.sleep(0.1)
        else:
            # Avoid churning CPU when there are no series in the queue
            time.sleep(0.2)
def publishForever(channel, exchange):
    while reactor.running:
        # msg = yield queue.get()
        # self.processMessage(msg, channel)
        try:
            log.msg("Calling function writeCachedDataPoints")
            writeCachedDataPoints(channel, exchange)
        except:
            log.err()
        log.msg("Time to sleep")
        time.sleep(10)
def writeForever():
    if reactor.running:
        try:
            writeCachedDataPoints()
        except Exception:
            log.err()
            # Back-off on error to give the backend time to recover.
            reactor.callLater(0.1, writeForever)
        else:
            # Avoid churning CPU when there are no metrics in the cache
            reactor.callLater(1, writeForever)
def writeTagsForever():
    if reactor.running:
        try:
            writeTags()
        except Exception:
            log.err()
            # Back-off on error to give the backend time to recover.
            reactor.callLater(0.1, writeTagsForever)
        else:
            # Avoid churning CPU when there are no series in the queue
            reactor.callLater(0.2, writeTagsForever)
def getMetadata(metric, key):
    if key != 'aggregationMethod':
        return dict(error="Unsupported metadata key \"%s\"" % key)
    wsp_path = getFilesystemPath(metric)
    try:
        value = whisper.info(wsp_path)['aggregationMethod']
        return dict(value=value)
    except Exception:
        log.err()
        return dict(error=traceback.format_exc())
def setMetadata(metric, key, value):
    if key != 'aggregationMethod':
        return dict(error="Unsupported metadata key \"%s\"" % key)
    wsp_path = getFilesystemPath(metric)
    try:
        old_value = whisper.setAggregationMethod(wsp_path, value)
        return dict(old_value=old_value, new_value=value)
    except Exception:
        log.err()
        return dict(error=traceback.format_exc())
def create(self, metric, retentions, xfilesfactor, aggregation_method):
    path = self.getFilesystemPath(metric)
    directory = dirname(path)
    try:
        if not exists(directory):
            os.makedirs(directory)
    except OSError as e:
        log.err("%s" % e)

    whisper.create(path, retentions, xfilesfactor, aggregation_method,
                   self.sparse_create, self.fallocate_create)
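A hedged usage sketch of the whisper calls wrapped by the database layer above: create a file with a single archive of 60-second points covering one day, write two points, and read back the metadata. The path and values are illustrative only; whisper.create raises if the file already exists.

import time
import whisper

path = '/tmp/example.wsp'  # illustrative path
whisper.create(path, [(60, 1440)], xFilesFactor=0.5, aggregationMethod='average')

now = int(time.time())
whisper.update_many(path, [(now - 60, 1.0), (now, 2.0)])
print(whisper.info(path)['aggregationMethod'])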
def parse_definition(self, line):
    try:
        left_side, right_side = line.split('=', 1)
        output_pattern, frequency = left_side.split()
        method, input_pattern = right_side.split()
        frequency = int(frequency.lstrip('(').rstrip(')'))
        return AggregationRule(input_pattern, output_pattern, method, frequency)
    except ValueError:
        log.err("Failed to parse rule in %s, line: %s" % (self.rules_file, line))
        raise
def parse_definition(self, line):
    try:
        left_side, right_side = line.split('=', 1)
        output_pattern, frequency = left_side.split()
        method, input_pattern = right_side.split()
        frequency = int(frequency.lstrip('(').rstrip(')'))
        return AggregationRule(input_pattern, output_pattern, method, frequency)
    except ValueError:
        log.err("Failed to parse line: %s" % line)
        raise
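For reference, the aggregation rule syntax parse_definition() expects is "output_template (frequency) = method input_pattern". The rule below is an illustrative example, not taken from the source; the steps simply replay the parsing above on it.

line = 'prod.applications.api.all.requests (60) = sum prod.applications.api.*.requests'
left_side, right_side = line.split('=', 1)
output_pattern, frequency = left_side.split()
method, input_pattern = right_side.split()
frequency = int(frequency.lstrip('(').rstrip(')'))
print(output_pattern, frequency, method, input_pattern)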
def __init__(self, settings):
    super(WhisperDatabase, self).__init__(settings)

    self.data_dir = settings.LOCAL_DATA_DIR
    self.sparse_create = settings.WHISPER_SPARSE_CREATE
    self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE

    if settings.WHISPER_AUTOFLUSH:
        log.msg("Enabling Whisper autoflush")
        whisper.AUTOFLUSH = True

    if settings.WHISPER_FALLOCATE_CREATE:
        if whisper.CAN_FALLOCATE:
            log.msg("Enabling Whisper fallocate support")
        else:
            log.err("WHISPER_FALLOCATE_CREATE is enabled but linking failed.")

    if settings.WHISPER_LOCK_WRITES:
        if whisper.CAN_LOCK:
            log.msg("Enabling Whisper file locking")
            whisper.LOCK = True
        else:
            log.err("WHISPER_LOCK_WRITES is enabled but import of fcntl module failed.")

    if settings.WHISPER_FADVISE_RANDOM:
        try:
            if whisper.CAN_FADVISE:
                log.msg("Enabling Whisper fadvise_random support")
                whisper.FADVISE_RANDOM = True
            else:
                log.err("WHISPER_FADVISE_RANDOM is enabled but import of ftools module failed.")
        except AttributeError:
            log.err("WHISPER_FADVISE_RANDOM is enabled but skipped because it is not compatible " +
                    "with the version of Whisper.")
def run_pipeline(metric, datapoint, processors=None):
    if processors is None:
        processors = state.pipeline_processors
    elif not processors:
        return

    processor = processors[0]
    try:
        for out_metric, out_datapoint in processor.process(metric, datapoint):
            try:
                run_pipeline(out_metric, out_datapoint, processors[1:])
            except Exception:
                log.err()
    except Exception:
        log.err()
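run_pipeline() recurses through a chain of processors, each of which may emit zero or more (metric, datapoint) pairs. A self-contained sketch of that pattern with made-up processor classes; in carbon the end of the chain is typically a processor that hands points to the cache or relay, which the print below merely stands in for.

class RenameProcessor(object):
    def process(self, metric, datapoint):
        # Emit the datapoint under a rewritten metric name.
        yield metric.replace('legacy.', 'new.'), datapoint


class DropZeroProcessor(object):
    def process(self, metric, datapoint):
        # Swallow zero-valued points, pass everything else through.
        if datapoint[1] != 0:
            yield metric, datapoint


def run_chain(metric, datapoint, processors):
    if not processors:
        print('delivered', metric, datapoint)
        return
    for out_metric, out_datapoint in processors[0].process(metric, datapoint):
        run_chain(out_metric, out_datapoint, processors[1:])


run_chain('legacy.web01.errors', (1700000000, 4.0),
          [RenameProcessor(), DropZeroProcessor()])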
def _createMetrics(self):
    # We create metrics in a separate thread because this is potentially
    # slow and we don't want to slow down the main ingestion thread.
    # With a cold cache we will put all the metrics in the queue and
    # asynchronously create the non-existing ones, but the points will
    # be written as soon as they are received.
    while self.reactor.running:
        try:
            self._createOneMetric()
            time.sleep(1 / float(self._creation_rate_limit))
        except Exception:
            log.err()
            # Give the system time to recover; errors might be related
            # to the current load.
            time.sleep(1)
def __init__(self, settings):
    super(CeresDatabase, self).__init__(settings)

    self.data_dir = settings.LOCAL_DATA_DIR
    ceres.setDefaultNodeCachingBehavior(settings.CERES_NODE_CACHING_BEHAVIOR)
    ceres.setDefaultSliceCachingBehavior(settings.CERES_SLICE_CACHING_BEHAVIOR)
    ceres.MAX_SLICE_GAP = int(settings.CERES_MAX_SLICE_GAP)

    if settings.CERES_LOCK_WRITES:
        if ceres.CAN_LOCK:
            log.msg("Enabling Ceres file locking")
            ceres.LOCK_WRITES = True
        else:
            log.err("CERES_LOCK_WRITES is enabled but import of fcntl module failed.")

    self.tree = ceres.CeresTree(self.data_dir)
def get_aggregate_metric(self, metric_path):
    if metric_path in self.cache:
        return self.cache[metric_path]

    match = self.regex.match(metric_path)
    result = None

    if match:
        extracted_fields = match.groupdict()
        try:
            result = self.output_template % extracted_fields
        except:
            log.err("Failed to interpolate template %s with fields %s" %
                    (self.output_template, extracted_fields))

    self.cache[metric_path] = result
    return result
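A hedged illustration of the named-group regex plus output-template interpolation used by get_aggregate_metric(); the pattern, template, and metric path below are made up for the example.

import re

regex = re.compile(r'(?P<env>[^.]+)\.applications\.(?P<app>[^.]+)\.[^.]+\.requests')
output_template = '%(env)s.applications.%(app)s.all.requests'

match = regex.match('prod.applications.api.web01.requests')
if match:
    # Interpolates the captured groups into 'prod.applications.api.all.requests'
    print(output_template % match.groupdict())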
def _createMetrics(self):
    # We create metrics in a separate thread because this is potentially
    # slow and we don't want to slow down the main ingestion thread.
    # With a cold cache we will put all the metrics in the queue and
    # asynchronously create the non-existing ones, but the points will
    # be written as soon as they are received.
    while self.reactor.running:
        try:
            self._createOneMetric()
            # Hard limit of 300 creations per second. This is mostly
            # to give priority to other threads. A typical carbon instance
            # can handle up to 200k metrics per second, so it will take
            # ~10 minutes to check all metrics.
            time.sleep(0.003)
        except Exception:
            log.err()
            # Give the system time to recover; errors might be related
            # to the current load.
            time.sleep(1)