def test_single_metric(self):
    xfilesfactor = 0.5
    aggregation_method = "last"
    # These retentions are such that every other point is present in both
    # archives. The test validates that duplicate points get inserted only once.
    retentions = [(1, 10), (2, 10)]
    high_precision_duration = retentions[0][0] * retentions[0][1]
    low_precision_duration = retentions[1][0] * retentions[1][1]
    now = int(time.time())
    time_from, time_to = now - low_precision_duration, now
    points = [(float(t), float(now - t)) for t in xrange(time_from, time_to)]

    metric_name = "test_metric"
    metric_path = os_path.join(self.tempdir, metric_name + ".wsp")
    whisper.create(metric_path, retentions, xfilesfactor, aggregation_method)
    whisper.update_many(metric_path, points)

    self._call_main()

    metric = self.accessor.get_metric(metric_name)
    self.assertTrue(metric)
    self.assertEqual(metric.name, metric_name)
    self.assertEqual(metric.aggregator.carbon_name, aggregation_method)
    self.assertEqual(metric.carbon_xfilesfactor, xfilesfactor)
    self.assertEqual(metric.retention.as_string, "10*1s:10*2s")
    points_again = list(
        self.accessor.fetch_points(metric, time_from, time_to, metric.retention[0]))
    self.assertEqual(points[-high_precision_duration:], points_again)
def test_update_many_excess(self):
    # given an empty db
    wsp = "test_update_many_excess.wsp"
    self.addCleanup(self._remove, wsp)
    archive_len = 3
    archive_step = 1
    whisper.create(wsp, [(archive_step, archive_len)])

    # given more points than the db can hold
    excess_len = 1
    num_input_points = archive_len + excess_len
    test_now = int(time.time())
    input_start = test_now - num_input_points + archive_step
    input_points = [(input_start + i, random.random() * 10)
                    for i in range(num_input_points)]

    # when the db is updated with too many points
    whisper.update_many(wsp, input_points, now=test_now)

    # then only the most recent input points (those at the end) were written
    actual_time_info = whisper.fetch(wsp, 0, now=test_now)[0]
    self.assertEqual(actual_time_info,
                     (input_points[-archive_len][0],
                      input_points[-1][0] + archive_step,  # untilInterval = newest + step
                      archive_step))
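# A minimal standalone sketch (the temp file and retention are arbitrary choices,
# not part of the test above) showing the shape whisper.fetch() returns: a
# (fromInterval, untilInterval, step) header plus a list of values, where
# untilInterval sits one step past the newest stored point.
import os
import tempfile
import time

import whisper

path = os.path.join(tempfile.mkdtemp(), "example.wsp")
whisper.create(path, [(1, 60)])            # 1-second points, 60-point archive
now = int(time.time())
whisper.update_many(path, [(now - 2, 1.0), (now - 1, 2.0)], now=now)

(start, end, step), values = whisper.fetch(path, now - 10, now=now)
print((start, end, step))                  # end == newest interval + step
print(values)                              # None where nothing was written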
def update(self, updates):
    """Apply a batch of updates to the whisper databases."""
    # Group updates by box_id.
    sorted_updates = defaultdict(lambda: defaultdict(int))
    # Whisper timestamps have one-second resolution, so sub-second
    # timestamps are collapsed into per-second counts.
    for box_id, timestamp in updates:
        sorted_updates[box_id][int(timestamp)] += 1

    for box_id, timestamps in sorted_updates.items():
        # whisper sorts the points internally, so pre-sorting is unnecessary
        # timestamps = sorted(timestamps.items())
        timestamps = timestamps.items()
        db_path = os.path.join(self.dir_prefix, TimedDB.make_db_name(box_id))
        if not os.path.exists(db_path):
            logger.info('create whisper db for box %s at path %s', box_id, db_path)
            whisper.create(db_path, WHISPER_ARCHIVES, xFilesFactor=0.5,
                           aggregationMethod='sum', sparse=False, useFallocate=True)
        with get_lock(db_path):
            whisper.update_many(db_path, timestamps)
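# A small standalone illustration (with made-up box ids and timestamps) of the
# bucketing performed above: sub-second timestamps for the same box collapse
# into one count per whole second, which later becomes a single whisper point.
from collections import defaultdict

updates = [("box-1", 1500000000.1), ("box-1", 1500000000.9), ("box-2", 1500000001.0)]
sorted_updates = defaultdict(lambda: defaultdict(int))
for box_id, timestamp in updates:
    sorted_updates[box_id][int(timestamp)] += 1

print({k: dict(v) for k, v in sorted_updates.items()})
# {'box-1': {1500000000: 2}, 'box-2': {1500000001: 1}}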
def writeWhisperFile(dbFilePath, datapoints):
    try:
        whisper.update_many(dbFilePath, datapoints)
    except:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
        return False
    return True
def _createdb(self, wsp, schema=[(1, 20)], data=None):
    whisper.create(wsp, schema)
    if data is None:
        tn = time.time() - 20
        data = []
        for i in range(20):
            data.append((tn + 1 + i, random.random() * 10))
    whisper.update_many(wsp, data)
    return data
def test_resize_with_aggregate(self):
    """resize whisper file with aggregate"""
    # 60 seconds per point, retained for two days
    retention = [(60, 60 * 24 * 2)]
    whisper.create(self.filename, retention)

    # insert data
    now_timestamp = int((datetime.now() - datetime(1970, 1, 1)).total_seconds())
    now_timestamp -= now_timestamp % 60  # align timestamp to the 60s step
    points = [(now_timestamp - i * 60, i) for i in range(0, 60 * 24 * 2)]
    whisper.update_many(self.filename, points)
    data = whisper.fetch(self.filename,
                         fromTime=now_timestamp - 3600 * 25,
                         untilTime=now_timestamp - 3600 * 25 + 60 * 10)
    self.assertEqual(len(data[1]), 10)
    self.assertEqual(data[0][2], 60)  # high precision step == 60
    for d in data[1]:
        self.assertIsNotNone(d)

    # resize from high to low precision
    os.system('whisper-resize.py %s 60s:1d 300s:2d --aggregate --nobackup >/dev/null' % self.filename)  # noqa
    data_low = whisper.fetch(self.filename,
                             fromTime=now_timestamp - 3600 * 25,
                             untilTime=now_timestamp - 3600 * 25 + 60 * 10)
    self.assertEqual(len(data_low[1]), 2)
    self.assertEqual(data_low[0][2], 300)  # low precision step == 300
    for d in data_low[1]:
        self.assertIsNotNone(d)
    data_high = whisper.fetch(self.filename,
                              fromTime=now_timestamp - 60 * 10,
                              untilTime=now_timestamp)
    self.assertEqual(len(data_high[1]), 10)
    self.assertEqual(data_high[0][2], 60)  # high precision step == 60

    # resize from low to high precision
    os.system('whisper-resize.py %s 60s:2d --aggregate --nobackup >/dev/null' % self.filename)  # noqa
    data1 = whisper.fetch(self.filename,
                          fromTime=now_timestamp - 3600 * 25,
                          untilTime=now_timestamp - 3600 * 25 + 60 * 10)
    self.assertEqual(len(data1[1]), 10)
    # data1 looks like ((1588836720, 1588837320, 60),
    #                   [None, None, 1490.0, None, None, None, None, 1485.0, None, None])
    # data1[1] has two non-None values
    self.assertEqual(len(list(filter(lambda x: x is not None, data1[1]))), 2)
    data2 = whisper.fetch(self.filename,
                          fromTime=now_timestamp - 60 * 15,
                          untilTime=now_timestamp - 60 * 5)
    # data2 looks like ((1588925820, 1588926420, 60),
    #                   [10.0, 11.0, 10.0, 9.0, 8.0, 5.0, 6.0, 5.0, 4.0, 3.0])
    self.assertEqual(len(list(filter(lambda x: x is not None, data2[1]))), 10)

    # clean up
    self.tearDown()
def write_series(self, series):
    file_name = os.path.join(
        WHISPER_DIR,
        '{0}.wsp'.format(series.pathExpression.replace('.', os.sep)))
    os.makedirs(os.path.dirname(file_name))
    whisper.create(file_name, [(1, 180)])
    data = []
    for index, value in enumerate(series):
        if value is None:
            continue
        data.append((series.start + index * series.step, value))
    whisper.update_many(file_name, data)
def _update(self, datapoints):
    """
    Store the given datapoints in the current database.

    :datapoints: a list of tuples of epoch timestamp and value,
                 e.g. [(1368977629, 10)]
    """
    if len(datapoints) == 1:
        timestamp, value = datapoints[0]
        whisper.update(self.path, value, timestamp)
    else:
        whisper.update_many(self.path, datapoints)
def update(path, datapoints):
    nrOfPoints = len(datapoints)
    if nrOfPoints == 1:
        (timestamp, value) = datapoints[0]
        timestamp = timegm(timestamp.timetuple())
        whisper.update(path, value, timestamp)
    elif nrOfPoints > 1:
        whisper.update_many(path + '.wsp',
                            [(timegm(t.timetuple()), v) for (t, v) in datapoints])
    else:
        raise Exception("No Datapoint given")
    return True
def load_data(f_name, dest_file):
    with open(f_name, 'r') as fp:
        start = False
        for line in fp:
            datapoints = []
            if start == False:
                if line.find("Archive ") == 0 and line.find(" data:") > 0:
                    start = True
            else:
                datas = line.split(" ")
                if len(datas) == 3 and datas[0] != 'Archive':
                    datapoints.append((datas[1][:-1], datas[2]))
                    # print datapoints
                    whisper.update_many(dest_file, datapoints)
def handle(self):
    points = 0
    for metric in self.redis.smembers(METRICS):
        values = self.redis.zrange(metric, 0, -1)
        points += len(values)
        f = target_to_path(self.path, metric)
        d = os.path.dirname(f)
        if d not in self.dirs:
            if not os.path.isdir(d):
                os.makedirs(d)
            self.dirs.add(d)
        if not os.path.exists(f):
            whisper.create(f, [(10, 1000)])  # [FIXME] hardcoded values
        whisper.update_many(f, [struct.unpack('!ff', a) for a in values])
        if len(values):
            self.redis.zrem(metric, *values)
    self.metric(METRIC_POINTS, points)
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)
    srcArchives = srcHeader["archives"]
    srcArchives.sort(key=itemgetter("retention"))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader["maxRetention"]

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive["retention"]
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(
            itertools.ifilter(
                lambda points: points[1] is not None,
                itertools.izip(xrange(start, end, archive_step), values)
            )
        )
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return
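# A hypothetical call to fill() above: both paths are placeholders, and both
# .wsp files are assumed to already exist with compatible archives. The call
# back-fills the last hour of non-None points from src into dst, working from
# the highest-precision archive outwards.
import time

src = "/opt/graphite/storage/whisper/app/requests.wsp"       # assumed path
dst = "/opt/graphite/storage/whisper/app/requests_new.wsp"   # assumed path
now = int(time.time())
fill(src, dst, now - 3600, now)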
def _update(self, wsp=None, schema=None):
    wsp = wsp or self.filename
    schema = schema or [(1, 20)]
    num_data_points = 20

    # create sample data
    whisper.create(wsp, schema)
    tn = time.time() - num_data_points
    data = []
    for i in range(num_data_points):
        data.append((tn + 1 + i, random.random() * 10))

    # test single update
    whisper.update(wsp, data[0][1], data[0][0])

    # test multi update
    whisper.update_many(wsp, data[1:])

    return data
def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False):
    wsp = wsp or self.filename
    schema = schema or [(1, 20)]
    num_data_points = 20

    # create sample data
    whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate)
    tn = time.time() - num_data_points
    data = []
    for i in range(num_data_points):
        data.append((tn + 1 + i, random.random() * 10))

    # test single update
    whisper.update(wsp, data[0][1], data[0][0])

    # test multi update
    whisper.update_many(wsp, data[1:])

    return data
def _update(self, wsp=None, schema=None):
    wsp = wsp or self.db
    schema = schema or [(1, 20)]
    num_data_points = 20
    whisper.create(wsp, schema)

    # create sample data
    tn = time.time() - num_data_points
    data = []
    for i in range(num_data_points):
        data.append((tn + 1 + i, random.random() * 10))

    # test single update
    whisper.update(wsp, data[0][1], data[0][0])

    # test multi update
    whisper.update_many(wsp, data[1:])

    return data
def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False):
    wsp = wsp or self.filename
    schema = schema or [(1, 20)]
    num_data_points = 20

    # create sample data
    self.addCleanup(self._remove, wsp)
    whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate)
    tn = int(time.time()) - num_data_points
    data = []
    for i in range(num_data_points):
        data.append((tn + 1 + i, random.random() * 10))

    # test single update
    whisper.update(wsp, data[0][1], data[0][0])

    # test multi update
    whisper.update_many(wsp, data[1:])

    return data
def test_single_metric(self): xfilesfactor = 0.5 aggregation_method = "last" retentions = [(1, 60)] now = int(time.time()) time_from, time_to = now - 10, now points = [(t, now-t) for t in xrange(time_from, time_to)] metric = "test_metric" metric_path = os_path.join(self.tempdir, metric + ".wsp") whisper.create(metric_path, retentions, xfilesfactor, aggregation_method) whisper.update_many(metric_path, points) self._call_main() meta = self.accessor.get_metric(metric) self.assertTrue(meta) self.assertEqual(meta.name, metric) self.assertEqual(meta.carbon_aggregation, aggregation_method) self.assertEqual(meta.carbon_xfilesfactor, xfilesfactor) self.assertEqual(meta.carbon_retentions, retentions) points_again = self.accessor.fetch_points(metric, time_from, time_to, step=1) self.assertEqual(points, points_again)
The license of this file is explicitly Apache License 2.0, in accordance with
its usage of the whisper libraries. It is designed to be called externally by
NAV migration tools to avoid license incompatibilities between GPL v2 and
Apache License v2.
"""
import sys
import time
import argparse

try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Please make sure whisper is installed properly')

now = int(time.time())

option_parser = argparse.ArgumentParser(
    description="Accepts multiple Whisper datapoints on stdin to update a "
                "single .wsp file")
option_parser.add_argument("filename", nargs=1,
                           help="path to a .wsp file to update")
args = option_parser.parse_args()

datapoint_strings = [point.replace('N:', '%d:' % now) for point in sys.stdin]
datapoints = [tuple(point.strip().split(':')) for point in datapoint_strings]

try:
    # nargs=1 makes args.filename a one-element list
    whisper.update_many(args.filename[0], datapoints)
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))
def update_many(self, metric, datapoints):
    return whisper.update_many(self.getFilesystemPath(metric), datapoints)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in AGGREGATION_SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        try:
          if not exists(dbDir):
            os.makedirs(dbDir)
        except OSError, e:
          log.err("%s" % e)
        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        try:
          whisper.create(
              dbFilePath, archiveConfig, xFilesFactor, aggregationMethod,
              settings.WHISPER_SPARSE_CREATE, settings.WHISPER_FALLOCATE_CREATE)
          instrumentation.increment('creates')
        except:
          log.err("Error creating %s" % (dbFilePath))
          continue

      # If we've got a rate limit configured, let's make sure we enforce it
      if UPDATE_BUCKET:
        UPDATE_BUCKET.drain(1, blocking=True)

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        updateTime = time.time() - t1
      except Exception:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" %
                      (pointCount, metric, updateTime))

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
def write(self, metric, datapoints):
    path = self.getFilesystemPath(metric)
    whisper.update_many(path, datapoints)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  updates = 0
  lastSecond = 0

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in agg_schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        try:
          os.makedirs(dbDir, 0755)
        except OSError as e:
          log.err("%s" % e)
        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod,
                       settings.WHISPER_SPARSE_CREATE, settings.WHISPER_FALLOCATE_CREATE)
        instrumentation.increment('creates')

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        t2 = time.time()
        updateTime = t2 - t1
      except:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" %
                      (pointCount, metric, updateTime))

      # Rate limit update operations
      thisSecond = int(t2)
      if thisSecond != lastSecond:
        lastSecond = thisSecond
        updates = 0
      else:
        updates += 1
        if updates >= settings.MAX_UPDATES_PER_SECOND:
          time.sleep(int(t2 + 1) - t2)

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
#!/usr/bin/env python

import sys, time
import whisper
from optparse import OptionParser

now = int(time.time())

option_parser = OptionParser(
    usage='''%prog [options] path timestamp:value [timestamp:value]*''')

(options, args) = option_parser.parse_args()

if len(args) < 2:
    option_parser.print_usage()
    sys.exit(1)

path = args[0]
datapoint_strings = args[1:]
datapoint_strings = [point.replace('N:', '%d:' % now)
                     for point in datapoint_strings]
datapoints = [tuple(point.split(':')) for point in datapoint_strings]

if len(datapoints) == 1:
    timestamp, value = datapoints[0]
    whisper.update(path, value, timestamp)
else:
    print datapoints
    whisper.update_many(path, datapoints)
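# A tiny standalone illustration of the 'N:' shorthand handled by the script
# above: 'N' is rewritten to the current timestamp before each argument is
# split into a (timestamp, value) pair. The sample points are invented.
import time

now = int(time.time())
raw = ["N:1.5", "1700000000:2.0"]
rewritten = [p.replace('N:', '%d:' % now) for p in raw]
print([tuple(p.split(':')) for p in rewritten])
# e.g. [('1700000123', '1.5'), ('1700000000', '2.0')]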
    os.unlink(tmpfile)
  newfile = tmpfile
else:
  newfile = options.newfile

print 'Creating new whisper database: %s' % newfile
whisper.create(newfile, new_archives, xFilesFactor=xff)
size = os.stat(newfile).st_size
print 'Created: %s (%d bytes)' % (newfile, size)

print 'Migrating data...'
for archive in old_archives:
  timeinfo, values = archive['data']
  datapoints = zip(range(*timeinfo), values)
  datapoints = filter(lambda p: p[1] is not None, datapoints)
  whisper.update_many(newfile, datapoints)

if options.newfile is not None:
  sys.exit(0)

backup = path + '.bak'
print 'Renaming old database to: %s' % backup
os.rename(path, backup)

try:
  print 'Renaming new database to: %s' % path
  os.rename(tmpfile, path)
except:
  traceback.print_exc()
  print '\nOperation failed, restoring backup'
  os.rename(backup, path)
def update_many(self, metric, datapoints, dbIdentifier):
    dbFilePath = dbIdentifier
    whisper.update_many(dbFilePath, datapoints)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  global updates
  global lastSecond

  # while MetricCache:
  #   dataWritten = False

  for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
    dataWritten = True

    if not dbFileExists:
      archiveConfig = None
      xFilesFactor, aggregationMethod = None, None

      for schema in schemas:
        if schema.matches(metric):
          log.creates('new metric %s matched schema %s' % (metric, schema.name))
          archiveConfig = [archive.getTuple() for archive in schema.archives]
          break

      for schema in agg_schemas:
        if schema.matches(metric):
          log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
          xFilesFactor, aggregationMethod = schema.archives
          break

      if not archiveConfig:
        raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

      try:
        dbDir = dirname(dbFilePath)
        os.system("umask u=rwx,go=rx ; mkdir -p -m 755 '%s'" % dbDir)
        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod)
        os.chmod(dbFilePath, 0755)
        instrumentation.increment('creates')
      except IOError as e:
        log.msg("IOError: {0}".format(e))

    try:
      t1 = time.time()
      whisper.update_many(dbFilePath, datapoints)
      t2 = time.time()
      updateTime = t2 - t1
    except:
      log.err()
      instrumentation.increment('errors')
    else:
      pointCount = len(datapoints)
      instrumentation.increment('committedPoints', pointCount)
      instrumentation.append('updateTimes', updateTime)
      if settings.LOG_UPDATES:
        log.updates("wrote %d datapoints for %s in %.5f seconds" %
                    (pointCount, metric, updateTime))

    # Rate limit update operations
    thisSecond = int(t2)
    if thisSecond != lastSecond:
      lastSecond = thisSecond
      updates = 0
    else:
      updates += 1
      if updates >= settings.MAX_UPDATES_PER_SECOND:
        time.sleep(int(t2 + 1) - t2)

  time.sleep(5)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  updates = 0
  lastSecond = 0

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None

        for schema in schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        os.system("mkdir -p -m 755 '%s'" % dbDir)
        log.creates("creating database file %s" % dbFilePath)
        whisper.create(dbFilePath, archiveConfig)
        os.chmod(dbFilePath, 0755)
        increment('creates')

        # Create metadata file
        dbFileName = basename(dbFilePath)
        metaFilePath = join(dbDir, dbFileName[:-len('.wsp')] + '.context.pickle')
        createMetaFile(metric, schema, metaFilePath)

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        t2 = time.time()
        updateTime = t2 - t1
      except:
        log.err()
        increment('errors')
      else:
        pointCount = len(datapoints)
        increment('committedPoints', pointCount)
        append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" %
                      (pointCount, metric, updateTime))

      # Rate limit update operations
      thisSecond = int(t2)
      if thisSecond != lastSecond:
        lastSecond = thisSecond
        updates = 0
      else:
        updates += 1
        if updates >= settings.MAX_UPDATES_PER_SECOND:
          time.sleep(int(t2 + 1) - t2)

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
    for tinterval in zip(timepoints_to_update[:-1], timepoints_to_update[1:]):
      # TODO: Setting lo= parameter for 'lefti' based on righti from previous
      #       iteration. Obviously, this can only be done if
      #       timepoints_to_update is always updated. Is it?
      lefti = bisect.bisect_left(oldtimestamps, tinterval[0])
      righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti)
      newvalues = oldvalues[lefti:righti]
      if newvalues:
        non_none = filter(lambda x: x is not None, newvalues)
        if 1.0 * len(non_none) / len(newvalues) >= xff:
          newdatapoints.append([tinterval[0],
                                whisper.aggregate(aggregationMethod, non_none)])
    whisper.update_many(newfile, newdatapoints)
else:
  print 'Migrating data without aggregation...'
  for archive in old_archives:
    timeinfo, values = archive['data']
    datapoints = zip(range(*timeinfo), values)
    datapoints = filter(lambda p: p[1] is not None, datapoints)
    whisper.update_many(newfile, datapoints)

if options.newfile is not None:
  sys.exit(0)

backup = path + '.bak'
print 'Renaming old database to: %s' % backup
os.rename(path, backup)
def convert_rrd(rrd_file, dest_dir):
    datasource_map = {
        'OUTOCTETS': 'out_octets',
        'OUTUCASTPKTS': 'out_unicast_packets',
        'OUTNUCASTPKTS': 'out_nunicast_packets',
        'INNUCASTPKTS': 'in_nunicast_packets',
        'INERRORS': 'in_errors',
        'OUTERRORS': 'out_errors',
        'INUCASTPKTS': 'in_unicast_packets',
        'INOCTETS': 'in_octets',
    }

    rra_indices = []
    rrd_info = rrdtool.info(rrd_file)
    seconds_per_pdp = rrd_info['step']
    for key in rrd_info:
        if key.startswith('rra['):
            index = int(key.split('[')[1].split(']')[0])
            rra_indices.append(index)
    rra_count = max(rra_indices) + 1

    rras = []
    for i in range(rra_count):
        rra_info = {}
        rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i]
        rra_info['rows'] = rrd_info['rra[%d].rows' % i]
        rra_info['cf'] = rrd_info['rra[%d].cf' % i]
        if 'xff' in rrd_info:
            rra_info['xff'] = rrd_info['rra[%d].xff' % i]
        rras.append(rra_info)

    datasources = []
    if 'ds' in rrd_info:
        datasources = rrd_info['ds'].keys()
    else:
        ds_keys = [key for key in rrd_info if key.startswith('ds[')]
        datasources = list(set(key[3:].split(']')[0] for key in ds_keys))

    relevant_rras = []
    for rra in rras:
        if rra['cf'] == 'MAX':
            relevant_rras.append(rra)

    archives = []
    for rra in relevant_rras:
        precision = rra['pdp_per_row'] * seconds_per_pdp
        points = rra['rows']
        archives.append((precision, points))

    for datasource in datasources:
        now = int(time.time())
        d = datasource_map[datasource]
        dest_path = f"{dest_dir}/{d}.wsp"
        try:
            whisper.create(dest_path, archives, xFilesFactor=0.5)
        except whisper.InvalidConfiguration:
            pass

        datapoints = []
        for precision, points in reversed(archives):
            retention = precision * points
            endTime = now - now % precision
            startTime = endTime - retention
            (time_info, columns, rows) = rrdtool.fetch(
                rrd_file, 'MAX',
                '-r', str(precision),
                '-s', str(startTime),
                '-e', str(endTime),
                '-a')
            column_index = list(columns).index(datasource)
            rows.pop()
            values = [row[column_index] for row in rows]
            timestamps = list(range(*time_info))
            datapoints = zip(timestamps, values)
            datapoints = [datapoint for datapoint in datapoints
                          if datapoint[1] is not None]
            whisper.update_many(dest_path, datapoints)
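# A hypothetical invocation of convert_rrd() above; both paths are placeholders.
# One .wsp file is written into dest_dir for every datasource present in
# datasource_map, mirroring the MAX RRAs of the source RRD.
convert_rrd("/var/lib/rrd/switch1/eth0.rrd",
            "/opt/graphite/storage/whisper/switch1/eth0")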
def update_many(self, metric, datapoints, retention_config):
    '''Update datapoints but quietly ignore the retention_config'''
    return whisper.update_many(self.getFilesystemPath(metric), datapoints)
except whisper.InvalidConfiguration as e:
    raise SystemExit('[ERROR] %s' % str(e))

size = os.stat(path).st_size
archiveConfig = ','.join(["%d:%d" % ar for ar in archives])
print("Created: %s (%d bytes) with archives: %s" % (path, size, archiveConfig))

print("Migrating data")
archiveNumber = len(archives) - 1
for precision, points in reversed(archives):
    retention = precision * points
    endTime = now - now % precision
    startTime = endTime - retention
    (time_info, columns, rows) = rrdtool.fetch(
        rrd_path, options.aggregationMethod.upper(),
        '-r', str(precision),
        '-s', str(startTime),
        '-e', str(endTime))
    column_index = list(columns).index(datasource)
    rows.pop()  # remove the last datapoint because RRD sometimes gives funky values
    values = [row[column_index] for row in rows]
    timestamps = list(range(*time_info))
    datapoints = zip(timestamps, values)
    datapoints = list(filter(lambda p: p[1] is not None, datapoints))
    print(' migrating %d datapoints from archive %d' % (len(datapoints), archiveNumber))
    archiveNumber -= 1
    whisper.update_many(path, datapoints)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  updates = 0
  lastSecond = 0

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None

        for schema in schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        os.system("mkdir -p -m 755 '%s'" % dbDir)
        log.creates("creating database file %s" % dbFilePath)
        whisper.create(dbFilePath, archiveConfig)
        os.chmod(dbFilePath, 0755)
        instrumentation.increment('creates')

        # Create metadata file
        dbFileName = basename(dbFilePath)
        metaFilePath = join(dbDir, dbFileName[:-len('.wsp')] + '.context.pickle')
        createMetaFile(metric, schema, metaFilePath)

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        t2 = time.time()
        updateTime = t2 - t1
      except:
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" %
                      (pointCount, metric, updateTime))

      # Rate limit update operations
      thisSecond = int(t2)
      if thisSecond != lastSecond:
        lastSecond = thisSecond
        updates = 0
      else:
        updates += 1
        if updates >= settings.MAX_UPDATES_PER_SECOND:
          time.sleep(int(t2 + 1) - t2)

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)