def test_setAggregation(self):
    """Create a db, change aggregation, xFilesFactor, then use info() to validate"""
    retention = [(1, 60), (60, 60)]

    # create a new db with a valid configuration
    whisper.create(self.db, retention)

    # set every AggregationMethod available
    for ag in whisper.aggregationMethods:
        for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0:
            # original xFilesFactor
            info0 = whisper.info(self.db)
            # optional xFilesFactor not passed
            whisper.setAggregationMethod(self.db, ag)

            # original value should not change
            info1 = whisper.info(self.db)
            self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor'])
            # the selected aggregation method should have applied
            self.assertEqual(ag, info1['aggregationMethod'])

            # optional xFilesFactor used
            whisper.setAggregationMethod(self.db, ag, xff)
            # new info should match what we just set it to
            info2 = whisper.info(self.db)

            # packing and unpacking because
            # AssertionError: 0.20000000298023224 != 0.2
            target_xff = struct.unpack("!f", struct.pack("!f", xff))[0]
            self.assertEqual(info2['xFilesFactor'], target_xff)

            # same aggregationMethod assertion again, but double-checking since
            # we are playing with packed values and seek()
            self.assertEqual(ag, info2['aggregationMethod'])

    self._removedb()
def test_normal(self):
    whisper.create(self.filename, [(1, 60), (60, 60)])

    whisper.CACHE_HEADERS = True
    whisper.info(self.filename)
    whisper.info(self.filename)
    whisper.CACHE_HEADERS = False
def test_setAggregation(self):
    """Create a db, change aggregation, xFilesFactor, then use info() to validate"""
    retention = [(1, 60), (60, 60)]

    # create a new db with a valid configuration
    whisper.create(self.db, retention)

    # set every AggregationMethod available
    for ag in whisper.aggregationMethods:
        for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0:
            # original xFilesFactor
            info0 = whisper.info(self.db)
            # optional xFilesFactor not passed
            whisper.setAggregationMethod(self.db, ag)

            # original value should not change
            info1 = whisper.info(self.db)
            self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor'])
            # the selected aggregation method should have applied
            self.assertEqual(ag, info1['aggregationMethod'])

            # optional xFilesFactor used
            whisper.setAggregationMethod(self.db, ag, xff)
            # new info should match what we just set it to
            info2 = whisper.info(self.db)

            # packing and unpacking because
            # AssertionError: 0.20000000298023224 != 0.2
            target_xff = struct.unpack("!f", struct.pack("!f", xff))[0]
            self.assertEqual(info2['xFilesFactor'], target_xff)

            # same aggregationMethod assertion again, but double-checking since
            # we are playing with packed values and seek()
            self.assertEqual(ag, info2['aggregationMethod'])

    self._removedb()
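The struct round-trip in these tests exists because whisper stores xFilesFactor as a big-endian 32-bit float, so the value read back from a header differs slightly from the Python double that was written. A minimal standalone sketch of the effect:

import struct

# 0.2 has no exact float32 representation, so a value written into a
# whisper header comes back slightly off; round-tripping through "!f"
# reproduces the precision the file actually stores.
xff = 0.2
stored = struct.unpack("!f", struct.pack("!f", xff))[0]
print(stored)         # 0.20000000298023224
print(stored == xff)  # False - always compare against the round-tripped value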
def ccmerge(path_from, path_to):
    """Carbon-copy style of merge: instead of propagating datapoints from the
    source WSP file through archives in the destination WSP file, each
    datapoint is copied to the corresponding slot in the corresponding archive.
    Works only with archives having the same retention schema. Only datapoints
    missing in the destination archive are copied (i.e. existing datapoints in
    the destination archive do not get overwritten)"""
    import fcntl

    map_from = mmap_file(path_from)

    fd = os.open(path_to, os.O_RDWR)
    fcntl.flock(fd, fcntl.LOCK_EX)
    map_to = mmap.mmap(fd, 0, prot=mmap.PROT_WRITE)

    srcHeader = whisper.info(path_from)
    dstHeader = whisper.info(path_to)

    srcArchives = srcHeader['archives']
    dstArchives = dstHeader['archives']

    for srcArchive, dstArchive in zip(srcArchives, dstArchives):
        for p in ('points', 'secondsPerPoint'):
            if srcArchive[p] != dstArchive[p]:
                raise Exception, "%s and %s have different number of %s: %d vs %d" % (
                    path_from, path_to, p, srcArchive[p], dstArchive[p])

    for srcArchive, dstArchive in zip(srcArchives, dstArchives):
        srcArchiveStart = seriesStart(srcArchive, map_from)
        dstArchiveStart = seriesStart(dstArchive, map_to)

        # source archive is empty => nothing to copy
        if srcArchiveStart is None:
            continue
        # destination archive is empty => does not matter which position we put data to
        if dstArchiveStart is None:
            dstArchiveStart = 0

        # find the difference in alignment
        step = srcArchive['secondsPerPoint']
        # offset in number of datapoints between source and destination archives
        alignmentDiff = (dstArchiveStart - srcArchiveStart) / step

        # iterate through points and copy them
        base_offset = srcArchive['offset']
        points = srcArchive['points']
        for pointInSrc in xrange(points):
            pointInDst = pointInSrc - alignmentDiff
            # archive is circular
            if pointInDst < 0:
                pointInDst += points
            if pointInDst >= points:
                pointInDst -= points

            dstOffset = base_offset + pointInDst * whisper.pointSize
            srcOffset = base_offset + pointInSrc * whisper.pointSize

            (dstTimestamp, dstValue) = whisper.struct.unpack(
                whisper.pointFormat, map_to[dstOffset:dstOffset + whisper.pointSize])
            # we already have the datapoint in the destination archive, no need to copy
            if dstTimestamp != 0:
                continue

            (srcTimestamp, srcValue) = whisper.struct.unpack(
                whisper.pointFormat, map_from[srcOffset:srcOffset + whisper.pointSize])
            # datapoint is missing in the source archive as well, nothing to copy
            if srcTimestamp == 0:
                continue

            # copy the datapoint
            map_to[dstOffset:dstOffset + whisper.pointSize] = whisper.struct.pack(
                whisper.pointFormat, srcTimestamp, srcValue)

    map_to.flush()
    os.close(fd)
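A minimal usage sketch for ccmerge, assuming both files already exist with identical retention schemas (the mmap_file and seriesStart helpers are expected to come from the surrounding module; the paths are illustrative):

import whisper

# identical schemas are required by ccmerge
retention = [(60, 1440)]
whisper.create('old-node.wsp', retention)
whisper.create('new-node.wsp', retention)

# copy datapoints slot-for-slot; existing points in new-node.wsp are
# left untouched, only empty slots receive data from old-node.wsp
ccmerge('old-node.wsp', 'new-node.wsp')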
def test_setAggregation(self):
    """
    Create a db, change aggregation, xFilesFactor, then use info() to validate
    """
    original_lock = whisper.LOCK
    original_caching = whisper.CACHE_HEADERS
    original_autoflush = whisper.AUTOFLUSH

    whisper.LOCK = True
    whisper.AUTOFLUSH = True
    whisper.CACHE_HEADERS = True

    # create a new db with a valid configuration
    whisper.create(self.filename, self.retention)

    with AssertRaisesException(
            whisper.InvalidAggregationMethod('Unrecognized aggregation method: yummy beer')):
        whisper.setAggregationMethod(self.filename, 'yummy beer')

    # set every AggregationMethod available
    for ag in whisper.aggregationMethods:
        for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0:
            # original xFilesFactor
            info0 = whisper.info(self.filename)
            # optional xFilesFactor not passed
            old_ag = whisper.setAggregationMethod(self.filename, ag)
            # should return the old aggregationMethod
            self.assertEqual(old_ag, info0['aggregationMethod'])

            # original value should not change
            info1 = whisper.info(self.filename)
            self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor'])
            # the selected aggregation method should have applied
            self.assertEqual(ag, info1['aggregationMethod'])

            # optional xFilesFactor used
            old_ag = whisper.setAggregationMethod(self.filename, ag, xff)
            # should return the old aggregationMethod
            self.assertEqual(old_ag, info1['aggregationMethod'])
            # new info should match what we just set it to
            info2 = whisper.info(self.filename)

            # packing and unpacking because
            # AssertionError: 0.20000000298023224 != 0.2
            target_xff = struct.unpack("!f", struct.pack("!f", xff))[0]
            self.assertEqual(info2['xFilesFactor'], target_xff)

            # same aggregationMethod assertion again, but double-checking since
            # we are playing with packed values and seek()
            self.assertEqual(ag, info2['aggregationMethod'])

    with SimulatedCorruptWhisperFile():
        with AssertRaisesException(
                whisper.CorruptWhisperFile('Unable to read header', self.filename)):
            whisper.setAggregationMethod(self.filename, ag)

    whisper.LOCK = original_lock
    whisper.AUTOFLUSH = original_autoflush
    whisper.CACHE_HEADERS = original_caching
def test_setAggregation(self):
    """
    Create a db, change aggregation, xFilesFactor, then use info() to validate
    """
    original_lock = whisper.LOCK
    original_caching = whisper.CACHE_HEADERS
    original_autoflush = whisper.AUTOFLUSH

    whisper.LOCK = True
    whisper.AUTOFLUSH = True
    whisper.CACHE_HEADERS = True

    # create a new db with a valid configuration
    whisper.create(self.filename, self.retention)

    with AssertRaisesException(
            whisper.InvalidAggregationMethod(
                'Unrecognized aggregation method: yummy beer')):
        whisper.setAggregationMethod(self.filename, 'yummy beer')

    # set every AggregationMethod available
    for ag in whisper.aggregationMethods:
        for xff in 0.0, 0.2, 0.4, 0.7, 0.75, 1.0:
            # original xFilesFactor
            info0 = whisper.info(self.filename)
            # optional xFilesFactor not passed
            whisper.setAggregationMethod(self.filename, ag)

            # original value should not change
            info1 = whisper.info(self.filename)
            self.assertEqual(info0['xFilesFactor'], info1['xFilesFactor'])
            # the selected aggregation method should have applied
            self.assertEqual(ag, info1['aggregationMethod'])

            # optional xFilesFactor used
            whisper.setAggregationMethod(self.filename, ag, xff)
            # new info should match what we just set it to
            info2 = whisper.info(self.filename)

            # packing and unpacking because
            # AssertionError: 0.20000000298023224 != 0.2
            target_xff = struct.unpack("!f", struct.pack("!f", xff))[0]
            self.assertEqual(info2['xFilesFactor'], target_xff)

            # same aggregationMethod assertion again, but double-checking since
            # we are playing with packed values and seek()
            self.assertEqual(ag, info2['aggregationMethod'])

    with SimulatedCorruptWhisperFile():
        with AssertRaisesException(
                whisper.CorruptWhisperFile('Unable to read header', self.filename)):
            whisper.setAggregationMethod(self.filename, ag)

    whisper.LOCK = original_lock
    whisper.AUTOFLUSH = original_autoflush
    whisper.CACHE_HEADERS = original_caching
def test_info_bogus_file(self):
    self.assertIsNone(whisper.info('bogus-file'))

    # Validate "corrupt" whisper metadata
    whisper.create(self.filename, self.retention)
    with SimulatedCorruptWhisperFile():
        with AssertRaisesException(
                whisper.CorruptWhisperFile('Unable to read header', self.filename)):
            whisper.info(self.filename)

    # Validate "corrupt" whisper archive data
    with SimulatedCorruptWhisperFile(corrupt_archive=True):
        with AssertRaisesException(
                whisper.CorruptWhisperFile('Unable to read archive0 metadata', self.filename)):
            whisper.info(self.filename)
def test_create(self):
    """Create a db and use info() to validate"""
    retention = [(1, 60), (60, 60)]

    # check if invalid configuration fails successfully
    with self.assertRaises(whisper.InvalidConfiguration):
        whisper.create(self.db, [])

    # create a new db with a valid configuration
    whisper.create(self.db, retention)

    # attempt to create another db in the same file, this should fail
    with self.assertRaises(whisper.InvalidConfiguration):
        whisper.create(self.db, 0)

    info = whisper.info(self.db)

    # check header information
    self.assertEqual(info['maxRetention'],
                     max([a[0] * a[1] for a in retention]))
    self.assertEqual(info['aggregationMethod'], 'average')
    self.assertEqual(info['xFilesFactor'], 0.5)

    # check archive information
    self.assertEqual(len(info['archives']), len(retention))
    self.assertEqual(info['archives'][0]['points'], retention[0][1])
    self.assertEqual(info['archives'][0]['secondsPerPoint'], retention[0][0])
    self.assertEqual(info['archives'][0]['retention'], retention[0][0] * retention[0][1])
    self.assertEqual(info['archives'][1]['retention'], retention[1][0] * retention[1][1])

    # remove database
    self._removedb()

    # as written, invalid aggregation methods default back to 'average'
    for bad_m in self.bad_methods:
        whisper.create(self.db, retention, xFilesFactor=None, aggregationMethod=bad_m)
        info = whisper.info(self.db)
        self.assertEqual(info['aggregationMethod'], 'average')
        self._removedb()

    for f in self.bad_xff:
        with self.assertRaises(ValueError):
            whisper.create(self.db, retention, xFilesFactor=f)
        # ensure files with bad xFilesFactors are not created
        with self.assertRaises(IOError):
            with open(self.db):
                pass
def test_create_and_info(self):
    """
    Create a db and use info() to validate
    """
    # check if invalid configuration fails successfully
    for retention in (0, []):
        with AssertRaisesException(
                whisper.InvalidConfiguration(
                    'You must specify at least one archive configuration!')):
            whisper.create(self.filename, retention)

    # create a new db with a valid configuration
    whisper.create(self.filename, self.retention)

    # Ensure another file can't be created when one exists already
    with AssertRaisesException(
            whisper.InvalidConfiguration(
                'File {0} already exists!'.format(self.filename))):
        whisper.create(self.filename, self.retention)

    info = whisper.info(self.filename)

    # check header information
    self.assertEqual(info['maxRetention'],
                     max([a[0] * a[1] for a in self.retention]))
    self.assertEqual(info['aggregationMethod'], 'average')
    self.assertEqual(info['xFilesFactor'], 0.5)

    # check archive information
    self.assertEqual(len(info['archives']), len(self.retention))
    self.assertEqual(info['archives'][0]['points'], self.retention[0][1])
    self.assertEqual(info['archives'][0]['secondsPerPoint'], self.retention[0][0])
    self.assertEqual(info['archives'][0]['retention'],
                     self.retention[0][0] * self.retention[0][1])
    self.assertEqual(info['archives'][1]['retention'],
                     self.retention[1][0] * self.retention[1][1])
def waterlevel(db_name):
    """Reduce alert frequency after initial alert, reset on all-clear"""
    (times, fail_buffer) = whisper.fetch(db_name, 315550800)

    if fail_buffer.count(1) > 2:
        new_whisper_db_name = db_name + '.wsp2'
        whisper.create(new_whisper_db_name, FOLLOWUP, aggregationMethod='last')
        whisper.update(new_whisper_db_name, 1)
        os.rename(new_whisper_db_name, db_name)
        for admin in sys.argv[2:]:
            os.system('mail -s "' + sys.argv[1] + '" ' + admin + '</dev/null')

    if fail_buffer.count(1) == 0:
        if whisper.info(db_name)['archives'][0]['secondsPerPoint'] == FOLLOWUP[0][0]:
            new_whisper_db_name = db_name + '.wsp2'
            whisper.create(new_whisper_db_name, RETAINER, aggregationMethod='last')
            whisper.update(new_whisper_db_name, 0)
            os.rename(new_whisper_db_name, db_name)
            for admin in sys.argv[2:]:
                os.system('mail -s "' + sys.argv[1] + '" ' + admin + '</dev/null')

    return 0
def test_create(self):
    """Create a db and use info() to validate"""
    retention = [(1, 60), (60, 60)]

    # check if invalid configuration fails successfully
    with self.assertRaises(whisper.InvalidConfiguration):
        whisper.create(self.db, [])

    # create a new db with a valid configuration
    whisper.create(self.db, retention)

    # attempt to create another db in the same file, this should fail
    with self.assertRaises(whisper.InvalidConfiguration):
        whisper.create(self.db, 0)

    info = whisper.info(self.db)

    # check header information
    self.assertEqual(info['maxRetention'],
                     max([a[0] * a[1] for a in retention]))
    self.assertEqual(info['aggregationMethod'], 'average')
    self.assertEqual(info['xFilesFactor'], 0.5)

    # check archive information
    self.assertEqual(len(info['archives']), len(retention))
    self.assertEqual(info['archives'][0]['points'], retention[0][1])
    self.assertEqual(info['archives'][0]['secondsPerPoint'], retention[0][0])
    self.assertEqual(info['archives'][0]['retention'], retention[0][0] * retention[0][1])
    self.assertEqual(info['archives'][1]['retention'], retention[1][0] * retention[1][1])

    # remove database
    self._removedb()
def fill_archives(src, dst, startFrom):
    header = info(dst)
    archives = header['archives']
    archives = sorted(archives, key=lambda t: t['retention'])

    for archive in archives:
        fromTime = time.time() - archive['retention']
        if fromTime >= startFrom:
            continue

        (timeInfo, values) = fetch(dst, fromTime, startFrom)
        (start, end, step) = timeInfo
        gapstart = None
        for v in values:
            if not v and not gapstart:
                gapstart = start
            elif v and gapstart:
                # ignore single units lost
                if (start - gapstart) > archive['secondsPerPoint']:
                    fill(src, dst, gapstart - step, start)
                gapstart = None
            elif gapstart and start == end - step:
                fill(src, dst, gapstart - step, start)
            start += step

        startFrom = fromTime
def _read_points(self, path):
    """Return a list of (timestamp, value)."""
    info = whisper.info(path)
    res = []
    if not info:
        return []

    archives = info["archives"]
    with io.open(path, "rb") as f:
        buf = f.read()

    stage0 = True
    for archive in archives:
        offset = archive["offset"]
        stage = bg_metric.Stage(
            precision=archive["secondsPerPoint"],
            points=archive["points"],
            stage0=stage0,
        )
        stage0 = False
        if stage in self._opts.ignored_stages:
            continue

        for _ in range(archive["points"]):
            timestamp, value = _POINT_STRUCT.unpack_from(buf, offset)
            offset += whisper.pointSize
            if timestamp == 0:
                continue
            elif timestamp >= self.time_start and timestamp <= self.time_end:
                res.append((timestamp, value, 1, stage))
    return res
def fetch(self, startTime, endTime):
    data = whisper.fetch(self.fs_path, startTime, endTime)
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    aggregation_method = meta_info['aggregationMethod']
    lowest_step = min([i['secondsPerPoint'] for i in meta_info['archives']])

    # Merge in data from carbon's cache
    cached_datapoints = []
    try:
        cached_datapoints = CarbonLink.query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    if isinstance(cached_datapoints, dict):
        cached_datapoints = cached_datapoints.items()

    values = merge_with_cache(cached_datapoints, start, step,
                              values, aggregation_method)

    return time_info, values
def fetch(self, startTime, endTime, now=None, requestContext=None):
    try:
        data = whisper.fetch(self.fs_path, startTime, endTime, now)
    except IOError:
        log.exception("Failed fetch of whisper file '%s'" % self.fs_path)
        return None
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    aggregation_method = meta_info['aggregationMethod']

    # Merge in data from carbon's cache
    cached_datapoints = []
    try:
        cached_datapoints = CarbonLink.query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    if isinstance(cached_datapoints, dict):
        cached_datapoints = cached_datapoints.items()

    values = merge_with_cache(cached_datapoints, start, step,
                              values, aggregation_method)

    return time_info, values
def test_file_fetch_edge_cases(self):
    """
    Test some of the edge cases in file_fetch() that should return
    None or raise an exception
    """
    whisper.create(self.filename, [(1, 60)])

    with open(self.filename, 'rb') as fh:
        msg = "Invalid time interval: from time '{0}' is after until time '{1}'"
        until_time = 0
        from_time = int(time.time()) + 100

        with AssertRaisesException(
                whisper.InvalidTimeInterval(msg.format(from_time, until_time))):
            whisper.file_fetch(fh, fromTime=from_time, untilTime=until_time)

        # fromTime > now aka metrics from the future
        self.assertIsNone(
            whisper.file_fetch(fh, fromTime=int(time.time()) + 100,
                               untilTime=int(time.time()) + 200),
        )

        # untilTime > oldest time stored in the archive
        headers = whisper.info(self.filename)
        the_past = int(time.time()) - headers['maxRetention'] - 200
        self.assertIsNone(
            whisper.file_fetch(fh, fromTime=the_past - 1, untilTime=the_past),
        )

        # untilTime > now, change untilTime to now
        now = int(time.time())
        self.assertEqual(
            whisper.file_fetch(fh, fromTime=now, untilTime=now + 200, now=now),
            ((now + 1, now + 2, 1), [None]),
        )
def fetch(self, startTime, endTime):
    data = whisper.fetch(self.fs_path, startTime, endTime)
    if not data:
        return None

    consolidationFunc = ""
    whisper_info = whisper.info(self.fs_path)
    if "aggregationMethod" in whisper_info:
        aggregationMethod = whisper_info["aggregationMethod"]
        if aggregationMethod == 'min' or aggregationMethod == 'max':
            consolidationFunc = aggregationMethod

    time_info, values = data
    (start, end, step) = time_info

    # Merge in data from carbon's cache
    try:
        cached_datapoints = CarbonLink.query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    for (timestamp, value) in cached_datapoints:
        interval = timestamp - (timestamp % step)
        try:
            i = int(interval - start) / step
            values[i] = value
        except:
            pass

    return (time_info, values, consolidationFunc)
def fetch(self, startTime, endTime):
    data = whisper.fetch(self.fs_path, startTime, endTime)
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    lowest_step = min([i['secondsPerPoint'] for i in meta_info['archives']])

    # Merge in data from carbon's cache
    cached_datapoints = []
    try:
        if step == lowest_step:
            cached_datapoints = CarbonLink.query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    if isinstance(cached_datapoints, dict):
        cached_datapoints = cached_datapoints.items()

    for (timestamp, value) in cached_datapoints:
        interval = timestamp - (timestamp % step)
        try:
            i = int(interval - start) / step
            values[i] = value
        except:
            pass

    return (time_info, values)
def fetch(self, startTime, endTime):
    data = whisper.fetch(self.fs_path, startTime, endTime)
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    lowest_step = min([i['secondsPerPoint'] for i in meta_info['archives']])

    # Merge in data from carbon's cache
    cached_datapoints = []
    try:
        if step == lowest_step:
            cached_datapoints = CarbonLink.query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    if isinstance(cached_datapoints, dict):
        cached_datapoints = cached_datapoints.items()

    for (timestamp, value) in cached_datapoints:
        interval = timestamp - (timestamp % step)
        try:
            i = int(interval - start) / step
            values[i] = value
        except:
            pass

    return (time_info, values)
def fill_archives(src, dst, startFrom):
    header = whisper.info(dst)
    archives = header['archives']
    archives = sorted(archives, key=lambda t: t['retention'])

    for archive in archives:
        fromTime = time.time() - archive['retention']
        if fromTime >= startFrom:
            continue

        (timeInfo, values) = whisper.fetch(dst, fromTime, startFrom)
        (start, end, step) = timeInfo
        gapstart = None
        for v in values:
            if not v and not gapstart:
                gapstart = start
            elif v and gapstart:
                # ignore single units lost
                if (start - gapstart) > archive['secondsPerPoint']:
                    fill(src, dst, gapstart - step, start)
                gapstart = None
            elif gapstart and start == end - step:
                fill(src, dst, gapstart - step, start)
            start += step

        startFrom = fromTime
def fetch(self, startTime, endTime):
    try:
        data = whisper.fetch(self.fs_path, startTime, endTime)
    except IOError:
        log.exception("Failed fetch of whisper file '%s'" % self.fs_path)
        return None
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    aggregation_method = meta_info['aggregationMethod']
    lowest_step = min([i['secondsPerPoint'] for i in meta_info['archives']])

    # Merge in data from carbon's cache
    cached_datapoints = []
    try:
        cached_datapoints = CarbonLink().query(self.real_metric_path)
    except:
        log.exception("Failed CarbonLink query '%s'" % self.real_metric_path)
        cached_datapoints = []

    if isinstance(cached_datapoints, dict):
        cached_datapoints = cached_datapoints.items()

    values = merge_with_cache(cached_datapoints, start, step,
                              values, aggregation_method)

    return time_info, values
def _read_points(path):
    """Return a list of (timestamp, value)."""
    info = whisper.info(path)
    res = []
    if not info:
        return []

    archives = info["archives"]
    with open(path, "rb") as f:
        buf = f.read()

    for archive in archives:
        offset = archive["offset"]
        step = archive["secondsPerPoint"]
        expected_next_timestamp = 0

        for _ in range(archive["points"]):
            timestamp, val = _POINT_STRUCT.unpack_from(buf, offset)
            offset += whisper.pointSize
            # Detect holes in data. The heuristic is the following:
            # - If a value is non-zero, it is assumed to be meaningful.
            # - If it is a zero with a fresh timestamp relative to the last
            #   time we saw meaningful data, it is assumed to be meaningful.
            # So it unfortunately skips leading zeroes after a gap.
            if timestamp != expected_next_timestamp and val == 0:
                expected_next_timestamp += step
                continue
            else:
                expected_next_timestamp = timestamp + step
            res.append((timestamp, val))
    return res
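To see the hole-detection heuristic in isolation, here is a small sketch that packs three synthetic slots with whisper's own point format; it assumes _POINT_STRUCT is struct.Struct(whisper.pointFormat), as the unpack_from calls above imply:

import struct
import whisper

# whisper.pointFormat is "!Ld": a 4-byte timestamp plus an 8-byte double
_POINT_STRUCT = struct.Struct(whisper.pointFormat)

# three slots: a real point, an empty slot (whisper leaves unwritten
# slots all-zero), and another real point
buf = b"".join([
    _POINT_STRUCT.pack(1000, 1.5),
    _POINT_STRUCT.pack(0, 0.0),    # hole: stale timestamp and zero value
    _POINT_STRUCT.pack(1120, 2.5),
])

offset, step, expected_next_timestamp = 0, 60, 0
for _ in range(3):
    timestamp, val = _POINT_STRUCT.unpack_from(buf, offset)
    offset += whisper.pointSize
    if timestamp != expected_next_timestamp and val == 0:
        expected_next_timestamp += step
        continue  # skipped as a hole
    expected_next_timestamp = timestamp + step
    print((timestamp, val))  # prints (1000, 1.5) and then (1120, 2.5)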
def getMetadata(metric, key):
    if key != 'aggregationMethod':
        return dict(error="Unsupported metadata key \"%s\"" % key)

    wsp_path = getFilesystemPath(metric)
    try:
        value = whisper.info(wsp_path)['aggregationMethod']
        return dict(value=value)
    except Exception:
        log.err()
        return dict(error=traceback.format_exc())
def _read_metadata(metric_name, path):
    info = whisper.info(path)
    retentions = [
        (a["secondsPerPoint"], a["points"])
        for a in info["archives"]
    ]
    return bg_accessor.MetricMetadata(
        name=metric_name,
        carbon_aggregation=info["aggregationMethod"],
        carbon_retentions=retentions,
        carbon_xfilesfactor=info["xFilesFactor"],
    )
def _read_metadata(metric_name, path):
    info = whisper.info(path)
    retentions = bg_accessor.Retention([
        bg_accessor.Stage(precision=a["secondsPerPoint"], points=a["points"])
        for a in info["archives"]
    ])
    aggregator = bg_accessor.Aggregator.from_carbon_name(info["aggregationMethod"])
    return bg_accessor.MetricMetadata(
        aggregator=aggregator,
        retention=retentions,
        carbon_xfilesfactor=info["xFilesFactor"],
    )
def fill_archives(src, dst, start_from, end_at=0,
                  overwrite=False, lock_writes=False):
    """Fills gaps in dst using data from src.

    src is the path as a string
    dst is the path as a string
    start_from is the latest timestamp (archives are read backward)
    end_at is the earliest timestamp (archives are read backward);
        if absent, we take the earliest timestamp in the archive
    overwrite will write all non-null points from src to dst
    lock_writes will lock using the whisper lock if true
    """
    if lock_writes is False:
        whisper.LOCK = False
    elif whisper.CAN_LOCK and lock_writes is True:
        whisper.LOCK = True

    header = whisper.info(dst)
    archives = header['archives']
    archives = sorted(archives, key=lambda t: t['retention'])

    for archive in archives:
        from_time = max(end_at, time.time() - archive['retention'])
        if from_time >= start_from:
            continue

        (timeInfo, values) = whisper.fetch(dst, from_time, untilTime=start_from)
        (start, end, step) = timeInfo
        gapstart = None

        for value in values:
            has_value = bool(value and not overwrite)
            if not has_value and not gapstart:
                gapstart = start
            elif has_value and gapstart:
                if (start - gapstart) >= archive['secondsPerPoint']:
                    fill(src, dst, gapstart - step, start)
                gapstart = None
            start += step

        # fill if this gap continues to the end
        if gapstart:
            fill(src, dst, gapstart - step, end - step)

        # The next archive only needs to be filled up to the latest point
        # in time we updated.
        start_from = from_time
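A hedged usage sketch for this fill_archives variant, with illustrative paths (both files are assumed to exist on disk and to describe the same metric):

import time

# dst.wsp was recreated and is missing history; src.wsp is a backup
# copy with identical data. Walk the archives backwards from "now",
# copying only into the gaps found in dst.
fill_archives('src.wsp', 'dst.wsp', start_from=time.time())

# restrict the backfill to the last day, taking the whisper lock while
# writing; overwrite=True would instead copy every non-null source point
fill_archives('src.wsp', 'dst.wsp', start_from=time.time(),
              end_at=time.time() - 86400, lock_writes=True)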
def __init__(self, *args, **kwargs):
    Leaf.__init__(self, *args, **kwargs)
    real_fs_path = realpath(self.fs_path)

    start = time.time() - whisper.info(self.fs_path)['maxRetention']
    end = max(os.stat(self.fs_path).st_mtime, start)
    self.intervals = [(start, end)]

    if real_fs_path != self.fs_path:
        relative_fs_path = self.metric_path.replace('.', '/') + self.extension
        base_fs_path = self.fs_path[:-len(relative_fs_path)]
        relative_real_fs_path = real_fs_path[len(base_fs_path):]
        self.real_metric = relative_real_fs_path[:-len(self.extension)].replace('/', '.')
def __init__(self, *args, **kwargs):
    Leaf.__init__(self, *args, **kwargs)
    real_fs_path = realpath(self.fs_path)

    start = time.time() - whisper.info(self.fs_path)['maxRetention']
    end = max(os.stat(self.fs_path).st_mtime, start)
    self.intervals = [(start, end)]

    if real_fs_path != self.fs_path:
        relative_fs_path = self.metric_path.replace('.', '/') + self.extension
        base_fs_path = realpath(self.fs_path[:-len(relative_fs_path)])
        relative_real_fs_path = real_fs_path[len(base_fs_path) + 1:]
        self.real_metric = relative_real_fs_path[:-len(self.extension)].replace('/', '.')
def test_set_xfilesfactor(self):
    """
    Create a whisper file
    Update xFilesFactor
    Check if update succeeded
    Check if exceptions get raised with wrong input
    """
    whisper.create(self.filename, [(1, 20)])

    target_xff = 0.42
    info0 = whisper.info(self.filename)
    old_xff = whisper.setXFilesFactor(self.filename, target_xff)
    # return value should match old xff
    self.assertEqual(info0['xFilesFactor'], old_xff)
    info1 = whisper.info(self.filename)

    # Other header information should not change
    self.assertEqual(info0['aggregationMethod'], info1['aggregationMethod'])
    self.assertEqual(info0['maxRetention'], info1['maxRetention'])
    self.assertEqual(info0['archives'], info1['archives'])

    # packing and unpacking because
    # AssertionError: 0.20000000298023224 != 0.2
    target_xff = struct.unpack("!f", struct.pack("!f", target_xff))[0]
    self.assertEqual(info1['xFilesFactor'], target_xff)

    with AssertRaisesException(
            whisper.InvalidXFilesFactor('Invalid xFilesFactor zero, not a '
                                        'float')):
        whisper.setXFilesFactor(self.filename, "zero")

    for invalid_xff in -1, 2:
        with AssertRaisesException(
                whisper.InvalidXFilesFactor('Invalid xFilesFactor %s, not '
                                            'between 0 and 1' % float(invalid_xff))):
            whisper.setXFilesFactor(self.filename, invalid_xff)
def fsck(path, fix=False):
    try:
        info = whisper.info(path)
    except whisper.CorruptWhisperFile as e:
        print "Found: %s" % str(e)
        badname = path + ".corrupt.%s" % time.strftime("%Y%m%d-%H%M%S")
        if fix:
            print "Moving %s => %s" % (path, badname)
            os.rename(path, badname)
        return 1
    except Exception as e:
        print "ERROR (unhandled): %s" % str(e)
        return 1
    return 0
def _read_metadata(metric_name, path):
    info = whisper.info(path)
    if not info:
        return None

    retentions = bg_metric.Retention([
        bg_metric.Stage(precision=a["secondsPerPoint"], points=a["points"])
        for a in info["archives"]
    ])
    aggregator = bg_metric.Aggregator.from_carbon_name(
        info["aggregationMethod"])
    return bg_metric.MetricMetadata.create(
        aggregator=aggregator,
        retention=retentions,
        carbon_xfilesfactor=info["xFilesFactor"],
    )
def fill_archives(src, dst, startFrom, endAt=0,
                  overwrite=False, lock_writes=False):
    """Fills gaps in dst using data from src.

    src is the path as a string
    dst is the path as a string
    startFrom is the latest timestamp (archives are read backward)
    endAt is the earliest timestamp (archives are read backward);
        if absent, we take the earliest timestamp in the archive
    overwrite will write all non-null points from src to dst
    lock_writes will lock using the whisper lock if true
    """
    if lock_writes is False:
        whisper.LOCK = False
    elif whisper.CAN_LOCK and lock_writes is True:
        whisper.LOCK = True

    header = whisper.info(dst)
    archives = header['archives']
    archives = sorted(archives, key=lambda t: t['retention'])

    for archive in archives:
        fromTime = max(endAt, time.time() - archive['retention'])
        if fromTime >= startFrom:
            continue

        (timeInfo, values) = whisper.fetch(dst, fromTime, untilTime=startFrom)
        (start, end, step) = timeInfo
        gapstart = None

        for value in values:
            has_value = bool(value and not overwrite)
            if not has_value and not gapstart:
                gapstart = start
            elif has_value and gapstart:
                if (start - gapstart) >= archive['secondsPerPoint']:
                    fill(src, dst, gapstart - step, start)
                gapstart = None
            start += step

        # fill if this gap continues to the end
        if gapstart:
            fill(src, dst, gapstart - step, end - step)

        # The next archive only needs to be filled up to the latest point
        # in time we updated.
        startFrom = fromTime
def diff_file_conf(metric, filepath):
    """
    Returns true if the actual file has parameters different from
    those in the configuration files
    """
    (archiveConfig, xFilesFactor, aggregationMethod) = get_archive_config(metric)
    info = whisper.info(filepath)

    if info['xFilesFactor'] != xFilesFactor or info['aggregationMethod'] != aggregationMethod:
        #print "{0} {1}".format(info['aggregationMethod'], aggregationMethod)
        #print "{0} {1}".format(info['xFilesFactor'], xFilesFactor)
        return True

    for (archivefile, archiveconf) in zip(info['archives'], archiveConfig):
        (secondsPerPoint, points) = archiveconf
        #print "{0} {1}".format(archivefile['secondsPerPoint'], secondsPerPoint)
        #print "{0} {1}".format(archivefile['points'], points)
        if archivefile['secondsPerPoint'] != secondsPerPoint or archivefile['points'] != points:
            return True
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)
    srcArchives = srcHeader["archives"]
    srcArchives.sort(key=itemgetter("retention"))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader["maxRetention"]

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive["retention"]
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(
            itertools.ifilter(
                lambda points: points[1] is not None,
                itertools.izip(xrange(start, end, archive_step), values)
            )
        )
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)
    srcArchives = srcHeader['archives']
    srcArchives.sort(key=itemgetter('retention'))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader['maxRetention']

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive['retention']
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(
            itertools.ifilter(
                lambda points: points[1] is not None,
                itertools.izip(xrange(start, end, archive_step), values)))
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return
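fill itself can also be called directly for a single window; a small sketch with illustrative paths, relying on the same module-level info/fetch/update_many imports used above (both files are assumed to exist):

import time

# copy the last hour from src.wsp into dst.wsp; fill() walks the source
# archives from most to least precise, so each point comes from the best
# resolution that still covers its timestamp
now = int(time.time())
fill('src.wsp', 'dst.wsp', now - 3600, now)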
def walk_dir(base_dir, delete_corrupt=False, verbose=False):
    for dirpath, dirnames, filenames in os.walk(base_dir):
        if verbose:
            print("Scanning %s…" % dirpath)

        whisper_files = (os.path.join(dirpath, i)
                         for i in filenames if i.endswith('.wsp'))

        for f in whisper_files:
            try:
                info = whisper.info(f)
            except whisper.CorruptWhisperFile:
                if delete_corrupt:
                    print('Deleting corrupt Whisper file: %s' % f, file=sys.stderr)
                    os.unlink(f)
                else:
                    print('Corrupt Whisper file: %s' % f, file=sys.stderr)
                continue

            if verbose:
                print('%s: %d points' %
                      (f, sum(i['points'] for i in info.get('archives', {}))))
def _read_points(path):
    """Return a list of (timestamp, value)."""
    info = whisper.info(path)
    res = []
    if not info:
        return []

    archives = info["archives"]
    with open(path, "rb") as f:
        buf = f.read()

    # Two or more archives can contain a given timestamp.
    # As archives are from most precise to least precise, we track the oldest
    # point we've found in more precise archives and ignore the newer ones.
    prev_archive_starts_at = float("inf")
    for archive in archives:
        offset = archive["offset"]
        step = archive["secondsPerPoint"]
        archive_starts_at = float("inf")
        expected_next_timestamp = 0

        for _ in range(archive["points"]):
            timestamp, val = _POINT_STRUCT.unpack_from(buf, offset)
            offset += whisper.pointSize
            # Detect holes in data. The heuristic is the following:
            # - If a value is non-zero, it is assumed to be meaningful.
            # - If it is a zero with a fresh timestamp relative to the last
            #   time we saw meaningful data, it is assumed to be meaningful.
            # So it unfortunately skips leading zeroes after a gap.
            if timestamp != expected_next_timestamp and val == 0:
                expected_next_timestamp += step
                continue
            else:
                expected_next_timestamp = timestamp + step

            archive_starts_at = min(timestamp, archive_starts_at)
            if timestamp < prev_archive_starts_at:
                res.append((timestamp, val))

        prev_archive_starts_at = archive_starts_at
    return res
def diff_file_conf(metric, filepath):
    """
    Returns true if the actual file has parameters different from
    those in the configuration files
    """
    (archiveConfig, xFilesFactor, aggregationMethod) = get_archive_config(metric)
    info = whisper.info(filepath)

    if info['xFilesFactor'] != xFilesFactor or \
            info['aggregationMethod'] != aggregationMethod:
        #print "{0} {1}".format(info['aggregationMethod'], aggregationMethod)
        #print "{0} {1}".format(info['xFilesFactor'], xFilesFactor)
        return True

    for (archivefile, archiveconf) in zip(info['archives'], archiveConfig):
        (secondsPerPoint, points) = archiveconf
        #print "{0} {1}".format(archivefile['secondsPerPoint'], secondsPerPoint)
        #print "{0} {1}".format(archivefile['points'], points)
        if archivefile['secondsPerPoint'] != secondsPerPoint or \
                archivefile['points'] != points:
            return True
def fetch(self, startTime, endTime):
    try:
        data = whisper.fetch(self.fs_path, startTime, endTime)
    except IOError:
        log.exception("Failed fetch of whisper file '%s'" % self.fs_path)
        return None
    if not data:
        return None

    time_info, values = data
    (start, end, step) = time_info

    meta_info = whisper.info(self.fs_path)
    aggregation_method = meta_info['aggregationMethod']
    lowest_step = min([i['secondsPerPoint'] for i in meta_info['archives']])

    # Merge in data from carbon's cache
    values = merge_with_carbonlink(
        self.real_metric_path, start, step, values, aggregation_method)

    return time_info, values
def fill_archives(src, dst, startFrom, endAt=0):
    """Fills gaps in dst using data from src.

    src is the path as a string
    dst is the path as a string
    startFrom is the latest timestamp (archives are read backward)
    endAt is the earliest timestamp (archives are read backward);
        if absent, we take the earliest timestamp in the archive
    """
    header = info(dst)
    archives = header['archives']
    archives = sorted(archives, key=lambda t: t['retention'])

    for archive in archives:
        fromTime = max(endAt, time.time() - archive['retention'])
        if fromTime >= startFrom:
            continue

        (timeInfo, values) = fetch(dst, fromTime, untilTime=startFrom)
        (start, end, step) = timeInfo
        gapstart = None
        for v in values:
            if not v and not gapstart:
                gapstart = start
            elif v and gapstart:
                # ignore single units lost
                if (start - gapstart) > archive['secondsPerPoint']:
                    fill(src, dst, gapstart - step, start)
                gapstart = None
            elif gapstart and start == end - step:
                fill(src, dst, gapstart - step, start)
            start += step

        # The next archive only needs to be filled up to the latest point
        # in time we updated.
        startFrom = fromTime
option_parser = optparse.OptionParser(usage='''%prog path [field]''')
(options, args) = option_parser.parse_args()

if len(args) < 1:
    option_parser.print_help()
    sys.exit(1)

path = args[0]

if len(args) > 1:
    field = args[1]
else:
    field = None

try:
    info = whisper.info(path)
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))

info['fileSize'] = os.stat(path).st_size

if field:
    if field not in info:
        print('Unknown field "%s". Valid fields are %s' % (field, ','.join(info)))
        sys.exit(1)

    print(info[field])
    sys.exit(0)

archives = info.pop('archives')
def processMetric(fullPath, schemas, agg_schemas):
    """
    Method to process a given metric, and resize it if necessary

    Parameters:
        fullPath    - full path to the metric whisper file
        schemas     - carbon storage schemas loaded from config
        agg_schemas - carbon storage aggregation schemas loaded from config
    """
    schema_config_args = ''
    schema_file_args = ''
    rebuild = False
    messages = ''

    # get archive info from whisper file
    info = whisper.info(fullPath)

    # get graphite metric name from fullPath
    metric = getMetricFromPath(fullPath)

    # loop the carbon-storage schemas
    for schema in schemas:
        if schema.matches(metric):
            # returns secondsPerPoint and points for this schema in tuple format
            archive_config = [archive.getTuple() for archive in schema.archives]
            break

    # loop through the carbon-aggregation schemas
    for agg_schema in agg_schemas:
        if agg_schema.matches(metric):
            xFilesFactor, aggregationMethod = agg_schema.archives
            break

    # loop through the bucket tuples and convert to string format for resizing
    for retention in archive_config:
        current_schema = '%s:%s ' % (retention[0], retention[1])
        schema_config_args += current_schema

    # loop through the current file's bucket sizes and convert to string format
    # to compare for resizing
    for fileRetention in info['archives']:
        current_schema = '%s:%s ' % (fileRetention['secondsPerPoint'],
                                     fileRetention['points'])
        schema_file_args += current_schema

    # check to see if the current and configured schemas are the same or rebuild
    if (schema_config_args != schema_file_args):
        rebuild = True
        messages += 'updating Retentions from: %s to: %s \n' % (schema_file_args,
                                                                schema_config_args)

    # only care about the first two decimals in the comparison since there is
    # floaty stuff going on.
    info_xFilesFactor = "{0:.2f}".format(info['xFilesFactor'])
    str_xFilesFactor = "{0:.2f}".format(xFilesFactor)

    # check to see if the current and configured xFilesFactor are the same
    if (str_xFilesFactor != info_xFilesFactor):
        rebuild = True
        messages += '%s xFilesFactor differs real: %s should be: %s \n' % (
            metric, info_xFilesFactor, str_xFilesFactor)

    # check to see if the current and configured aggregationMethods are the same
    if (aggregationMethod != info['aggregationMethod']):
        rebuild = True
        messages += '%s aggregation schema differs real: %s should be: %s \n' % (
            metric, info['aggregationMethod'], aggregationMethod)

    # also compare the exact xFilesFactor values; note this re-flags files
    # whose stored float32 value cannot represent the configured factor exactly
    if (xFilesFactor != info['xFilesFactor']):
        rebuild = True
        messages += '%s xFilesFactor differs real: %s should be: %s \n' % (
            metric, info['xFilesFactor'], xFilesFactor)

    # if we need to rebuild, lets do it.
    if (rebuild == True):
        cmd = 'whisper-resize.py %s %s --xFilesFactor=%s --aggregationMethod=%s %s' % (
            fullPath, options.extra_args, xFilesFactor, aggregationMethod,
            schema_config_args)
        if (options.quiet != True or options.confirm == True):
            print messages
            print cmd

        if (options.confirm == True):
            options.doit = confirm("Would you like to run this command? [y/n]: ")
            if (options.doit == False):
                print "Skipping command \n"

        if (options.doit == True):
            exitcode = call(cmd, shell=True)
            # if the command failed lets bail so we can take a look before proceeding
            if (exitcode > 0):
                print 'Error running: %s' % (cmd)
                sys.exit(1)
                         ' Note that this will make things slower and use more memory.')

(options, args) = option_parser.parse_args()

if len(args) < 2:
    option_parser.print_usage()
    sys.exit(1)

path = args[0]

if not os.path.exists(path):
    sys.stderr.write("[ERROR] File '%s' does not exist!\n\n" % path)
    option_parser.print_usage()
    sys.exit(1)

info = whisper.info(path)

new_archives = [whisper.parseRetentionDef(retentionDef)
                for retentionDef in args[1:]]

old_archives = info['archives']
# sort by precision, lowest to highest
old_archives.sort(key=lambda a: a['secondsPerPoint'], reverse=True)

if options.xFilesFactor is None:
    xff = info['xFilesFactor']
else:
    xff = options.xFilesFactor

if options.aggregationMethod is None:
def getMetadata(self, metric, key):
    if key != 'aggregationMethod':
        raise ValueError("Unsupported metadata key \"%s\"" % key)

    wsp_path = self.getFilesystemPath(metric)
    return whisper.info(wsp_path)['aggregationMethod']
def processMetric(fullPath, schemas, agg_schemas):
    """
    Method to process a given metric, and resize it if necessary

    Parameters:
        fullPath    - full path to the metric whisper file
        schemas     - carbon storage schemas loaded from config
        agg_schemas - carbon storage aggregation schemas loaded from config
    """
    schema_config_args = ''
    schema_file_args = ''
    rebuild = False
    messages = ''

    # get archive info from whisper file
    info = whisper.info(fullPath)

    # get graphite metric name from fullPath
    metric = getMetricFromPath(fullPath)

    # loop the carbon-storage schemas
    for schema in schemas:
        if schema.matches(metric):
            # returns secondsPerPoint and points for this schema in tuple format
            archive_config = [archive.getTuple() for archive in schema.archives]
            break

    # loop through the carbon-aggregation schemas; fall back to whisper's
    # defaults when no aggregation schema matches
    xFilesFactor, aggregationMethod = None, None
    for agg_schema in agg_schemas:
        if agg_schema.matches(metric):
            xFilesFactor, aggregationMethod = agg_schema.archives
            break

    if xFilesFactor is None:
        xFilesFactor = 0.5
    if aggregationMethod is None:
        aggregationMethod = 'average'

    # loop through the bucket tuples and convert to string format for resizing
    for retention in archive_config:
        current_schema = '%s:%s ' % (retention[0], retention[1])
        schema_config_args += current_schema

    # loop through the current file's bucket sizes and convert to string format
    # to compare for resizing
    for fileRetention in info['archives']:
        current_schema = '%s:%s ' % (fileRetention['secondsPerPoint'],
                                     fileRetention['points'])
        schema_file_args += current_schema

    # check to see if the current and configured schemas are the same or rebuild
    if (schema_config_args != schema_file_args):
        rebuild = True
        messages += 'updating Retentions from: %s to: %s \n' % \
                    (schema_file_args, schema_config_args)

    # only care about the first two decimals in the comparison since there is
    # floaty stuff going on.
    info_xFilesFactor = "{0:.2f}".format(info['xFilesFactor'])
    str_xFilesFactor = "{0:.2f}".format(xFilesFactor)

    # check to see if the current and configured xFilesFactor are the same
    if (str_xFilesFactor != info_xFilesFactor):
        rebuild = True
        messages += '%s xFilesFactor differs real: %s should be: %s \n' % \
                    (metric, info_xFilesFactor, str_xFilesFactor)

    # check to see if the current and configured aggregationMethods are the same
    if (aggregationMethod != info['aggregationMethod']):
        rebuild = True
        messages += '%s aggregation schema differs real: %s should be: %s \n' % \
                    (metric, info['aggregationMethod'], aggregationMethod)

    # if we need to rebuild, lets do it.
    if rebuild is True:
        cmd = [whisperResizeExecutable, fullPath]
        for x in shlex.split(options.extra_args):
            cmd.append(x)
        cmd.append('--xFilesFactor=' + str(xFilesFactor))
        cmd.append('--aggregationMethod=' + str(aggregationMethod))
        for x in shlex.split(schema_config_args):
            cmd.append(x)

        if options.quiet is not True or options.confirm is True:
            print(messages)
            print(cmd)

        if options.confirm is True:
            options.doit = confirm("Would you like to run this command? [y/n]: ")
            if options.doit is False:
                print("Skipping command \n")

        if options.doit is True:
            exitcode = call(cmd)
            # if the command failed lets bail so we can take a look before proceeding
            if (exitcode > 0):
                print('Error running: %s' % (cmd))
                sys.exit(1)
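For concreteness, a hypothetical example of what the assembled cmd list looks like when a file's retentions change; the path and executable name are illustrative, not taken from the source:

# roughly what the cmd list above ends up containing for a file whose
# retentions changed from "60:1440" to "60:1440 300:2016" (path and
# executable name are illustrative)
cmd = ['whisper-resize.py',
       '/opt/graphite/storage/whisper/foo/bar.wsp',
       '--xFilesFactor=0.5',
       '--aggregationMethod=average',
       '60:1440', '300:2016']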
def info(self):
    if not self.meta_info:
        self.meta_info = whisper.info(self.fs_path)
    return self.meta_info
def get_intervals(self):
    start = time.time() - whisper.info(self.fs_path)['maxRetention']
    end = max(stat(self.fs_path).st_mtime, start)
    return IntervalSet([Interval(start, end)])
def get_intervals(self):
    start = time.time() - whisper.info(self.fs_path)['maxRetention']
    end = max(os.stat(self.fs_path).st_mtime, start)
    return IntervalSet([Interval(start, end)])
option_parser = optparse.OptionParser(usage='''%prog path [field]''')
(options, args) = option_parser.parse_args()

if len(args) < 1:
    option_parser.print_help()
    sys.exit(1)

path = args[0]

if len(args) > 1:
    field = args[1]
else:
    field = None

try:
    info = whisper.info(path)
except whisper.WhisperException, exc:
    raise SystemExit('[ERROR] %s' % str(exc))

info['fileSize'] = os.stat(path).st_size

if field:
    if field not in info:
        print 'Unknown field "%s". Valid fields are %s' % (field, ','.join(info))
        sys.exit(1)

    print info[field]
    sys.exit(0)

archives = info.pop('archives')