Example #1
    def test_single_metric(self):
        xfilesfactor = 0.5
        aggregation_method = "last"
        # These retentions are such that every other point is present in both
        # archives. The test validates that duplicate points get inserted only once.
        retentions = [(1, 10), (2, 10)]
        high_precision_duration = retentions[0][0] * retentions[0][1]
        low_precision_duration = retentions[1][0] * retentions[1][1]
        now = int(time.time())
        time_from, time_to = now - low_precision_duration, now
        points = [(float(t), float(now - t))
                  for t in xrange(time_from, time_to)]
        metric = "test_metric"
        metric_path = os_path.join(self.tempdir, metric + ".wsp")
        whisper.create(metric_path, retentions, xfilesfactor,
                       aggregation_method)
        whisper.update_many(metric_path, points)

        self._call_main()

        metric = self.accessor.get_metric(metric_name)
        self.assertTrue(metric)
        self.assertEqual(metric.name, metric_name)
        self.assertEqual(metric.aggregator.carbon_name, aggregation_method)
        self.assertEqual(metric.carbon_xfilesfactor, xfilesfactor)
        self.assertEqual(metric.retention.as_string, "10*1s:10*2s")

        points_again = list(
            self.accessor.fetch_points(metric, time_from, time_to,
                                       metric.retention[0]))
        self.assertEqual(points[-high_precision_duration:], points_again)
Example #2
    def test_update_many_excess(self):
        # given an empty db
        wsp = "test_update_many_excess.wsp"
        self.addCleanup(self._remove, wsp)
        archive_len = 3
        archive_step = 1
        whisper.create(wsp, [(archive_step, archive_len)])

        # given more points than the db can hold
        excess_len = 1
        num_input_points = archive_len + excess_len
        test_now = int(time.time())
        input_start = test_now - num_input_points + archive_step
        input_points = [(input_start + i, random.random() * 10)
                        for i in range(num_input_points)]

        # when the db is updated with too many points
        whisper.update_many(wsp, input_points, now=test_now)

        # then only the most recent input points (those at the end) were written
        actual_time_info = whisper.fetch(wsp, 0, now=test_now)[0]
        self.assertEqual(actual_time_info,
                         (input_points[-archive_len][0],
                          input_points[-1][0] + archive_step,  # untilInterval = newest + step
                          archive_step))
    def test_single_metric(self):
        xfilesfactor = 0.5
        aggregation_method = "last"
        # These retentions are such that every other point is present in both
        # archives. The test validates that duplicate points get inserted only once.
        retentions = [(1, 10), (2, 10)]
        high_precision_duration = retentions[0][0] * retentions[0][1]
        low_precision_duration = retentions[1][0] * retentions[1][1]
        now = int(time.time())
        time_from, time_to = now - low_precision_duration, now
        points = [(float(t), float(now-t)) for t in xrange(time_from, time_to)]
        metric = "test_metric"
        metric_path = os_path.join(self.tempdir, metric + ".wsp")
        whisper.create(metric_path, retentions, xfilesfactor, aggregation_method)
        whisper.update_many(metric_path, points)

        self._call_main()

        metric = self.accessor.get_metric(metric_name)
        self.assertTrue(metric)
        self.assertEqual(metric.name, metric_name)
        self.assertEqual(metric.aggregator.carbon_name, aggregation_method)
        self.assertEqual(metric.carbon_xfilesfactor, xfilesfactor)
        self.assertEqual(metric.retention.as_string, "10*1s:10*2s")

        points_again = list(self.accessor.fetch_points(
            metric, time_from, time_to, metric.retention[0]))
        self.assertEqual(points[-high_precision_duration:], points_again)
Example #4
    def update(self, updates):
        """
        Update the whisper databases in bulk.
        """
        # Group the updates by box_id.
        sorted_updates = defaultdict(lambda: defaultdict(int))

        # Whisper timestamps have one-second resolution, so collapse
        # sub-second events onto the same second.
        for box_id, timestamp in updates:
            sorted_updates[box_id][int(timestamp)] += 1

        for box_id, timestamps in sorted_updates.items():
            # whisper.update_many() already sorts the points internally:
            # timestamps = sorted(timestamps.items())
            timestamps = timestamps.items()

            db_path = os.path.join(self.dir_prefix,
                                   TimedDB.make_db_name(box_id))

            if not os.path.exists(db_path):
                logger.info('create whisper db for box %s at path %s', box_id,
                            db_path)
                whisper.create(db_path,
                               WHISPER_ARCHIVES,
                               xFilesFactor=0.5,
                               aggregationMethod='sum',
                               sparse=False,
                               useFallocate=True)

            with get_lock(db_path):
                whisper.update_many(db_path, timestamps)
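
A note on the input this update() method expects: a flat iterable of (box_id, timestamp) events, which it buckets into per-box, per-second counts before writing them with whisper.update_many(). A hedged sketch with invented box ids, timestamps and a hypothetical `store` instance of the class above:

# Hypothetical events; the two "box-1" events fall within the same second,
# so update() records a count of 2 for that timestamp.
events = [
    ("box-1", 1588836720.10),
    ("box-1", 1588836720.90),
    ("box-2", 1588836721.00),
]
store.update(events)  # 'store' is an instance of the class defined above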
Example #5
    def test_update_many_excess(self):
        # given an empty db
        wsp = "test_update_many_excess.wsp"
        self.addCleanup(self._remove, wsp)
        archive_len = 3
        archive_step = 1
        whisper.create(wsp, [(archive_step, archive_len)])

        # given more points than the db can hold
        excess_len = 1
        num_input_points = archive_len + excess_len
        test_now = int(time.time())
        input_start = test_now - num_input_points + archive_step
        input_points = [(input_start + i, random.random() * 10)
                        for i in range(num_input_points)]

        # when the db is updated with too many points
        whisper.update_many(wsp, input_points, now=test_now)

        # then only the most recent input points (those at the end) were written
        actual_time_info = whisper.fetch(wsp, 0, now=test_now)[0]
        self.assertEqual(actual_time_info,
                         (input_points[-archive_len][0],
                          input_points[-1][0] + archive_step,  # untilInterval = newest + step
                          archive_step))
Example #6
def writeWhisperFile(dbFilePath, datapoints):
  try:
    whisper.update_many(dbFilePath, datapoints)
  except:
    log.msg("Error writing to %s" % (dbFilePath))
    log.err()
    instrumentation.increment('errors')
    return False
  return True
Example #7
 def _createdb(self, wsp, schema=[(1, 20)], data=None):
     whisper.create(wsp, schema)
     if data is None:
         tn = time.time() - 20
         data = []
         for i in range(20):
             data.append((tn + 1 + i, random.random() * 10))
     whisper.update_many(wsp, data)
     return data
Example #8
 def _createdb(self, wsp, schema=[(1, 20)], data=None):
     whisper.create(wsp, schema)
     if data is None:
         tn = time.time() - 20
         data = []
         for i in range(20):
             data.append((tn + 1 + i, random.random() * 10))
     whisper.update_many(wsp, data)
     return data
Example #9
    def test_resize_with_aggregate(self):
        """resize whisper file with aggregate"""
        # 60s per point, keeping two days of data
        retention = [(60, 60 * 24 * 2)]
        whisper.create(self.filename, retention)

        # insert data
        now_timestamp = int(
            (datetime.now() - datetime(1970, 1, 1)).total_seconds())
        now_timestamp -= now_timestamp % 60  # format timestamp
        points = [(now_timestamp - i * 60, i) for i in range(0, 60 * 24 * 2)]
        whisper.update_many(self.filename, points)
        data = whisper.fetch(self.filename,
                             fromTime=now_timestamp - 3600 * 25,
                             untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data[1]), 10)
        self.assertEqual(data[0][2], 60)  # high retention == 60
        for d in data[1]:
            self.assertIsNotNone(d)
        # resize from high to low
        os.system(
            'whisper-resize.py %s 60s:1d 300s:2d --aggregate --nobackup >/dev/null'
            % self.filename)  # noqa
        data_low = whisper.fetch(self.filename,
                                 fromTime=now_timestamp - 3600 * 25,
                                 untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data_low[1]), 2)
        self.assertEqual(data_low[0][2], 300)  # low retention == 300
        for d in data_low[1]:
            self.assertIsNotNone(d)
        data_high = whisper.fetch(self.filename,
                                  fromTime=now_timestamp - 60 * 10,
                                  untilTime=now_timestamp)
        self.assertEqual(len(data_high[1]), 10)
        self.assertEqual(data_high[0][2], 60)  # high retention == 60
        # resize from low to high
        os.system(
            'whisper-resize.py %s 60s:2d --aggregate --nobackup >/dev/null' %
            self.filename)  # noqa
        data1 = whisper.fetch(self.filename,
                              fromTime=now_timestamp - 3600 * 25,
                              untilTime=now_timestamp - 3600 * 25 + 60 * 10)
        self.assertEqual(len(data1[1]), 10)
        # noqa data1 looks like ((1588836720, 1588837320, 60), [None, None, 1490.0, None, None, None, None, 1485.0, None, None])
        # data1[1] has exactly two non-None values
        self.assertEqual(len(list(filter(lambda x: x is not None, data1[1]))),
                         2)
        data2 = whisper.fetch(self.filename,
                              fromTime=now_timestamp - 60 * 15,
                              untilTime=now_timestamp - 60 * 5)
        # noqa data2 looks like ((1588925820, 1588926420, 60), [10.0, 11.0, 10.0, 9.0, 8.0, 5.0, 6.0, 5.0, 4.0, 3.0])
        self.assertEqual(len(list(filter(lambda x: x is not None, data2[1]))),
                         10)

        # clean up
        self.tearDown()
Example #10
 def write_series(self, series):
     file_name = os.path.join(
         WHISPER_DIR,
         '{0}.wsp'.format(series.pathExpression.replace('.', os.sep)))
     os.makedirs(os.path.dirname(file_name))
     whisper.create(file_name, [(1, 180)])
     data = []
     for index, value in enumerate(series):
         if value is None:
             continue
         data.append((series.start + index * series.step, value))
     whisper.update_many(file_name, data)
Example #11
    def _update(self, datapoints):
        """
        This method stores the datapoints in the current database.

            :datapoints: a list of (epoch timestamp, value) tuples, e.g.
                 [(1368977629, 10)]
        """
        if len(datapoints) == 1:
            timestamp, value = datapoints[0]
            whisper.update(self.path, value, timestamp)
        else:
            whisper.update_many(self.path, datapoints)
Example #12
    def _update(self, datapoints):
        """
        This method stores the datapoints in the current database.

            :datapoints: a list of (epoch timestamp, value) tuples, e.g.
                 [(1368977629, 10)]
        """
        if len(datapoints) == 1:
            timestamp, value = datapoints[0]
            whisper.update(self.path, value, timestamp)
        else:
            whisper.update_many(self.path, datapoints)
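
A hedged, self-contained sketch of the same single-vs-bulk dispatch using only the public whisper API (the file name and retention below are invented, not part of the example above):

import time
import whisper

path = "dispatch_example.wsp"      # hypothetical file
whisper.create(path, [(1, 60)])    # 1-second resolution, 60 points

now = int(time.time())
# A single datapoint goes through whisper.update(); several datapoints go
# through whisper.update_many(), mirroring the _update() method above.
whisper.update(path, 10, now - 2)
whisper.update_many(path, [(now - 1, 11), (now, 12)])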
Example #13
def update(path, datapoints):
    nrOfPoints = len(datapoints)
    if nrOfPoints == 1:
        (timestamp, value) = datapoints[0]
        timestamp = timegm(timestamp.timetuple())
        whisper.update(path, value, timestamp)
    elif nrOfPoints > 1:
        whisper.update_many(path + '.wsp', [
            (timegm(t.timetuple()), v) for (t,v) in datapoints])
    else:
        raise Exception("No Datapoint given")

    return True
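
A hedged usage sketch for the helper above, with datapoints keyed by datetime objects rather than epoch seconds (the metric path and values are invented, and the target .wsp file is assumed to exist):

from datetime import datetime, timedelta

base = datetime.utcnow() - timedelta(seconds=5)
# Five (datetime, value) pairs; update() converts each timestamp to epoch
# seconds with timegm() before calling the whisper API. Note that the helper
# appends '.wsp' to the path only in the multi-point branch.
update("some_metric", [(base + timedelta(seconds=i), float(i)) for i in range(5)])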
Example #14
def load_data(f_name, dest_file):
    with open(f_name, 'r') as fp:
        start = False
        for line in fp:
            datapoints = []
            if start == False:
                if line.find("Archive ") == 0 and line.find(" data:") > 0:
                    start = True
            else:
                datas = line.split(" ")
                if len(datas) == 3 and datas[0] != 'Archive':
                    datapoints.append((datas[1][:-1], datas[2]))
                    #print datapoints
                    whisper.update_many(dest_file, datapoints)
Example #15
 def handle(self):
     points = 0
     for metric in self.redis.smembers(METRICS):
         values = self.redis.zrange(metric, 0, -1)
         points += len(values)
         f = target_to_path(self.path, metric)
         d = os.path.dirname(f)
         if d not in self.dirs:
             if not os.path.isdir(d):
                 os.makedirs(d)
             self.dirs.add(d)
         if not os.path.exists(f):
             whisper.create(f, [(10, 1000)])  # [FIXME] hardcoded values
         whisper.update_many(f, [struct.unpack('!ff', a) for a in values])
         if len(values):
             self.redis.zrem(metric, *values)
     self.metric(METRIC_POINTS, points)
Example #16
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)

    srcArchives = srcHeader["archives"]
    srcArchives.sort(key=itemgetter("retention"))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader["maxRetention"]

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive["retention"]
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(
            itertools.ifilter(
                lambda points: points[1] is not None, itertools.izip(xrange(start, end, archive_step), values)
            )
        )
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return
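
A minimal sketch of how a backfill helper like fill() above might be driven, assuming info(), fetch() and update_many() are imported from whisper at module level as the snippet implies (the file names and retention are invented):

import time
import whisper

src, dst = "src_metric.wsp", "dst_metric.wsp"   # hypothetical paths
whisper.create(src, [(1, 3600)])                # 1-second resolution, 1 hour
whisper.create(dst, [(1, 3600)])

now = int(time.time())
# Populate the source, then copy the last 30 minutes of non-None points
# into the still-empty destination.
whisper.update_many(src, [(now - i, float(i)) for i in range(1, 3600)])
fill(src, dst, now - 1800, now)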
Example #17
    def _update(self, wsp=None, schema=None):
        wsp = wsp or self.filename
        schema = schema or [(1, 20)]

        num_data_points = 20

        # create sample data
        whisper.create(wsp, schema)
        tn = time.time() - num_data_points
        data = []
        for i in range(num_data_points):
            data.append((tn + 1 + i, random.random() * 10))

        # test single update
        whisper.update(wsp, data[0][1], data[0][0])

        # test multi update
        whisper.update_many(wsp, data[1:])
        return data
Example #18
    def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False):
        wsp = wsp or self.filename
        schema = schema or [(1, 20)]

        num_data_points = 20

        # create sample data
        whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate)
        tn = time.time() - num_data_points
        data = []
        for i in range(num_data_points):
            data.append((tn + 1 + i, random.random() * 10))

        # test single update
        whisper.update(wsp, data[0][1], data[0][0])

        # test multi update
        whisper.update_many(wsp, data[1:])
        return data
Example #19
    def _update(self, wsp=None, schema=None):
        wsp = wsp or self.db
        schema = schema or [(1, 20)]
        num_data_points = 20

        whisper.create(wsp, schema)

        # create sample data
        tn = time.time() - num_data_points
        data = []
        for i in range(num_data_points):
            data.append((tn + 1 + i, random.random() * 10))

        # test single update
        whisper.update(wsp, data[0][1], data[0][0])

        # test multi update
        whisper.update_many(wsp, data[1:])
        return data
Example #20
def fill(src, dst, tstart, tstop):
    # fetch range start-stop from src, taking values from the highest
    # precision archive, thus optionally requiring multiple fetch + merges
    srcHeader = info(src)

    srcArchives = srcHeader['archives']
    srcArchives.sort(key=itemgetter('retention'))

    # find oldest point in time, stored by both files
    srcTime = int(time.time()) - srcHeader['maxRetention']

    if tstart < srcTime and tstop < srcTime:
        return

    # we want to retain as much precision as we can, hence we do backwards
    # walk in time

    # skip forward at max 'step' points at a time
    for archive in srcArchives:
        # skip over archives that don't have any data points
        rtime = time.time() - archive['retention']
        if tstop <= rtime:
            continue

        untilTime = tstop
        fromTime = rtime if rtime > tstart else tstart

        (timeInfo, values) = fetch(src, fromTime, untilTime)
        (start, end, archive_step) = timeInfo
        pointsToWrite = list(
            itertools.ifilter(
                lambda points: points[1] is not None,
                itertools.izip(xrange(start, end, archive_step), values)))
        # order points by timestamp, newest first
        pointsToWrite.sort(key=lambda p: p[0], reverse=True)
        update_many(dst, pointsToWrite)

        tstop = fromTime

        # can stop when there's nothing to fetch any more
        if tstart == tstop:
            return
Example #21
    def _update(self, wsp=None, schema=None, sparse=False, useFallocate=False):
        wsp = wsp or self.filename
        schema = schema or [(1, 20)]

        num_data_points = 20

        # create sample data
        self.addCleanup(self._remove, wsp)
        whisper.create(wsp, schema, sparse=sparse, useFallocate=useFallocate)
        tn = int(time.time()) - num_data_points

        data = []
        for i in range(num_data_points):
            data.append((tn + 1 + i, random.random() * 10))

        # test single update
        whisper.update(wsp, data[0][1], data[0][0])

        # test multi update
        whisper.update_many(wsp, data[1:])

        return data
    def test_single_metric(self):
        xfilesfactor = 0.5
        aggregation_method = "last"
        retentions = [(1, 60)]
        now = int(time.time())
        time_from, time_to = now - 10, now
        points = [(t, now-t) for t in xrange(time_from, time_to)]
        metric = "test_metric"
        metric_path = os_path.join(self.tempdir, metric + ".wsp")
        whisper.create(metric_path, retentions, xfilesfactor, aggregation_method)
        whisper.update_many(metric_path, points)

        self._call_main()

        meta = self.accessor.get_metric(metric)
        self.assertTrue(meta)
        self.assertEqual(meta.name, metric)
        self.assertEqual(meta.carbon_aggregation, aggregation_method)
        self.assertEqual(meta.carbon_xfilesfactor, xfilesfactor)
        self.assertEqual(meta.carbon_retentions, retentions)

        points_again = self.accessor.fetch_points(metric, time_from, time_to, step=1)
        self.assertEqual(points, points_again)
Example #23
The license of this file is explicitly Apache License 2.0 in accordance with
this, and its usage of the whisper libraries. It is designed to be called
externally by NAV migration tools to avoid license incompatibilities between
GPL v2 and Apache License v2.

"""
import sys
import time
import argparse
try:
    import whisper
except ImportError:
    raise SystemExit('[ERROR] Please make sure whisper is installed properly')

now = int(time.time())
option_parser = argparse.ArgumentParser(
    description="Accepts multiple Whisper datapoints on stdin to update a "
    "single .wsp file")
option_parser.add_argument("filename",
                           nargs=1,
                           help="path to a .wsp file to update")
args = option_parser.parse_args()

datapoint_strings = [point.replace('N:', '%d:' % now) for point in sys.stdin]
datapoints = [tuple(point.strip().split(':')) for point in datapoint_strings]

try:
    whisper.update_many(args.filename[0], datapoints)  # nargs=1 yields a one-element list
except whisper.WhisperException as exc:
    raise SystemExit('[ERROR] %s' % str(exc))
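
The script above reads one 'timestamp:value' pair per stdin line, with the literal token 'N' standing in for the current time. A hedged illustration of the same parsing step with invented sample lines:

import time

now = int(time.time())
stdin_lines = ["N:42.0\n", "1588836720:17.5\n"]   # invented sample input

datapoint_strings = [line.replace('N:', '%d:' % now) for line in stdin_lines]
datapoints = [tuple(line.strip().split(':')) for line in datapoint_strings]
# datapoints is now e.g. [('<now>', '42.0'), ('1588836720', '17.5')],
# which is exactly what the script hands to whisper.update_many().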
Example #24
 def update_many(self, metric, datapoints):
     return whisper.update_many(self.getFilesystemPath(metric), datapoints)
Example #25
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in AGGREGATION_SCHEMAS:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        try:
            if not exists(dbDir):
                os.makedirs(dbDir)
        except OSError as e:
            log.err("%s" % e)
        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        try:
            whisper.create(
                dbFilePath,
                archiveConfig,
                xFilesFactor,
                aggregationMethod,
                settings.WHISPER_SPARSE_CREATE,
                settings.WHISPER_FALLOCATE_CREATE)
            instrumentation.increment('creates')
        except:
            log.err("Error creating %s" % (dbFilePath))
            continue
      # If we've got a rate limit configured lets makes sure we enforce it
      if UPDATE_BUCKET:
        UPDATE_BUCKET.drain(1, blocking=True)
      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        updateTime = time.time() - t1
      except Exception:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)
        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
Example #26
 def write(self, metric, datapoints):
     path = self.getFilesystemPath(metric)
     whisper.update_many(path, datapoints)
Example #27
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"
    updates = 0
    lastSecond = 0

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath,
             dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in schemas:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' %
                                    (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                for schema in agg_schemas:
                    if schema.matches(metric):
                        log.creates(
                            'new metric %s matched aggregation schema %s' %
                            (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                dbDir = dirname(dbFilePath)
                try:
                    os.makedirs(dbDir, 0755)
                except OSError as e:
                    log.err("%s" % e)
                log.creates(
                    "creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor,
                     aggregationMethod))
                whisper.create(dbFilePath, archiveConfig, xFilesFactor,
                               aggregationMethod,
                               settings.WHISPER_SPARSE_CREATE,
                               settings.WHISPER_FALLOCATE_CREATE)
                instrumentation.increment('creates')

            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                t2 = time.time()
                updateTime = t2 - t1
            except:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)

                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))

                # Rate limit update operations
                thisSecond = int(t2)

                if thisSecond != lastSecond:
                    lastSecond = thisSecond
                    updates = 0
                else:
                    updates += 1
                    if updates >= settings.MAX_UPDATES_PER_SECOND:
                        time.sleep(int(t2 + 1) - t2)

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
Example #28
 def write(self, metric, datapoints):
   path = self.getFilesystemPath(metric)
   whisper.update_many(path, datapoints)
Example #29
#!/usr/bin/env python

import sys, time
import whisper
from optparse import OptionParser

now = int( time.time() )

option_parser = OptionParser(
    usage='''%prog [options] path timestamp:value [timestamp:value]*''')

(options, args) = option_parser.parse_args()

if len(args) < 2:
  option_parser.print_usage()
  sys.exit(1)

path = args[0]
datapoint_strings = args[1:]
datapoint_strings = [point.replace('N:', '%d:' % now)
                     for point in datapoint_strings]
datapoints = [tuple(point.split(':')) for point in datapoint_strings]

if len(datapoints) == 1:
  timestamp,value = datapoints[0]
  whisper.update(path, value, timestamp)
else:
  print datapoints
  whisper.update_many(path, datapoints)
Example #30
    os.unlink(tmpfile)
  newfile = tmpfile
else:
  newfile = options.newfile

print 'Creating new whisper database: %s' % newfile
whisper.create(newfile, new_archives, xFilesFactor=xff)
size = os.stat(newfile).st_size
print 'Created: %s (%d bytes)' % (newfile,size)

print 'Migrating data...'
for archive in old_archives:
  timeinfo, values = archive['data']
  datapoints = zip( range(*timeinfo), values )
  datapoints = filter(lambda p: p[1] is not None, datapoints)
  whisper.update_many(newfile, datapoints)

if options.newfile is not None:
  sys.exit(0)

backup = path + '.bak'
print 'Renaming old database to: %s' % backup
os.rename(path, backup)

try:
  print 'Renaming new database to: %s' % path
  os.rename(tmpfile, path)
except:
  traceback.print_exc()
  print '\nOperation failed, restoring backup'
  os.rename(backup, path)
Example #31
 def update_many(self, metric, datapoints):
     return whisper.update_many(self.getFilesystemPath(metric), datapoints)
 def update_many(self, metric, datapoints, dbIdentifier):
   dbFilePath = dbIdentifier
   whisper.update_many(dbFilePath, datapoints)
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"
  
  global  updates 
  global  lastSecond 

#  while MetricCache:
#  dataWritten = False

  for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
    dataWritten = True

    if not dbFileExists:
      archiveConfig = None
      xFilesFactor, aggregationMethod = None, None

      for schema in schemas:
        if schema.matches(metric):
          log.creates('new metric %s matched schema %s' % (metric, schema.name))
          archiveConfig = [archive.getTuple() for archive in schema.archives]
          break

      for schema in agg_schemas:
        if schema.matches(metric):
          log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
          xFilesFactor, aggregationMethod = schema.archives
          break

      if not archiveConfig:
        raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)
      try: 
        dbDir = dirname(dbFilePath)
        os.system("umask u=rwx,go=rx ; mkdir -p -m 755 '%s'" % dbDir)

        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" % 
                  (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod)
        os.chmod(dbFilePath, 0755)
        instrumentation.increment('creates')
      except IOError as e:
        log.msg("IOError: {0}".format(e))

    try:
      t1 = time.time()
      whisper.update_many(dbFilePath, datapoints)
      t2 = time.time()
      updateTime = t2 - t1
    except:
      log.err()
      instrumentation.increment('errors')
    else:
      pointCount = len(datapoints)
      instrumentation.increment('committedPoints', pointCount)
      instrumentation.append('updateTimes', updateTime)

      if settings.LOG_UPDATES:
        log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

      # Rate limit update operations
      thisSecond = int(t2)

      if thisSecond != lastSecond:
        lastSecond = thisSecond
        updates = 0
      else:
        updates += 1
        if updates >= settings.MAX_UPDATES_PER_SECOND:
          time.sleep( int(t2 + 1) - t2 )
  time.sleep(5)
Example #34
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"
    updates = 0
    lastSecond = 0

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath,
             dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None

                for schema in schemas:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' %
                                    (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                dbDir = dirname(dbFilePath)
                os.system("mkdir -p -m 755 '%s'" % dbDir)

                log.creates("creating database file %s" % dbFilePath)
                whisper.create(dbFilePath, archiveConfig)
                os.chmod(dbFilePath, 0755)
                increment('creates')

                # Create metadata file
                dbFileName = basename(dbFilePath)
                metaFilePath = join(
                    dbDir, dbFileName[:-len('.wsp')] + '.context.pickle')
                createMetaFile(metric, schema, metaFilePath)

            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                t2 = time.time()
                updateTime = t2 - t1
            except:
                log.err()
                increment('errors')
            else:
                pointCount = len(datapoints)
                increment('committedPoints', pointCount)
                append('updateTimes', updateTime)

                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))

                # Rate limit update operations
                thisSecond = int(t2)

                if thisSecond != lastSecond:
                    lastSecond = thisSecond
                    updates = 0
                else:
                    updates += 1
                    if updates >= settings.MAX_UPDATES_PER_SECOND:
                        time.sleep(int(t2 + 1) - t2)

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
Example #35
        for tinterval in zip(timepoints_to_update[:-1],
                             timepoints_to_update[1:]):
            # TODO: Setting lo= parameter for 'lefti' based on righti from previous
            #       iteration. Obviously, this can only be done if
            #       timepoints_to_update is always updated. Is it?
            lefti = bisect.bisect_left(oldtimestamps, tinterval[0])
            righti = bisect.bisect_left(oldtimestamps, tinterval[1], lo=lefti)
            newvalues = oldvalues[lefti:righti]
            if newvalues:
                non_none = filter(lambda x: x is not None, newvalues)
                if 1.0 * len(non_none) / len(newvalues) >= xff:
                    newdatapoints.append([
                        tinterval[0],
                        whisper.aggregate(aggregationMethod, non_none)
                    ])
        whisper.update_many(newfile, newdatapoints)
else:
    print 'Migrating data without aggregation...'
    for archive in old_archives:
        timeinfo, values = archive['data']
        datapoints = zip(range(*timeinfo), values)
        datapoints = filter(lambda p: p[1] is not None, datapoints)
        whisper.update_many(newfile, datapoints)

if options.newfile is not None:
    sys.exit(0)

backup = path + '.bak'
print 'Renaming old database to: %s' % backup
os.rename(path, backup)
Example #36
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"
  updates = 0
  lastSecond = 0

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None
        xFilesFactor, aggregationMethod = None, None

        for schema in schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        for schema in agg_schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched aggregation schema %s' % (metric, schema.name))
            xFilesFactor, aggregationMethod = schema.archives
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        try:
            os.makedirs(dbDir, 0755)
        except OSError as e:
            log.err("%s" % e)
        log.creates("creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor, aggregationMethod))
        whisper.create(dbFilePath, archiveConfig, xFilesFactor, aggregationMethod, settings.WHISPER_SPARSE_CREATE, settings.WHISPER_FALLOCATE_CREATE)
        instrumentation.increment('creates')

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        t2 = time.time()
        updateTime = t2 - t1
      except:
        log.msg("Error writing to %s" % (dbFilePath))
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)

        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Rate limit update operations
        thisSecond = int(t2)

        if thisSecond != lastSecond:
          lastSecond = thisSecond
          updates = 0
        else:
          updates += 1
          if updates >= settings.MAX_UPDATES_PER_SECOND:
            time.sleep(int(t2 + 1) - t2)

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)
Example #37
def convert_rrd(rrd_file, dest_dir):
    datasource_map = {
        'OUTOCTETS': 'out_octets',
        'OUTUCASTPKTS': 'out_unicast_packets',
        'OUTNUCASTPKTS': 'out_nunicast_packets',
        'INNUCASTPKTS': 'in_nunicast_packets',
        'INERRORS': 'in_errors',
        'OUTERRORS': 'out_errors',
        'INUCASTPKTS': 'in_unicast_packets',
        'INOCTETS': 'in_octets',
    }

    rra_indices = []
    rrd_info = rrdtool.info(rrd_file)
    seconds_per_pdp = rrd_info['step']
    for key in rrd_info:
        if key.startswith('rra['):
            index = int(key.split('[')[1].split(']')[0])
            rra_indices.append(index)

    rra_count = max(rra_indices) + 1
    rras = []
    for i in range(rra_count):
        rra_info = {}
        rra_info['pdp_per_row'] = rrd_info['rra[%d].pdp_per_row' % i]
        rra_info['rows'] = rrd_info['rra[%d].rows' % i]
        rra_info['cf'] = rrd_info['rra[%d].cf' % i]
        if 'xff' in rrd_info:
            rra_info['xff'] = rrd_info['rra[%d].xff' % i]
        rras.append(rra_info)

    datasources = []
    if 'ds' in rrd_info:
        datasources = rrd_info['ds'].keys()
    else:
        ds_keys = [key for key in rrd_info if key.startswith('ds[')]
        datasources = list(set(key[3:].split(']')[0] for key in ds_keys))

    relevant_rras = []
    for rra in rras:
        if rra['cf'] == 'MAX':
            relevant_rras.append(rra)

    archives = []
    for rra in relevant_rras:
        precision = rra['pdp_per_row'] * seconds_per_pdp
        points = rra['rows']
        archives.append((precision, points))

    for datasource in datasources:
        now = int(time.time())
        d = datasource_map[datasource]
        dest_path = f"{dest_dir}/{d}.wsp"
        try:
            whisper.create(dest_path, archives, xFilesFactor=0.5)
        except whisper.InvalidConfiguration:
            pass

        datapoints = []
        for precision, points in reversed(archives):
            retention = precision * points
            endTime = now - now % precision
            startTime = endTime - retention
            (time_info, columns, rows) = rrdtool.fetch(rrd_file, 'MAX', '-r',
                                                       str(precision), '-s',
                                                       str(startTime), '-e',
                                                       str(endTime), '-a')
            column_index = list(columns).index(datasource)
            rows.pop()
            values = [row[column_index] for row in rows]
            timestamps = list(range(*time_info))
            datapoints = zip(timestamps, values)
            datapoints = [
                datapoint for datapoint in datapoints
                if datapoint[1] is not None
            ]
            whisper.update_many(dest_path, datapoints)
Example #38
    def update_many(self, metric, datapoints, retention_config):
        '''Update the datapoints but quietly ignore the retention_config.'''
        return whisper.update_many(self.getFilesystemPath(metric), datapoints)
Example #39
    except whisper.InvalidConfiguration as e:
        raise SystemExit('[ERROR] %s' % str(e))
    size = os.stat(path).st_size
    archiveConfig = ','.join(["%d:%d" % ar for ar in archives])
    print("Created: %s (%d bytes) with archives: %s" %
          (path, size, archiveConfig))

    print("Migrating data")
    archiveNumber = len(archives) - 1
    for precision, points in reversed(archives):
        retention = precision * points
        endTime = now - now % precision
        startTime = endTime - retention
        (time_info,
         columns, rows) = rrdtool.fetch(rrd_path,
                                        options.aggregationMethod.upper(),
                                        '-r', str(precision), '-s',
                                        str(startTime), '-e', str(endTime))
        column_index = list(columns).index(datasource)
        # remove the last datapoint because RRD sometimes gives funky values
        rows.pop()

        values = [row[column_index] for row in rows]
        timestamps = list(range(*time_info))
        datapoints = zip(timestamps, values)
        datapoints = list(filter(lambda p: p[1] is not None, datapoints))
        print(' migrating %d datapoints from archive %d' %
              (len(datapoints), archiveNumber))
        archiveNumber -= 1
        whisper.update_many(path, datapoints)
Example #40
def writeCachedDataPoints():
    "Write datapoints until the MetricCache is completely empty"

    while MetricCache:
        dataWritten = False

        for (metric, datapoints, dbFilePath,
             dbFileExists) in optimalWriteOrder():
            dataWritten = True

            if not dbFileExists:
                archiveConfig = None
                xFilesFactor, aggregationMethod = None, None

                for schema in SCHEMAS:
                    if schema.matches(metric):
                        log.creates('new metric %s matched schema %s' %
                                    (metric, schema.name))
                        archiveConfig = [
                            archive.getTuple() for archive in schema.archives
                        ]
                        break

                for schema in AGGREGATION_SCHEMAS:
                    if schema.matches(metric):
                        log.creates(
                            'new metric %s matched aggregation schema %s' %
                            (metric, schema.name))
                        xFilesFactor, aggregationMethod = schema.archives
                        break

                if not archiveConfig:
                    raise Exception(
                        "No storage schema matched the metric '%s', check your storage-schemas.conf file."
                        % metric)

                dbDir = dirname(dbFilePath)
                try:
                    if not exists(dbDir):
                        os.makedirs(dbDir)
                except OSError as e:
                    log.err("%s" % e)
                log.creates(
                    "creating database file %s (archive=%s xff=%s agg=%s)" %
                    (dbFilePath, archiveConfig, xFilesFactor,
                     aggregationMethod))
                try:
                    whisper.create(dbFilePath, archiveConfig, xFilesFactor,
                                   aggregationMethod,
                                   settings.WHISPER_SPARSE_CREATE,
                                   settings.WHISPER_FALLOCATE_CREATE)
                    instrumentation.increment('creates')
                except:
                    log.err("Error creating %s" % (dbFilePath))
                    continue
            # If we've got a rate limit configured lets makes sure we enforce it
            if UPDATE_BUCKET:
                UPDATE_BUCKET.drain(1, blocking=True)
            try:
                t1 = time.time()
                whisper.update_many(dbFilePath, datapoints)
                updateTime = time.time() - t1
            except Exception:
                log.msg("Error writing to %s" % (dbFilePath))
                log.err()
                instrumentation.increment('errors')
            else:
                pointCount = len(datapoints)
                instrumentation.increment('committedPoints', pointCount)
                instrumentation.append('updateTimes', updateTime)
                if settings.LOG_UPDATES:
                    log.updates("wrote %d datapoints for %s in %.5f seconds" %
                                (pointCount, metric, updateTime))

        # Avoid churning CPU when only new metrics are in the cache
        if not dataWritten:
            time.sleep(0.1)
Example #41
def writeCachedDataPoints():
  "Write datapoints until the MetricCache is completely empty"
  updates = 0
  lastSecond = 0

  while MetricCache:
    dataWritten = False

    for (metric, datapoints, dbFilePath, dbFileExists) in optimalWriteOrder():
      dataWritten = True

      if not dbFileExists:
        archiveConfig = None

        for schema in schemas:
          if schema.matches(metric):
            log.creates('new metric %s matched schema %s' % (metric, schema.name))
            archiveConfig = [archive.getTuple() for archive in schema.archives]
            break

        if not archiveConfig:
          raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)

        dbDir = dirname(dbFilePath)
        os.system("mkdir -p -m 755 '%s'" % dbDir)

        log.creates("creating database file %s" % dbFilePath)
        whisper.create(dbFilePath, archiveConfig)
        os.chmod(dbFilePath, 0755)
        instrumentation.increment('creates')

        # Create metadata file
        dbFileName = basename(dbFilePath)
        metaFilePath = join(dbDir, dbFileName[ :-len('.wsp') ] + '.context.pickle')
        createMetaFile(metric, schema, metaFilePath)

      try:
        t1 = time.time()
        whisper.update_many(dbFilePath, datapoints)
        t2 = time.time()
        updateTime = t2 - t1
      except:
        log.err()
        instrumentation.increment('errors')
      else:
        pointCount = len(datapoints)
        instrumentation.increment('committedPoints', pointCount)
        instrumentation.append('updateTimes', updateTime)

        if settings.LOG_UPDATES:
          log.updates("wrote %d datapoints for %s in %.5f seconds" % (pointCount, metric, updateTime))

        # Rate limit update operations
        thisSecond = int(t2)

        if thisSecond != lastSecond:
          lastSecond = thisSecond
          updates = 0
        else:
          updates += 1
          if updates >= settings.MAX_UPDATES_PER_SECOND:
            time.sleep( int(t2 + 1) - t2 )

    # Avoid churning CPU when only new metrics are in the cache
    if not dataWritten:
      time.sleep(0.1)