def integral(requestContext, seriesList):
    """
    This will show the sum over time, sort of like a continuous addition
    function. Useful for finding totals or trends in metrics that are
    collected per minute.

    Example:

    .. code-block:: none

      &target=integral(company.sales.perMinute)

    This would start at zero on the left side of the graph, adding the sales
    each minute, and show the total sales for the time period selected at the
    right side, (time now, or the time specified by '&until=').
    """
    out = []
    for series in seriesList:
        runningTotal = 0.0
        accumulated = []
        for point in series:
            if point is None:
                accumulated.append(None)
                continue
            runningTotal += point
            accumulated.append(runningTotal)
        label = "integral(%s)" % series.name
        summed = TimeSeries(label, series.start, series.end, series.step, accumulated)
        summed.pathExpression = label
        out.append(summed)
    return out
def derivative(requestContext, seriesList):
    """
    This is the opposite of the integral function. This is useful for taking a
    running total metric and showing how many requests per minute were handled.

    Example:

    .. code-block:: none

      &target=derivative(company.server.application01.ifconfig.TXPackets)

    Each time you run ifconfig, the RX and TXPackets are higher (assuming there
    is network traffic.) By applying the derivative function, you can get an
    idea of the packets per minute sent or received, even though you're only
    recording the total.
    """
    out = []
    for series in seriesList:
        deltas = []
        previous = None
        for current in series:
            # A delta needs two consecutive real values.
            if previous is None or current is None:
                deltas.append(None)
            else:
                deltas.append(current - previous)
            previous = current
        label = "derivative(%s)" % series.name
        diffed = TimeSeries(label, series.start, series.end, series.step, deltas)
        diffed.pathExpression = label
        out.append(diffed)
    return out
def log(requestContext, seriesList, base=10):
    """
    Takes one metric or a wildcard seriesList, a base, and draws the y-axis in
    logarithmic format. If base is omitted, the function defaults to base 10.

    Example:

    .. code-block:: none

      &target=log(carbon.agents.hostname.avgUpdateTime,2)
    """
    out = []
    for series in seriesList:
        transformed = []
        for v in series:
            # log() is undefined for None and for non-positive values.
            if v is None or v <= 0:
                transformed.append(None)
            else:
                transformed.append(math.log(v, base))
        label = "log(%s, %s)" % (series.name, base)
        logged = TimeSeries(label, series.start, series.end, series.step, transformed)
        logged.pathExpression = label
        out.append(logged)
    return out
def test_TimeSeries_equal_list_color(self):
    # Two series with identical data and matching colors compare equal.
    data = range(0, 100)
    first = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    first.color = 'white'
    second = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    second.color = 'white'
    self.assertEqual(first, second)
def percentileOfSeries(requestContext, *args):
    """
    Accepts any mix of series lists and percentile levels (numbers, or
    ';'-separated strings of numbers). For each requested level, returns a
    series whose datapoints are that percentile across all input series.

    Raises ValueError if a requested level is not greater than 0.
    """
    levels = []
    seriesList = []
    for arg in args:
        logging.info("Arg: %s", arg)
        if isinstance(arg, (int, long, float)):
            levels.append(arg)
        elif isinstance(arg, basestring):
            levels += [float(x) for x in arg.split(";")]
        else:
            seriesList += arg
    logging.info("Levels: %s", levels)
    logging.info("Series: %s", seriesList)
    result = []
    for level in levels:
        # BUG FIX: originally compared the whole `levels` list against 0, so
        # invalid levels were never rejected. Validate each level instead.
        if level <= 0:
            raise ValueError('The requested percent is required to be greater than 0')
        name = 'percentilesOfSeries(%s,%g)' % (seriesList[0].pathExpression, level)
        (start, end, step) = functions.normalize([seriesList])[1:]
        values = [functions._getPercentile(row, level, False) for row in functions.izip(*seriesList)]
        resultSeries = TimeSeries(name, start, end, step, values)
        resultSeries.pathExpression = name
        result.append(resultSeries)
    return result
def test_TimeSeries_equal_list_color_bad2(self):
    # A color set on only one of two otherwise-identical series breaks equality.
    data = range(0, 100)
    colored = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    plain = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    colored.color = 'white'
    with self.assertRaises(AssertionError):
        self.assertEqual(colored, plain)
def sumSeries(requestContext, *seriesLists):
    """
    Short form: sum()

    This will add metrics together and return the sum at each datapoint.
    (See integral for a sum over time)

    Example:

    .. code-block:: none

      &target=sum(company.server.application*.requestsHandled)

    This would show the sum of all requests handled per minute (provided
    requestsHandled are collected once a minute). If metrics with different
    retention rates are combined, the coarsest metric is graphed, and the sum
    of the other metrics is averaged for the metrics with finer retention
    rates.
    """
    try:
        (seriesList, start, end, step) = normalize(seriesLists)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Unnormalizable/empty input yields no series.
        return []
    name = "sumSeries(%s)" % ','.join(set([s.pathExpression for s in seriesList]))
    values = ( safeSum(row) for row in izip(*seriesList) )
    series = TimeSeries(name, start, end, step, values)
    series.pathExpression = name
    return [series]
def test_linearRegression(self):
    # Verifies linearRegression(): coefficients are fitted on a stubbed source
    # window (fetched via evaluateTarget) and projected over the input range.
    original = functions.evaluateTarget
    try:
        # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
        # steps are every 60 seconds
        # NOTE: the trailing comma intentionally makes savedSeries a 1-tuple,
        # matching the sequence-of-series shape evaluateTarget returns.
        savedSeries = TimeSeries('test.value',180,480,60,[3,None,5,6,None,8]),
        functions.evaluateTarget = lambda x, y: savedSeries
        # input values will be ignored and replaced by regression function
        inputSeries = TimeSeries('test.value',1200,1500,60,[123,None,None,456,None,None,None])
        inputSeries.pathExpression = 'test.value'
        results = functions.linearRegression({ 'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)), 'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)), 'localOnly': False, 'data': [], }, [ inputSeries ], '00:03 19700101', '00:08 19700101')
        # regression function calculated from datapoints on minutes 3 to 8
        expectedResult = [ TimeSeries('linearRegression(test.value, 180, 480)',1200,1500,60,[20.0,21.0,22.0,23.0,24.0,25.0,26.0]) ]
        self.assertEqual(results, expectedResult)
    finally:
        # Always restore the patched evaluateTarget so other tests are unaffected.
        functions.evaluateTarget = original
def test_TimeSeries_consolidate(self):
    # consolidate(n) only records valuesPerPoint; iteration applies it later.
    data = list(range(0, 100))
    series = TimeSeries("collectd.test-db.load.value", 0, len(data)/2, 1, data)
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
def test_TimeSeries_iterate_valuesPerPoint_2_none_values(self):
    # Consolidating buckets of all-None values yields None per bucket.
    data = [None] * 5
    series = TimeSeries("collectd.test-db.load.value", 0, len(data)/2, 1, data)
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, [None] * 3)
    self.assertEqual(list(series), list(reference))
def test_TimeSeries_iterate_valuesPerPoint_2_avg(self):
    # Default consolidation averages each pair; the series tail becomes None.
    data = range(0, 100)
    series = TimeSeries("collectd.test-db.load.value", 0, len(data)/2, 1, data)
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    # Pairs (0,1),(2,3),... average to 0.5, 2.5, ..., 98.5.
    pairAverages = [i + 0.5 for i in range(0, 100, 2)] + [None]
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, pairAverages)
    self.assertEqual(list(series), list(reference))
def test_TimeSeries_iterate_valuesPerPoint_2_min(self):
    # 'min' consolidation keeps the smaller value of each pair.
    data = range(0, 100)
    series = TimeSeries("collectd.test-db.load.value", 0, 5, 1, data, consolidate='min')
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    pairMinimums = list(range(0, 100, 2)) + [None]
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, pairMinimums)
    self.assertEqual(list(series), list(reference))
def test_TimeSeries_iterate_valuesPerPoint_2_invalid(self):
    # Iterating with an unknown consolidation function must raise.
    data = range(0, 100)
    series = TimeSeries("collectd.test-db.load.value", 0, 5, 1, data, consolidate='bogus')
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1, list(range(0, 100, 2)) + [None])
    with self.assertRaisesRegexp(Exception, "Invalid consolidation function: 'bogus'"):
        result = list(series)
def test_TimeSeries_iterate_valuesPerPoint_2_none_values(self):
    # Consolidation over None-heavy buckets, then the xFilesFactor ladder:
    # a consolidated bucket is kept only when the fraction of non-None values
    # meets the threshold.
    # NOTE(review): this method name duplicates another test in the file and
    # this first `expected` is rebuilt below without ever being asserted.
    values = [None, None, None, None, None]
    series = TimeSeries("collectd.test-db.load.value", 0, len(values)/2, 1, values)
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1, [None, None, None])
    # Mixed None/real values with a permissive xFilesFactor of 0.1:
    # buckets [None,None],[None,None],[None,1],[2,3],[4] -> None,None,1,2.5,4
    values = [None, None, None, None, None, 1, 2, 3, 4]
    series = TimeSeries("collectd.test-db.load.value", 0, len(values)/2, 1, values, xFilesFactor=0.1)
    self.assertEqual(series.valuesPerPoint, 1)
    self.assertEqual(series.xFilesFactor, 0.1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1, [None, None, 1, 2.5, 4])
    self.assertEqual(list(series), list(expected))
    # Exactly at the 0.5 boundary the half-full buckets still pass...
    series.xFilesFactor = 0.5
    self.assertEqual(list(series), list(expected))
    # ...but just above it only the completely-full [2,3] bucket survives.
    series.xFilesFactor = 0.500001
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1, [None, None, None, 2.5, None])
    self.assertEqual(list(series), list(expected))
    series.xFilesFactor = 1
    self.assertEqual(list(series), list(expected))
def movingAverage(requestContext, seriesList, windowSize):
    """
    Takes one metric or a wildcard seriesList followed by a number N of
    datapoints and graphs the average of N previous datapoints. N-1 datapoints
    are set to None at the beginning of the graph.

    .. code-block:: none

      &target=movingAverage(Server.instance01.threads.busy,10)
    """
    for seriesIndex, series in enumerate(seriesList):
        newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        windowIndex = int(windowSize) - 1
        for i in range( len(series) ):
            if i < windowIndex:
                # Pad the beginning with None's since we don't have enough data
                newSeries.append( None )
            else:
                window = series[i - windowIndex : i + 1]
                nonNull = [ v for v in window if v is not None ]
                if nonNull:
                    # BUG FIX: force float division; under Python 2 a window of
                    # integer datapoints silently truncated the average.
                    newSeries.append( sum(nonNull) / float(len(nonNull)) )
                else:
                    newSeries.append(None)
        seriesList[ seriesIndex ] = newSeries
    return seriesList
def test_TimeSeries_getInfo(self):
    # getInfo() exposes the full series metadata dictionary.
    data = list(range(0, 100))
    series = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    expectedInfo = {
        'name': 'collectd.test-db.load.value',
        'values': data,
        'start': 0,
        'step': 1,
        'end': len(data),
        'pathExpression': 'collectd.test-db.load.value',
        'valuesPerPoint': 1,
        'consolidationFunc': 'average',
        'xFilesFactor': 0,
    }
    self.assertEqual(series.getInfo(), expectedInfo)
def test_TimeSeries_iterate_valuesPerPoint_2_last(self):
    # 'last' consolidation keeps the final value of each bucket; a partial
    # trailing bucket keeps its own last value.
    data = list(range(0, 100))
    series = TimeSeries("collectd.test-db.load.value", 0, 5, 1, data, consolidate='last')
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, list(range(1, 100, 2)))
    self.assertEqual(list(series), list(reference))
    series.consolidate(3)
    self.assertEqual(series.valuesPerPoint, 3)
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, list(range(2, 100, 3)) + [99])
    self.assertEqual(list(series), list(reference))
def minSeries(requestContext, *seriesLists):
    """
    Takes one metric or a wildcard seriesList. For each datapoint from each
    metric passed in, pick the minimum value and graph it.

    Example:

    .. code-block:: none

      &target=minSeries(Server*.connections.total)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = list(set([s.pathExpression for s in seriesList]))
    name = "minSeries(%s)" % ','.join(uniquePaths)
    minimums = ( safeMin(row) for row in izip(*seriesList) )
    result = TimeSeries(name, start, end, step, minimums)
    result.pathExpression = name
    return [result]
def diffSeries(requestContext, *seriesLists):
    """
    Can take two or more metrics, or a single metric and a constant. Subtracts
    parameters 2 through n from parameter 1.

    Example:

    .. code-block:: none

      &target=diffSeries(service.connections.total,service.connections.failed)
      &target=diffSeries(service.connections.total,5)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    name = "diffSeries(%s)" % ','.join(uniquePaths)
    differences = ( safeDiff(row) for row in izip(*seriesList) )
    result = TimeSeries(name, start, end, step, differences)
    result.pathExpression = name
    return [result]
def nonNegativeDerivative(requestContext, seriesList, maxValue=None):
    """
    Same as the derivative function above, but ignores datapoints that trend
    down. Useful for counters that increase for a long time, then wrap or
    reset. (Such as if a network interface is destroyed and recreated by
    unloading and re-loading a kernel module, common with USB / WiFi cards.

    Example:

    .. code-block:: none

      &target=derivative(company.server.application01.ifconfig.TXPackets)
    """
    out = []
    for series in seriesList:
        deltas = []
        previous = None
        for current in series:
            if previous is None or current is None:
                deltas.append(None)
                previous = current
                continue
            diff = current - previous
            if diff >= 0:
                deltas.append(diff)
            elif maxValue is not None and maxValue >= current:
                # Counter wrapped: assume it reached maxValue, reset to 0,
                # then counted back up to the current value.
                deltas.append((maxValue - previous) + current + 1)
            else:
                deltas.append(None)
            previous = current
        label = "nonNegativeDerivative(%s)" % series.name
        result = TimeSeries(label, series.start, series.end, series.step, deltas)
        result.pathExpression = label
        out.append(result)
    return out
def divideSeries(requestContext, dividendSeriesList, divisorSeriesList):
    """
    Takes a dividend metric and a divisor metric and draws the division result.
    A constant may *not* be passed. To divide by a constant, use the scale()
    function (which is essentially a multiplication operation) and use the
    inverse of the dividend. (Division by 8 = multiplication by 1/8 or 0.125)

    Example:

    .. code-block:: none

      &target=asPercent(Series.dividends,Series.divisors)
    """
    if len(divisorSeriesList) != 1:
        raise ValueError("divideSeries second argument must reference exactly 1 series")
    divisorSeries = divisorSeriesList[0]
    results = []
    for dividendSeries in dividendSeriesList:
        name = "divideSeries(%s,%s)" % (dividendSeries.name, divisorSeries.name)
        pair = (dividendSeries, divisorSeries)
        # Consolidate both series onto their least-common-multiple step so the
        # datapoints line up one-to-one.
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate( step / s.step )
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step
        quotients = ( safeDiv(numer, denom) for numer, denom in izip(*pair) )
        quotientSeries = TimeSeries(name, start, end, step, quotients)
        quotientSeries.pathExpression = name
        results.append(quotientSeries)
    return results
def averageSeries(requestContext, *seriesLists):
    """
    Short Alias: avg()

    Takes one metric or a wildcard seriesList. Draws the average value of all
    metrics passed at each time.

    Example:

    .. code-block:: none

      &target=averageSeries(company.server.*.threads.busy)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    name = "averageSeries(%s)" % ','.join(uniquePaths)
    averages = ( safeDiv(safeSum(row), safeLen(row)) for row in izip(*seriesList) )
    result = TimeSeries(name, start, end, step, averages)
    result.pathExpression = name
    return [result]
def _generate_series_list(self, config=None):
    # Build a three-series default fixture unless an explicit (non-empty)
    # config of value lists is supplied.
    if not config:
        config = [
            list(range(101)),
            list(range(101)),
            [1, None, None, None, None],
        ]
    seriesList = []
    for index, data in enumerate(config):
        name = "collectd.test-db{0}.load.value".format(index + 1)
        seriesList.append(TimeSeries(name, 0, 1, 1, data))
    return seriesList
def gen_series_list_partial_none(self, start=0):
    # 15 sequential values with two holes (indices 2 and 8) punched in.
    data = list(range(start, start + 15))
    data[2] = None
    data[8] = None
    series = TimeSeries('stuff.things.more.things', start, start + 15, 1, data)
    series.pathExpression = series.name
    return [series]
def test_check_empty_lists(self):
    # removeEmptySeries drops series whose datapoints are entirely absent.
    config = [[1000, 100, 10, 0], []]
    seriesList = [TimeSeries('Test(%d)' % i, 0, 0, 0, c) for i, c in enumerate(config)]
    self.assertTrue(functions.safeIsNotEmpty(seriesList[0]))
    self.assertFalse(functions.safeIsNotEmpty(seriesList[1]))
    result = functions.removeEmptySeries({}, seriesList)
    self.assertEqual(1, len(result))
def test_sorting_by_total(self):
    # sortByTotal orders the series with the larger sum first.
    config = [[1000, 100, 10, 0], [1000, 100, 10, 1]]
    seriesList = [TimeSeries('Test(%d)' % i, 0, 0, 0, c) for i, c in enumerate(config)]
    self.assertEqual(1110, functions.safeSum(seriesList[0]))
    result = functions.sortByTotal({}, seriesList)
    self.assertEqual(1111, functions.safeSum(result[0]))
    self.assertEqual(1110, functions.safeSum(result[1]))
def test_TimeSeries_equal_list_color(self):
    # Equality holds when both data and color match.
    points = list(range(0, 100))
    args = ("collectd.test-db.load.value", 0, len(points), 1, points)
    a = TimeSeries(*args)
    b = TimeSeries(*args)
    a.color = 'white'
    b.color = 'white'
    self.assertEqual(a, b)
def test__merge_results_multiple_series(self):
    # Verifies that _merge_results merges overlapping fetch results for the
    # same path expression and preserves unrelated, already-collected series.
    pathExpr = 'collectd.test-db.load.value'
    # BUG FIX: the original startTime assignment ended with a stray trailing
    # comma, making startTime a 1-tuple instead of a datetime.
    startTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))
    timeInfo = [startTime, endTime, 60]
    result_queue = [
        [pathExpr, [timeInfo, [0, 1, 2, 3, 4, None, None, None, None, None]]],
        [pathExpr, [timeInfo, [None, None, None, None, None, 5, 6, 7, 8, 9]]],
        [pathExpr, [timeInfo, [None, None, None, None, None, None, None, 7, 8, 9]]],
        [pathExpr, [timeInfo, [0, 1, 2, 3, 4, None, None, 7, 8, 9]]],
    ]
    seriesList = {
        'collectd.test-db.cpu.value': TimeSeries("collectd.test-db.cpu.value", startTime, endTime, 60, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    }
    requestContext = self._build_requestContext(startTime, endTime)
    results = _merge_results(pathExpr, startTime, endTime, result_queue, seriesList, requestContext)
    expectedResults = [
        TimeSeries("collectd.test-db.cpu.value", startTime, endTime, 60, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        TimeSeries("collectd.test-db.load.value", startTime, endTime, 60, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ]
    self.assertEqual(results, expectedResults)
def centered_mov_avg(requestContext, seriesList, windowSize):
    # Centered moving average: like movingAverage() but the window is centred
    # on each point, using bootstrapped history to fill the leading half.
    # windowSize may be a point count or a time-offset string (e.g. "5min").
    windowInterval = None
    if isinstance(windowSize, basestring):
        delta = functions.parseTimeOffset(windowSize)
        windowInterval = abs(delta.seconds + (delta.days * 86400))
    # Fetch enough extra history to cover one full window before the range.
    if windowInterval:
        bootstrapSeconds = windowInterval
    else:
        bootstrapSeconds = max([s.step for s in seriesList]) * int(windowSize)
    bootstrapList = functions._fetchWithBootstrap(requestContext, seriesList, seconds=bootstrapSeconds)
    result = []
    for bootstrap, series in zip(bootstrapList, seriesList):
        if windowInterval:
            windowPoints = windowInterval / series.step
        else:
            windowPoints = int(windowSize)
        # String window sizes are quoted in the rendered series name.
        if isinstance(windowSize, basestring):
            newName = 'centeredMovingAverage(%s,"%s")' % (series.name, windowSize)
        else:
            newName = "centeredMovingAverage(%s,%s)" % (series.name, windowSize)
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        # Bootstrap data precedes the requested range by `offset` points.
        offset = len(bootstrap) - len(series)
        logging.info("Offset: %s", offset)
        logging.info("windowPoints: %s", windowPoints)
        for i in range(len(series)):
            # Window spans half the points before and half after position i.
            # NOTE(review): relies on integer '/' semantics — assumes Python 2.
            window = bootstrap[i + offset - windowPoints + windowPoints / 2:i + offset + windowPoints / 2]
            logging.info("window: %s", len(window))
            newSeries.append(functions.safeAvg(window))
        result.append(newSeries)
    return result
def gen_series_list(self, start=0, use_none=False):
    # 15 sequential points starting at `start`; all None when use_none is set.
    if use_none:
        data = [None] * 15
    else:
        data = range(start, start + 15)
    series = TimeSeries('stuff.things.more.things', start, start + 15, 1, data)
    series.pathExpression = series.name
    return [series]
def test_TimeSeries_iterate_valuesPerPoint_2_avg_alias(self):
    # 'avg' is accepted as an alias for average consolidation.
    data = list(range(0, 100))
    series = TimeSeries("collectd.test-db.load.value", 0, len(data)/2, 1, data, consolidate='avg')
    self.assertEqual(series.valuesPerPoint, 1)
    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    # Pairs (0,1),(2,3),... average to 0.5, 2.5, ..., 98.5.
    pairAverages = [i + 0.5 for i in range(0, 100, 2)]
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, pairAverages)
    self.assertEqual(list(series), list(reference))
    series.consolidate(3)
    self.assertEqual(series.valuesPerPoint, 3)
    # Triples average to 1.0, 4.0, ..., 97.0 plus the lone tail value 99.
    tripleAverages = [float(v) for v in list(range(1, 100, 3)) + [99]]
    reference = TimeSeries("collectd.test-db.load.value", 0, 5, 1, tripleAverages)
    self.assertEqual(list(series), list(reference))
def applyHotSax(requestContext, seriesLists):
    """
    Run the HOT SAX discord (anomaly) detection over the input series and
    return the discord timestamps, column indices and values as three series.
    """
    matrix = []
    time = {}
    # BUG FIX: removed leftover debug dump that wrote the input length to
    # /var/log/debug.txt on every call.
    for i in range(len(seriesLists)):
        datapoints = seriesLists[i].datapoints()
        current_measurement = []
        index = 0
        for j in range(len(datapoints)):
            # Skip missing values; remember each kept point's timestamp.
            if datapoints[j][0] is None:
                continue
            time[index] = datapoints[j][1]
            index = index + 1
            current_measurement.append(datapoints[j][0])
        current_measurement = np.array(current_measurement)
        matrix.append(current_measurement)
    matrix = np.array(matrix).T
    # Apply anomaly algorithm using SAX
    discord = hotsax.hotsax(matrix)
    r_time = []
    r_index = []
    r_value = []
    for i in range(len(discord)):
        r_time.append(time[discord[i][1]])
        r_index.append(discord[i][0])
        r_value.append(discord[i][2])
    start = seriesLists[0].start
    step = seriesLists[0].step
    end = start + step * len(discord)
    return [
        TimeSeries(name='recov.result.timestamp', start=start, end=end, step=step, values=r_time),
        TimeSeries(name='recov.result.column', start=start, end=end, step=step, values=r_index),
        TimeSeries(name='recov.result.value', start=start, end=end, step=step, values=r_value)
    ]
def summarize(requestContext, seriesList, intervalString):
    """
    Summarize each series into buckets of the given interval (e.g. "1hour");
    each bucket holds the sum of the non-None datapoints that fall inside it.
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = delta.seconds + (delta.days * 86400)
    for series in seriesList:
        buckets = {}
        timestamps = range(int(series.start), int(series.end), int(series.step))
        for timestamp, value in zip(timestamps, series):
            bucketStart = timestamp - (timestamp % interval)
            bucket = buckets.setdefault(bucketStart, [])
            if value is not None:
                bucket.append(value)
        # Align the output range on interval boundaries.
        newStart = series.start - (series.start % interval)
        newEnd = series.end - (series.end % interval) + interval
        newValues = []
        for bucketStart in range(newStart, newEnd, interval):
            points = buckets.get(bucketStart, [])
            newValues.append(sum(points) if points else None)
        newName = 'summarize(%s, "%s")' % (series.name, intervalString)
        summarized = TimeSeries(newName, newStart, newEnd, interval, newValues)
        summarized.pathExpression = newName
        results.append(summarized)
    return results
def asPercent(requestContext, seriesList1, seriesList2orNumber):
    """
    Takes exactly two metrics, or a metric and a constant. Draws the first
    metric as a percent of the second.

    Example:

    .. code-block:: none

      &target=asPercent(Server01.connections.failed,Server01.connections,total)
      &target=asPercent(apache01.threads.busy,1500)
    """
    assert len(seriesList1) == 1, "asPercent series arguments must reference *exactly* 1 series"
    series1 = seriesList1[0]
    if type(seriesList2orNumber) is list:
        # Series / series: align both on a common step, then divide pointwise.
        assert len(seriesList2orNumber) == 1, "asPercent series arguments must reference *exactly* 1 series"
        series2 = seriesList2orNumber[0]
        name = "asPercent(%s,%s)" % (series1.name, series2.name)
        pair = (series1, series2)
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate( step / s.step )
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step
        values = ( safeMul( safeDiv(v1, v2), 100.0 ) for v1, v2 in izip(*pair) )
    else:
        # Series / constant.
        number = float(seriesList2orNumber)
        name = "asPercent(%s,%.1f)" % (series1.name, number)
        step = series1.step
        start = series1.start
        end = series1.end
        values = ( safeMul( safeDiv(v, number), 100.0 ) for v in series1 )
    result = TimeSeries(name, start, end, step, values)
    result.pathExpression = name
    return [result]
def summarize(requestContext, seriesList, intervalString):
    """
    Summarize each series into buckets of the given interval string; every
    bucket becomes the sum of the real datapoints inside it (None if empty).
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = delta.seconds + (delta.days * 86400)
    for series in seriesList:
        buckets = {}
        tickList = range(int(series.start), int(series.end), int(series.step))
        for (tick, point) in zip(tickList, series):
            key = tick - (tick % interval)
            if key not in buckets:
                buckets[key] = []
            if point is not None:
                buckets[key].append(point)
        alignedStart = series.start - (series.start % interval)
        alignedEnd = series.end - (series.end % interval) + interval
        sums = []
        for key in range(alignedStart, alignedEnd, interval):
            collected = buckets.get(key, [])
            if collected:
                sums.append(sum(collected))
            else:
                sums.append(None)
        label = 'summarize(%s, "%s")' % (series.name, intervalString)
        summarized = TimeSeries(label, alignedStart, alignedEnd, interval, sums)
        summarized.pathExpression = label
        results.append(summarized)
    return results
def stacked(requestContext, seriesLists):
    """
    Takes one metric or a wildcard seriesList and change them so they are
    stacked. This is a way of stacking just a couple of metrics without having
    to use the stacked area mode (that stacks everything). By means of this a
    mixed stacked and non stacked graph can be made

    Example:

    .. code-block:: none

      &target=stacked(company.server.application01.ifconfig.TXPackets)
    """
    # Continue a running stack from earlier calls in the same request, if any.
    totalStack = requestContext['totalStack'] if 'totalStack' in requestContext else []
    results = []
    for series in seriesLists:
        stackedValues = []
        for i, value in enumerate(series):
            if len(totalStack) <= i:
                totalStack.append(0)
            if value is None:
                stackedValues.append(None)
            else:
                totalStack[i] += value
                stackedValues.append(totalStack[i])
        label = "stacked(%s)" % series.name
        stackedSeries = TimeSeries(label, series.start, series.end, series.step, stackedValues)
        stackedSeries.options['stacked'] = True
        stackedSeries.pathExpression = label
        results.append(stackedSeries)
    requestContext['totalStack'] = totalStack
    return results
def test_linearRegression(self):
    # Verifies linearRegression(): coefficients are fitted on a stubbed source
    # window (fetched via evaluateTarget) and projected over the input range.
    original = functions.evaluateTarget
    try:
        # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
        # steps are every 60 seconds
        # NOTE: the trailing comma intentionally makes savedSeries a 1-tuple,
        # matching the sequence-of-series shape evaluateTarget returns.
        savedSeries = TimeSeries('test.value', 180, 480, 60, [3, None, 5, 6, None, 8]),
        functions.evaluateTarget = lambda x, y: savedSeries
        # input values will be ignored and replaced by regression function
        inputSeries = TimeSeries('test.value', 1200, 1500, 60, [123, None, None, 456, None, None, None])
        inputSeries.pathExpression = 'test.value'
        results = functions.linearRegression(
            {
                'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'localOnly': False,
                'data': [],
            },
            [inputSeries], '00:03 19700101', '00:08 19700101')
        # regression function calculated from datapoints on minutes 3 to 8
        expectedResult = [
            TimeSeries('linearRegression(test.value, 180, 480)', 1200, 1500, 60, [20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0])
        ]
        self.assertEqual(results, expectedResult)
    finally:
        # Always restore the patched evaluateTarget so other tests are unaffected.
        functions.evaluateTarget = original
def applyKNN(requestContext, labelSeries, labelColumn, unlabelSeries):
    """
    Classify the unlabeled series with k-NN (k=3) trained on the labeled
    series; returns the resulting distances/labels as one series.
    """
    def _to_matrix(seriesGroup):
        # One column per series; None datapoints are dropped before stacking.
        columns = []
        for series in seriesGroup:
            datapoints = series.datapoints()
            cleaned = [point[0] for point in datapoints if point[0] is not None]
            columns.append(np.array(cleaned))
        return np.array(columns).T

    # Refactor: the original repeated this extraction loop three times.
    labelMatrix = _to_matrix(labelSeries)
    unlabelMatrix = _to_matrix(unlabelSeries)
    labels = [point[0] for point in labelColumn[0].datapoints() if point[0] is not None]

    result = knn.knn(labelMatrix, labels, unlabelMatrix, 3)
    startTime = labelSeries[0].start
    stepTime = labelSeries[0].step
    endTime = startTime + len(result) * stepTime
    return [
        TimeSeries(name="master.knn.dist", start=startTime, end=endTime, step=stepTime, values=result)
    ]
def movingAverage(requestContext, seriesList, windowSize):
    """
    Graphs the average of the previous windowSize datapoints for each series;
    the first windowSize-1 points are None (not enough history).
    """
    for seriesIndex, series in enumerate(seriesList):
        newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        windowIndex = int(windowSize) - 1
        for i in range(len(series)):
            if i < windowIndex:
                # Pad the beginning with None's since we don't have enough data
                newSeries.append(None)
            else:
                window = series[i - windowIndex:i + 1]
                nonNull = [v for v in window if v is not None]
                if nonNull:
                    # BUG FIX: force float division; under Python 2 a window of
                    # integer datapoints silently truncated the average.
                    newSeries.append(sum(nonNull) / float(len(nonNull)))
                else:
                    newSeries.append(None)
        seriesList[seriesIndex] = newSeries
    return seriesList
def test__merge_results_no_remote_store_merge_results(self):
    # With REMOTE_STORE_MERGE_RESULTS disabled, a single fetch result is used
    # instead of merging all partial results together.
    pathExpr = 'collectd.test-db.load.value'
    startTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))
    timeInfo = [startTime, endTime, 60]
    result_queue = [
        [pathExpr, [timeInfo, [0, 1, 2, 3, 4, None, None, None, None, None]]],
        [pathExpr, [timeInfo, [None, None, None, 3, 4, 5, 6, 7, 8, 9]]],
        [pathExpr, [timeInfo, [None, None, None, None, None, None, None, 7, 8, 9]]],
    ]
    seriesList = {}
    requestContext = self._build_requestContext(startTime, endTime)
    with self.settings(REMOTE_STORE_MERGE_RESULTS=False):
        results = _merge_results(pathExpr, startTime, endTime, result_queue, seriesList, requestContext)
    expectedResults = [
        TimeSeries(pathExpr, startTime, endTime, 60, [None, None, None, 3, 4, 5, 6, 7, 8, 9]),
    ]
    self.assertEqual(results, expectedResults)
def applyDstreeSearch(requestContext, seriesLists, columns):
    """
    Exact DS-Tree index search over the input series; returns the resulting
    distances as one series.
    """
    rows = []
    for series in seriesLists:
        datapoints = series.datapoints()
        cleaned = [point[0] for point in datapoints if point[0] is not None]
        rows.append(np.array(cleaned))
    matrix = np.array(rows)
    distances = IndexExactSearcher.search(matrix, "/var/log/out/index.idx_dyn_100_1_" + str(columns))
    startTime = seriesLists[0].start
    stepTime = seriesLists[0].step
    endTime = startTime + len(distances) * stepTime
    return [
        TimeSeries(name="master.dstree.dist", start=startTime, end=endTime, step=stepTime, values=distances)
    ]
def test_reduceSeries_asPercent(self):
    # reduceSeries pairs each node's bytes_used with total_bytes and emits
    # a percentage series per node.
    seriesList = [
        TimeSeries('group.server1.bytes_used', 0, 1, 1, [1]),
        TimeSeries('group.server1.total_bytes', 0, 1, 1, [2]),
        TimeSeries('group.server2.bytes_used', 0, 1, 1, [3]),
        TimeSeries('group.server2.total_bytes', 0, 1, 1, [4]),
    ]
    for series in seriesList:
        series.pathExpression = "tempPath"
    expectedResult = [
        TimeSeries('group.server1.reduce.asPercent', 0, 1, 1, [50]),  # 100*1/2
        TimeSeries('group.server2.reduce.asPercent', 0, 1, 1, [75]),  # 100*3/4
    ]
    mappedResult = tuple([series] for series in seriesList)
    results = functions.reduceSeries({}, copy.deepcopy(mappedResult), "asPercent", 2, "bytes_used", "total_bytes")
    self.assertEqual(results, expectedResult)
def applySaxRepresentation(requestContext, seriesLists):
    """
    Compute the SAX symbolic representation of the input series; one output
    series per representation dimension.
    """
    columns = []
    for series in seriesLists:
        datapoints = series.datapoints()
        cleaned = [point[0] for point in datapoints if point[0] is not None]
        columns.append(np.array(cleaned))
    matrix = np.array(columns).T
    saxRepresentation = saxtransformation.saxrepresentation(matrix)
    timeSeriesCount = matrix.shape[1]
    startTime = seriesLists[0].start
    stepTime = seriesLists[0].step
    endTime = startTime + len(seriesLists[0]) * stepTime
    result = []
    for j in range(timeSeriesCount):
        result.append(
            TimeSeries(name="master.saxrepresentation.dim" + str(j), start=startTime,
                       end=endTime, step=stepTime, values=list(saxRepresentation[j]))
        )
    return result
def applyRecov(requestContext, seriesLists):
    """Recover missing values in the input series via recovery.recovery().

    Non-null datapoints form a (points x series) matrix that is passed to
    the recovery routine; recovered values are spliced back into each
    series, keeping None wherever the original datapoint was null.

    :param requestContext: render request context (unused here).
    :param seriesLists: series to recover.
    :returns: list of TimeSeries with recovered values.
    """
    matrix = []
    for series in seriesLists:
        # Drop null datapoints; `is not None` replaces the `== None` test.
        measurements = [point[0] for point in series.datapoints()
                        if point[0] is not None]
        matrix.append(np.array(measurements))
    matrix = np.array(matrix).T
    n, m = matrix.shape
    # Renamed `iter` -> `iterations` to stop shadowing the builtin.
    rec_time, iterations, rmse, rec_mat = recovery.recovery(matrix, n, m, 3, 0.2, 10)
    result = []
    for i in range(m):
        startTime = seriesLists[i].start
        stepTime = seriesLists[i].step
        datapoints = seriesLists[i].datapoints()
        recovered = []
        # index_result walks rec_mat rows only over non-null positions,
        # mirroring how the matrix was built above.
        index_result = 0
        for point in datapoints:
            if point[0] is None:
                recovered.append(None)
            else:
                recovered.append(rec_mat[index_result][i])
                index_result += 1
        endTime = startTime + len(recovered) * stepTime
        result.append(
            TimeSeries(name="recov.result.dim" + str(i), start=startTime,
                       end=endTime, step=stepTime, values=recovered))
    return result
def applyCD(requestContext, seriesLists):
    """Run centroid decomposition (CD) over the input series.

    Builds a (points x series) matrix with null datapoints dropped, runs
    cd_ssv.CD, and returns the right-factor matrix columns as TimeSeries.

    :param requestContext: render request context (unused here).
    :param seriesLists: series whose datapoints feed the decomposition.
    :returns: list of TimeSeries, one per column of the right factor.
    """
    # Extract the data into a 2D numpy array (rows = points, cols = series).
    matrix = []
    for series in seriesLists:
        # Drop null datapoints; `is not None` replaces the `== None` test.
        measurements = [point[0] for point in series.datapoints()
                        if point[0] is not None]
        matrix.append(np.array(measurements))
    matrix = np.array(matrix).T
    # NOTE(review): a leftover debug dump that unconditionally overwrote
    # /var/log/debug.txt on every call was removed here.
    matrix_l, matrix_r, z = cd_ssv.CD(matrix, matrix.shape[0], matrix.shape[1])
    # Format the result columns into TimeSeries.
    result = []
    timeSeriesCount = matrix_r.shape[1]
    startTime = seriesLists[0].start
    stepTime = seriesLists[0].step
    endTime = startTime + timeSeriesCount * stepTime
    for j in range(timeSeriesCount):
        result.append(
            TimeSeries(name="master.cd.dim" + str(j), start=startTime,
                       end=endTime, step=stepTime,
                       values=matrix_r.T[j].tolist()))
    return result
def testNPercentile(self):
    """nPercentile should reduce each series to its Nth-percentile value."""
    seriesList = []
    config = [
        [15, 35, 20, 40, 50],
        range(1, 101),
        range(1, 201),
        range(1, 301),
        range(0, 100),
        range(0, 200),
        range(0, 300),
        # Ensure None values in the list have no effect.
        # list(...) is required on Python 3: list + range raises TypeError.
        [None, None, None] + list(range(0, 300)),
    ]
    for i, c in enumerate(config):
        seriesList.append(TimeSeries('Test(%d)' % i, 0, 0, 0, c))

    def TestNPercentile(perc, expected):
        result = functions.nPercentile({}, seriesList, perc)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected, result)

    TestNPercentile(30, [[20], [30], [60], [90], [29], [59], [89], [89]])
    TestNPercentile(90, [[50], [90], [180], [270], [89], [179], [269], [269]])
    TestNPercentile(95, [[50], [95], [190], [285], [94], [189], [284], [284]])
def test_n_percentile(self):
    """nPercentile should reduce each series to its Nth-percentile value."""
    seriesList = []
    config = [
        [15, 35, 20, 40, 50],
        range(1, 101),
        range(1, 201),
        range(1, 301),
        range(0, 100),
        range(0, 200),
        range(0, 300),
        # Ensure None values in list has no effect.
        # list(...) is required on Python 3: list + range raises TypeError.
        [None, None, None] + list(range(0, 300)),
    ]
    for i, c in enumerate(config):
        seriesList.append(TimeSeries('Test(%d)' % i, 0, 1, 1, c))

    def n_percentile(perc, expected):
        result = functions.nPercentile({}, seriesList, perc)
        self.assertEqual(expected, result)

    n_percentile(30, [[20], [31], [61], [91], [30], [60], [90], [90]])
    n_percentile(90, [[50], [91], [181], [271], [90], [180], [270], [270]])
    n_percentile(95, [[50], [96], [191], [286], [95], [190], [285], [285]])
def test__fetchData_remote_fetch_data(self):
    """_fetchData should return prefetched remote results when
    REMOTE_PREFETCH_DATA is enabled, skipping None prefetch entries
    (which occur when wait_for_results returns no data)."""
    pathExpr = 'collectd.test-db.load.value'
    startDateTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endDateTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))
    requestContext = self._build_requestContext(startDateTime, endDateTime)
    requestContext['now'] = endDateTime
    requestContext['forwardHeaders'] = None
    # Use this form of the start/end times (startTime, endTime, now)
    (startTime, endTime, now) = timebounds(requestContext)
    # First item in list is a proper fetched response
    # Second item is None, which is what happens if there is no data back from wait_for_results
    prefetched_results = [[{
        'pathExpression': 'collectd.test-db.load.value',
        'name': 'collectd.test-db.load.value',
        'time_info': (startTime, endTime, now),
        'step': 60,
        'values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    }], None]
    # Get the remote data: prefetched entries are keyed by the timebounds tuple.
    requestContext['prefetched'] = {}
    requestContext['prefetched'][(startTime, endTime, now)] = PrefetchedData(prefetched_results)
    with self.settings(REMOTE_PREFETCH_DATA=True):
        results = _fetchData(pathExpr, startTime, endTime, now, requestContext, {})
    expectedResults = [
        TimeSeries("collectd.test-db.load.value", startTime, endTime, 1200,
                   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ]
    self.assertEqual(results, expectedResults)
def test_TimeSeries_iterate_valuesPerPoint_2_sum(self):
    """Consolidating with 'sum' should aggregate each group of
    valuesPerPoint raw values, honoring xFilesFactor for partial groups."""
    series = TimeSeries("collectd.test-db.load.value", 0, 5, 1,
                        list(range(0, 100)), consolidate='sum')
    self.assertEqual(series.valuesPerPoint, 1)

    series.consolidate(2)
    self.assertEqual(series.valuesPerPoint, 2)
    # Pairs (0+1, 2+3, ...) -> 1, 5, 9, ...
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1,
                          list(range(1, 200, 4)))
    self.assertEqual(list(series), list(expected))

    series.consolidate(3)
    self.assertEqual(series.valuesPerPoint, 3)
    # Triples sum to 3, 12, 21, ...; the final lone value 99 stands alone.
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1,
                          list(range(3, 300, 9)) + [99])
    self.assertEqual(list(series), list(expected))

    # With xFilesFactor=0.4 the trailing 1-of-3 group is dropped to None.
    series.xFilesFactor = 0.4
    expected = TimeSeries("collectd.test-db.load.value", 0, 5, 1,
                          list(range(3, 300, 9)) + [None])
    self.assertEqual(list(series), list(expected))
def test_applyByNode(self):
    """applyByNode should evaluate the template once per distinct node-1
    prefix (servers.s1, servers.s2) and name results from the newName
    template."""
    seriesList = [
        TimeSeries('servers.s1.disk.bytes_used', 0, 3, 1, [10, 20, 30]),
        TimeSeries('servers.s1.disk.bytes_free', 0, 3, 1, [90, 80, 70]),
        TimeSeries('servers.s2.disk.bytes_used', 0, 3, 1, [1, 2, 3]),
        TimeSeries('servers.s2.disk.bytes_free', 0, 3, 1, [99, 98, 97])
    ]
    for series in seriesList:
        series.pathExpression = series.name

    def mock_data_fetcher(reqCtx, path_expression):
        # Serve any series whose name matches the expression (globs allowed).
        rv = []
        for s in seriesList:
            if s.name == path_expression or fnmatch(
                    s.name, path_expression):
                rv.append(s)
        if rv:
            return rv
        raise KeyError('{} not found!'.format(path_expression))

    expectedResults = [
        TimeSeries('servers.s1.disk.pct_used', 0, 3, 1, [0.10, 0.20, 0.30]),
        TimeSeries('servers.s2.disk.pct_used', 0, 3, 1, [0.01, 0.02, 0.03])
    ]

    # Patch fetchData so the template sub-expressions resolve against the
    # in-memory seriesList above instead of a real store.
    with patch('graphite.render.evaluator.fetchData', mock_data_fetcher):
        result = functions.applyByNode(
            {
                'startTime': datetime(1970, 1, 1, 0, 0, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'endTime': datetime(1970, 1, 1, 0, 9, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'localOnly': False,
            },
            seriesList, 1,
            'divideSeries(%.disk.bytes_used, sumSeries(%.disk.bytes_*))',
            '%.disk.pct_used')
    self.assertEqual(result, expectedResults)
def test_fetch_no_tag_support(self):
    """evaluateTarget with a finder lacking tag support should resolve
    seriesByTag() expressions through the tag database before fetching."""

    class TestFinderNoTags(BaseFinder):
        # tags=False: seriesByTag() is resolved via the tagdb, so the
        # finder only ever sees plain path patterns.
        tags = False

        def find_nodes(self, query):
            pass

        def fetch(self, patterns, start_time, end_time, now=None, requestContext=None):
            if patterns != ['notags;hello=tiger']:
                raise Exception('Unexpected patterns %s' % str(patterns))
            return [{
                'pathExpression': 'notags;hello=tiger',
                'name': 'notags;hello=tiger',
                'time_info': (0, 60, 1),
                'values': [],
            }]

    tagdb = Mock()

    def mockFindSeries(exprs, requestContext=None):
        # request_context is bound later; it exists by the time this runs.
        self.assertEqual(requestContext, request_context)
        if exprs == ('hello=tiger', ) or exprs == ('name=notags', ):
            return ['notags;hello=tiger']
        if exprs == ('name=testtags', ):
            return []
        raise Exception('Unexpected exprs %s' % str(exprs))

    tagdb.find_series.side_effect = mockFindSeries
    store = Store(finders=[TestFinderNoTags()], tagdb=tagdb)
    with patch('graphite.render.datalib.STORE', store):
        request_context = {
            'startTime': epoch_to_dt(0),
            'endTime': epoch_to_dt(60),
            'now': epoch_to_dt(60),
        }
        results = evaluateTarget(request_context, [
            'notags;hello=tiger',
            'seriesByTag("hello=tiger")',
            'seriesByTag("name=testtags")',
            'seriesByTag("name=notags")'
        ])
    # One tagdb lookup per seriesByTag() target.
    self.assertEqual(tagdb.find_series.call_count, 3)
    # "name=testtags" matched no series, so only three results come back.
    self.assertEqual(results, [
        TimeSeries('notags;hello=tiger', 0, 60, 1, []),
        TimeSeries('notags;hello=tiger', 0, 60, 1, [],
                   pathExpression='seriesByTag("hello=tiger")'),
        TimeSeries('notags;hello=tiger', 0, 60, 1, [],
                   pathExpression='seriesByTag("name=notags")'),
    ])
def test_fetch_tag_support(self):
    """evaluateTarget with a tag-aware finder should pass seriesByTag()
    expressions straight through as fetch patterns."""

    class TestFinderTags(BaseFinder):
        # tags=True: the finder receives seriesByTag() expressions verbatim.
        tags = True

        def find_nodes(self, query):
            pass

        def fetch(self, patterns, start_time, end_time, now=None, requestContext=None):
            if patterns != [
                'seriesByTag("hello=tiger")',
                'seriesByTag("name=notags")',
                'seriesByTag("name=testtags")',
                'testtags;hello=tiger'
            ]:
                raise Exception('Unexpected patterns %s' % str(patterns))
            # No entry is returned for seriesByTag("name=notags"): that
            # pattern matches nothing in this scenario.
            return [
                {
                    'pathExpression': 'testtags;hello=tiger',
                    'name': 'testtags;hello=tiger',
                    'time_info': (0, 60, 1),
                    'values': [],
                },
                {
                    'pathExpression': 'seriesByTag("hello=tiger")',
                    'name': 'testtags;hello=tiger',
                    'time_info': (0, 60, 1),
                    'values': [],
                },
                {
                    'pathExpression': 'seriesByTag("name=testtags")',
                    'name': 'testtags;hello=tiger',
                    'time_info': (0, 60, 1),
                    'values': [],
                },
            ]

    tagdb = Mock()
    store = Store(finders=[TestFinderTags()], tagdb=tagdb)
    request_context = {
        'startTime': epoch_to_dt(0),
        'endTime': epoch_to_dt(60),
        'now': epoch_to_dt(60),
    }
    with patch('graphite.render.datalib.STORE', store):
        results = evaluateTarget(request_context, [
            'testtags;hello=tiger',
            'seriesByTag("hello=tiger")',
            'seriesByTag("name=testtags")',
            'seriesByTag("name=notags")'
        ])
    self.assertEqual(results, [
        TimeSeries('testtags;hello=tiger', 0, 60, 1, []),
        TimeSeries('testtags;hello=tiger', 0, 60, 1, [],
                   pathExpression='seriesByTag("hello=tiger")'),
        TimeSeries('testtags;hello=tiger', 0, 60, 1, [],
                   pathExpression='seriesByTag("name=testtags")'),
    ])
def stdev(requestContext, seriesList, time):
    """
    Takes one metric or a wildcard seriesList followed by an integer N.
    Draw the Standard Deviation of all metrics passed for the past N
    datapoints.

    Example:

    .. code-block:: none

      &target=stddev(server*.instance*.threads.busy,30)

    """
    count = 0
    for series in seriesList:
        stddevs = TimeSeries("stddev(%s,%.1f)" % (series.name, float(time)),
                             series.start, series.end, series.step, [])
        stddevs.pathExpression = "stddev(%s,%.1f)" % (series.name, float(time))
        # Seed the rolling window with the first `time` points.
        avg = safeDiv(safeSum(series[:time]), time)
        if avg is not None:
            # Python 3 fix: `lambda(x): x * x` is Python 2-only syntax and a
            # SyntaxError on Python 3; a generator expression replaces it.
            sumOfSquares = sum(v * v for v in series[:time] if v is not None)
            (sd, sumOfSquares) = doStdDev(sumOfSquares, 0, 0, time, avg)
            stddevs.append(sd)
        else:
            stddevs.append(None)
        # Slide the window one point at a time over the rest of the series,
        # updating the running average and sum of squares incrementally.
        for (index, el) in enumerate(series[time:]):
            if el is None:
                continue
            toDrop = series[index]
            if toDrop is None:
                toDrop = 0
            s = safeSum([safeMul(time, avg), el, -toDrop])
            avg = safeDiv(s, time)
            if avg is not None:
                (sd, sumOfSquares) = doStdDev(sumOfSquares, toDrop,
                                              series[index + time], time, avg)
                stddevs.append(sd)
            else:
                stddevs.append(None)
        # Pad the front so the output aligns with the input series.
        for i in range(0, time - 1):
            stddevs.insert(0, None)
        seriesList[count] = stddevs
        count = count + 1
    return seriesList
def test_nonempty_false_nones(self):
    """A series containing only None datapoints is considered empty."""
    all_none = TimeSeries("collectd.test-db.load.value", 0, 4, 1, [None] * 4)
    self.assertFalse(nonempty(all_none))
def test_nonempty_true(self):
    """A series holding real datapoints is reported as non-empty."""
    data = range(0, 100)
    series = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    self.assertTrue(nonempty(series))
def test_TimeSeries_equal_list(self):
    """A TimeSeries must not compare equal to its bare values sequence."""
    data = range(0, 100)
    series = TimeSeries("collectd.test-db.load.value", 0, len(data), 1, data)
    with self.assertRaises(AssertionError):
        self.assertEqual(data, series)
def test_TimeSeries_init_tag_parse_fail(self):
    """A name with a trailing ';' cannot be parsed as tags; the whole
    string becomes the 'name' tag."""
    bad_name = "collectd.test-db.load.value;"
    series = TimeSeries(bad_name, 0, 2, 1, [1, 2])
    self.assertEqual(series.tags, {'name': bad_name})
def hitcount(requestContext, seriesList, intervalString):
    """
    Estimate hit counts from a list of time series.

    This function assumes the values in each time series represent
    hits per second.  It calculates hits per some larger interval
    such as per day or per hour.  This function is like summarize(),
    except that it compensates automatically for different time scales
    (so that a similar graph results from using either fine-grained
    or coarse-grained records) and handles rarely-occurring events
    gracefully.
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = int(delta.seconds + (delta.days * 86400))

    for series in seriesList:
        length = len(series)
        step = int(series.step)
        bucket_count = int(math.ceil(float(series.end - series.start) / interval))
        buckets = [[] for _ in range(bucket_count)]
        # Align buckets so the last one ends exactly at series.end.
        newStart = int(series.end - bucket_count * interval)

        for i, value in enumerate(series):
            if value is None:
                continue

            # Locate which bucket(s) this datapoint's [start_time, end_time)
            # span overlaps; *_mod is the offset within that bucket.
            start_time = int(series.start + i * step)
            start_bucket, start_mod = divmod(start_time - newStart, interval)
            end_time = start_time + step
            end_bucket, end_mod = divmod(end_time - newStart, interval)

            if end_bucket >= bucket_count:
                end_bucket = bucket_count - 1
                end_mod = interval

            if start_bucket == end_bucket:
                # All of the hits go to a single bucket.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (end_mod - start_mod))

            else:
                # Spread the hits among 2 or more buckets, weighting each
                # share by the seconds of overlap (value is hits/second).
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (interval - start_mod))
                hits_per_bucket = value * interval
                for j in range(start_bucket + 1, end_bucket):
                    buckets[j].append(hits_per_bucket)
                if end_mod > 0:
                    buckets[end_bucket].append(value * end_mod)

        # Empty buckets become None rather than 0 to preserve gaps.
        newValues = []
        for bucket in buckets:
            if bucket:
                newValues.append( sum(bucket) )
            else:
                newValues.append(None)

        newName = 'hitcount(%s, "%s")' % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, series.end, interval, newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)

    return results
def test_TimeSeries_getInfo(self):
    """getInfo should expose name, values, start/end/step and pathExpression."""
    # list(...) is needed for Python 3: a bare range object never compares
    # equal to the list of values reported by getInfo().
    values = list(range(0, 100))
    series = TimeSeries("collectd.test-db.load.value", 0, len(values), 1, values)
    self.assertEqual(series.getInfo(), {
        'name': 'collectd.test-db.load.value',
        'values': values,
        'start': 0,
        'step': 1,
        'end': len(values),
        'pathExpression': 'collectd.test-db.load.value',
    })