def test_linearRegression(self):
    """Verify linearRegression() fits a line to an earlier window of data
    and projects it onto the requested render range."""
    # Monkeypatch evaluateTarget so the function "fetches" our fixed history
    # instead of hitting real storage; restored in the finally block.
    original = functions.evaluateTarget
    try:
        # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
        # steps are every 60 seconds
        # NOTE: the trailing comma makes savedSeries a 1-tuple, mimicking the
        # list of series that evaluateTarget normally returns.
        savedSeries = TimeSeries('test.value',180,480,60,[3,None,5,6,None,8]),
        functions.evaluateTarget = lambda x, y: savedSeries
        # input values will be ignored and replaced by regression function
        inputSeries = TimeSeries('test.value',1200,1500,60,[123,None,None,456,None,None,None])
        inputSeries.pathExpression = 'test.value'
        results = functions.linearRegression({
            'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)),
            'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)),
            'localOnly': False,
            'data': [],
        }, [ inputSeries ], '00:03 19700101', '00:08 19700101')
        # regression function calculated from datapoints on minutes 3 to 8
        expectedResult = [
            TimeSeries('linearRegression(test.value, 180, 480)',1200,1500,60,[20.0,21.0,22.0,23.0,24.0,25.0,26.0])
        ]
        self.assertEqual(results, expectedResult)
    finally:
        functions.evaluateTarget = original
def test_linearRegression(self):
    """Verify linearRegression() fits a line to an earlier window of data
    and projects it onto the requested render range."""
    # Monkeypatch evaluateTarget so the function "fetches" our fixed history
    # instead of hitting real storage; restored in the finally block.
    original = functions.evaluateTarget
    try:
        # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
        # steps are every 60 seconds
        # NOTE: the trailing comma makes savedSeries a 1-tuple, mimicking the
        # list of series that evaluateTarget normally returns.
        savedSeries = TimeSeries('test.value',180,480,60,[3,None,5,6,None,8]),
        functions.evaluateTarget = lambda x, y: savedSeries
        # input values will be ignored and replaced by regression function
        inputSeries = TimeSeries('test.value',1200,1500,60,[123,None,None,456,None,None,None])
        inputSeries.pathExpression = 'test.value'
        results = functions.linearRegression({
            'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)),
            'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)),
            'localOnly': False,
            'data': [],
        }, [ inputSeries ], '00:03 19700101', '00:08 19700101')
        # regression function calculated from datapoints on minutes 3 to 8
        expectedResult = [
            TimeSeries('linearRegression(test.value, 180, 480)',1200,1500,60,[20.0,21.0,22.0,23.0,24.0,25.0,26.0])
        ]
        self.assertEqual(results, expectedResult)
    finally:
        functions.evaluateTarget = original
def derivative(requestContext, seriesList):
    """
    Opposite of the integral function: graphs the point-to-point change of a
    running-total metric.

    Example:

    .. code-block:: none

      &target=derivative(company.server.application01.ifconfig.TXPackets)

    Useful for counters such as ifconfig's RX/TXPackets, which only ever grow
    while traffic flows: the derivative shows the per-interval rate even
    though only the cumulative total is recorded.
    """
    output = []
    for series in seriesList:
        deltas = []
        previous = None
        for current in series:
            # A gap on either side of the pair produces a gap in the output.
            if previous is None or current is None:
                deltas.append(None)
            else:
                deltas.append(current - previous)
            previous = current
        label = "derivative(%s)" % series.name
        derived = TimeSeries(label, series.start, series.end, series.step, deltas)
        derived.pathExpression = label
        output.append(derived)
    return output
def log(requestContext, seriesList, base=10):
    """
    Takes one metric or a wildcard seriesList, a base, and draws the y-axis in
    logarithmic format.  If base is omitted, the function defaults to base 10.

    Example:

    .. code-block:: none

      &target=log(carbon.agents.hostname.avgUpdateTime,2)
    """
    transformed = []
    for series in seriesList:
        logValues = []
        for point in series:
            # log() is undefined for None and for non-positive values.
            if point is None or point <= 0:
                logValues.append(None)
            else:
                logValues.append(math.log(point, base))
        label = "log(%s, %s)" % (series.name, base)
        logSeries = TimeSeries(label, series.start, series.end, series.step, logValues)
        logSeries.pathExpression = label
        transformed.append(logSeries)
    return transformed
def movingAverage(requestContext, seriesList, windowSize):
    """
    Takes one metric or a wildcard seriesList followed by a number N of
    datapoints and graphs the average of N previous datapoints.  N-1
    datapoints are set to None at the beginning of the graph.

    .. code-block:: none

      &target=movingAverage(Server.instance01.threads.busy,10)
    """
    for seriesIndex, series in enumerate(seriesList):
        newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        windowIndex = windowSize - 1
        for i in range(len(series)):
            if i < windowIndex:
                # Pad the beginning with None's since we don't have enough data
                newSeries.append(None)
            else:
                window = series[i - windowIndex:i + 1]
                nonNull = [v for v in window if v is not None]
                if nonNull:
                    # BUG FIX: force float division -- with all-integer
                    # datapoints the previous sum/len truncated under
                    # Python 2 integer division.
                    newSeries.append(float(sum(nonNull)) / len(nonNull))
                else:
                    newSeries.append(None)
        seriesList[seriesIndex] = newSeries
    return seriesList
def asPercent(requestContext, seriesList1, seriesList2orNumber):
    # Renders series1 as a percentage of either a second series or a constant.
    assert len(seriesList1) == 1, "asPercent series arguments must reference *exactly* 1 series"
    series1 = seriesList1[0]
    if type(seriesList2orNumber) is list:
        assert len(seriesList2orNumber) == 1, "asPercent series arguments must reference *exactly* 1 series"
        series2 = seriesList2orNumber[0]
        name = "asPercent(%s,%s)" % (series1.name, series2.name)
        pair = (series1, series2)
        # Consolidate both series onto a common step before pairing values.
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate(step / s.step)
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step
        values = (safeMul(safeDiv(v1, v2), 100.0) for v1, v2 in izip(*pair))
    else:
        number = float(seriesList2orNumber)
        name = "asPercent(%s,%.1f)" % (series1.name, number)
        step = series1.step
        start = series1.start
        end = series1.end
        values = (safeMul(safeDiv(v, number), 100.0) for v in series1)
    percentSeries = TimeSeries(name, start, end, step, values)
    percentSeries.pathExpression = name
    return [percentSeries]
def sumSeries(requestContext, *seriesLists):
    """
    Short form: sum()

    This will add metrics together and return the sum at each datapoint. (See
    integral for a sum over time)

    Example:

    .. code-block:: none

      &target=sum(company.server.application*.requestsHandled)

    This would show the sum of all requests handled per minute (provided
    requestsHandled are collected once a minute).   If metrics with different
    retention rates are combined, the coarsest metric is graphed, and the sum
    of the other metrics is averaged for the metrics with finer retention rates.
    """
    try:
        (seriesList, start, end, step) = normalize(seriesLists)
    except Exception:
        # Nothing to sum (e.g. empty input) -- return an empty result.
        # Narrowed from a bare "except:" so that system-exiting exceptions
        # (KeyboardInterrupt, SystemExit) are no longer swallowed.
        return []
    # Deduplicate path expressions so wildcard expansions render one name.
    name = "sumSeries(%s)" % ','.join(set([s.pathExpression for s in seriesList]))
    values = (safeSum(row) for row in izip(*seriesList))
    series = TimeSeries(name, start, end, step, values)
    series.pathExpression = name
    return [series]
def divideSeries(requestContext, dividendSeriesList, divisorSeriesList):
    # Divide each dividend series pointwise by the single divisor series.
    if len(divisorSeriesList) != 1:
        raise ValueError("divideSeries second argument must reference exactly 1 series")
    divisor = divisorSeriesList[0]
    quotients = []
    for dividend in dividendSeriesList:
        label = "divideSeries(%s,%s)" % (dividend.name, divisor.name)
        pair = (dividend, divisor)
        # Bring both series to a common resolution before zipping them.
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate(step / s.step)
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step
        ratios = (safeDiv(v1, v2) for v1, v2 in izip(*pair))
        quotient = TimeSeries(label, start, end, step, ratios)
        quotient.pathExpression = label
        quotients.append(quotient)
    return quotients
def nonNegativeDerivative(requestContext, seriesList, maxValue=None):
    # Like derivative(), but treats decreases as counter wraps/resets.
    output = []
    for series in seriesList:
        rates = []
        previous = None
        for current in series:
            if previous is None or current is None:
                rates.append(None)
                previous = current
                continue
            change = current - previous
            if change >= 0:
                rates.append(change)
            elif maxValue is not None and maxValue >= current:
                # Counter wrapped past maxValue: distance up to the ceiling
                # plus the wrap back around to the current value.
                rates.append((maxValue - previous) + current + 1)
            else:
                # Reset with no known ceiling -- emit a gap.
                rates.append(None)
            previous = current
        label = "nonNegativeDerivative(%s)" % series.name
        rateSeries = TimeSeries(label, series.start, series.end, series.step, rates)
        rateSeries.pathExpression = label
        output.append(rateSeries)
    return output
def integral(requestContext, seriesList):
    """
    This will show the sum over time, sort of like a continuous addition
    function.  Useful for finding totals or trends in metrics that are
    collected per minute.

    Example:

    .. code-block:: none

      &target=integral(company.sales.perMinute)

    This would start at zero on the left side of the graph, adding the sales
    each minute, and show the total sales for the time period selected at the
    right side, (time now, or the time specified by '&until=').
    """
    integrated = []
    for series in seriesList:
        runningTotals = []
        total = 0.0
        for point in series:
            # Gaps stay gaps, but do not reset the running total.
            if point is None:
                runningTotals.append(None)
            else:
                total += point
                runningTotals.append(total)
        label = "integral(%s)" % series.name
        result = TimeSeries(label, series.start, series.end, series.step, runningTotals)
        result.pathExpression = label
        integrated.append(result)
    return integrated
def percentileOfSeries(requestContext, *args):
    """
    Returns one series per requested percentile level, where each datapoint
    is the n-th percentile across all input series at that timestamp.

    Levels may be passed as numbers or as a ";"-separated string
    (e.g. "50;90;99"); the remaining arguments are series lists.

    Raises ValueError if any requested level is not greater than 0.
    """
    levels = []
    seriesList = []
    for arg in args:
        logging.info("Arg: %s", arg)
        if isinstance(arg, (int, long, float)):
            levels.append(arg)
        elif isinstance(arg, basestring):
            levels += [float(x) for x in arg.split(";")]
        else:
            seriesList += arg
    logging.info("Levels: %s", levels)
    logging.info("Series: %s", seriesList)
    result = []
    for level in levels:
        # BUG FIX: was "if levels <= 0" -- comparing the *list* to a number,
        # which never triggered under Python 2 (lists compare greater than
        # ints) and raises TypeError under Python 3.  Validate the level.
        if level <= 0:
            raise ValueError('The requested percent is required to be greater than 0')
        name = 'percentilesOfSeries(%s,%g)' % (seriesList[0].pathExpression, level)
        (start, end, step) = functions.normalize([seriesList])[1:]
        values = [functions._getPercentile(row, level, False) for row in functions.izip(*seriesList)]
        resultSeries = TimeSeries(name, start, end, step, values)
        resultSeries.pathExpression = name
        result.append(resultSeries)
    return result
def maxSeries(requestContext, *seriesLists):
    # Pointwise maximum across all input series.
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = list(set([s.pathExpression for s in seriesList]))
    label = "maxSeries(%s)" % ','.join(uniquePaths)
    maxima = (safeMax(row) for row in izip(*seriesList))
    result = TimeSeries(label, start, end, step, maxima)
    result.pathExpression = label
    return [result]
def averageSeries(requestContext, *seriesLists):
    # Pointwise mean across all input series.
    (seriesList, start, end, step) = normalize(seriesLists)
    label = "averageSeries(%s)" % ','.join(set([s.pathExpression for s in seriesList]))
    means = (safeDiv(safeSum(row), safeLen(row)) for row in izip(*seriesList))
    result = TimeSeries(label, start, end, step, means)
    result.pathExpression = label
    return [result]
def sumSeries(requestContext, *seriesLists):
    """Pointwise sum of all input series (short form: sum())."""
    try:
        (seriesList, start, end, step) = normalize(seriesLists)
    except Exception:
        # Still returns an empty result when normalize() fails (e.g. no
        # series), but narrowed from a bare "except:" so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return []
    # Deduplicate path expressions so wildcard expansions render one name.
    name = "sumSeries(%s)" % ','.join(set([s.pathExpression for s in seriesList]))
    values = (safeSum(row) for row in izip(*seriesList))
    series = TimeSeries(name, start, end, step, values)
    series.pathExpression = name
    return [series]
def stdev(requestContext, seriesList, time):
    """
    Takes one metric or a wildcard seriesList followed by an integer N.
    Draw the Standard Deviation of all metrics passed for the past N
    datapoints.

    Example:

    .. code-block:: none

      &target=stddev(server*.instance*.threads.busy,30)
    """
    count = 0
    for series in seriesList:
        stddevs = TimeSeries("stddev(%s,%.1f)" % (series.name, float(time)),
                             series.start, series.end, series.step, [])
        stddevs.pathExpression = "stddev(%s,%.1f)" % (series.name, float(time))
        # Seed the rolling window with the first `time` datapoints.
        avg = safeDiv(safeSum(series[:time]), time)
        if avg is not None:
            # FIX: replaced "lambda (x): x * x" -- a Python-2-only
            # tuple-parameter lambda (removed by PEP 3113, a SyntaxError on
            # Python 3) -- with an equivalent generator expression.
            sumOfSquares = sum(v * v for v in series[:time] if v is not None)
            (sd, sumOfSquares) = doStdDev(sumOfSquares, 0, 0, time, avg)
            stddevs.append(sd)
        else:
            stddevs.append(None)
        # Slide the window one point at a time over the rest of the series.
        for (index, el) in enumerate(series[time:]):
            if el is None:
                continue
            toDrop = series[index]
            if toDrop is None:
                toDrop = 0
            s = safeSum([safeMul(time, avg), el, -toDrop])
            avg = safeDiv(s, time)
            if avg is not None:
                (sd, sumOfSquares) = doStdDev(sumOfSquares, toDrop, series[index + time], time, avg)
                stddevs.append(sd)
            else:
                stddevs.append(None)
        # Left-pad so the output aligns with the input timestamps.
        for i in range(0, time - 1):
            stddevs.insert(0, None)
        seriesList[count] = stddevs
        count = count + 1
    return seriesList
def log(requestContext, seriesList, base=10):
    # Apply log_base to every positive datapoint; None/non-positive -> None.
    output = []
    for series in seriesList:
        converted = [
            math.log(v, base) if v is not None and v > 0 else None
            for v in series
        ]
        label = "log(%s, %s)" % (series.name, base)
        logSeries = TimeSeries(label, series.start, series.end, series.step, converted)
        logSeries.pathExpression = label
        output.append(logSeries)
    return output
def integral(requestContext, seriesList):
    # Running total of each series; gaps stay gaps but do not reset the sum.
    accumulated = []
    for series in seriesList:
        totals = []
        runningSum = 0.0
        for value in series:
            if value is None:
                totals.append(None)
                continue
            runningSum += value
            totals.append(runningSum)
        label = "integral(%s)" % series.name
        summed = TimeSeries(label, series.start, series.end, series.step, totals)
        summed.pathExpression = label
        accumulated.append(summed)
    return accumulated
def derivative(requestContext, seriesList):
    # Point-to-point difference of each series; gaps propagate to the output.
    output = []
    for series in seriesList:
        diffs = []
        last = None
        for value in series:
            diffs.append(None if last is None or value is None else value - last)
            last = value
        label = "derivative(%s)" % series.name
        derived = TimeSeries(label, series.start, series.end, series.step, diffs)
        derived.pathExpression = label
        output.append(derived)
    return output
def minSeries(requestContext, *seriesLists):
    """
    Takes one metric or a wildcard seriesList.
    For each datapoint from each metric passed in, pick the minimum value and
    graph it.

    Example:

    .. code-block:: none

      &target=minSeries(Server*.connections.total)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = list(set([s.pathExpression for s in seriesList]))
    label = "minSeries(%s)" % ','.join(uniquePaths)
    minima = (safeMin(row) for row in izip(*seriesList))
    result = TimeSeries(label, start, end, step, minima)
    result.pathExpression = label
    return [result]
def diffSeries(requestContext, *seriesLists):
    """
    Can take two or more metrics, or a single metric and a constant.
    Subtracts parameters 2 through n from parameter 1.

    Example:

    .. code-block:: none

      &target=diffSeries(service.connections.total,service.connections.failed)
      &target=diffSeries(service.connections.total,5)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    label = "diffSeries(%s)" % ','.join(uniquePaths)
    differences = (safeDiff(row) for row in izip(*seriesList))
    result = TimeSeries(label, start, end, step, differences)
    result.pathExpression = label
    return [result]
def nonNegativeDerivative(requestContext, seriesList, maxValue=None):
    """
    Same as the derivative function above, but ignores datapoints that trend
    down.  Useful for counters that increase for a long time, then wrap or
    reset. (Such as if a network interface is destroyed and recreated by
    unloading and re-loading a kernel module, common with USB / WiFi cards.

    Example:

    .. code-block:: none

      &target=derivative(company.server.application01.ifconfig.TXPackets)
    """
    results = []
    for series in seriesList:
        deltas = []
        lastValue = None
        for value in series:
            if lastValue is None or value is None:
                deltas.append(None)
            else:
                change = value - lastValue
                if change >= 0:
                    deltas.append(change)
                elif maxValue is not None and maxValue >= value:
                    # The counter wrapped past its known ceiling.
                    deltas.append((maxValue - lastValue) + value + 1)
                else:
                    deltas.append(None)
            lastValue = value
        label = "nonNegativeDerivative(%s)" % series.name
        rateSeries = TimeSeries(label, series.start, series.end, series.step, deltas)
        rateSeries.pathExpression = label
        results.append(rateSeries)
    return results
def stdev(requestContext, seriesList, time):
    """Rolling standard deviation of each series over the past `time`
    datapoints, left-padded with None so output aligns with the input."""
    count = 0
    for series in seriesList:
        stddevs = TimeSeries("stddev(%s,%.1f)" % (series.name, float(time)),
                             series.start, series.end, series.step, [])
        stddevs.pathExpression = "stddev(%s,%.1f)" % (series.name, float(time))
        # Seed the rolling window with the first `time` datapoints.
        avg = safeDiv(safeSum(series[:time]), time)
        if avg is not None:
            # FIX: replaced "lambda (x): x * x" -- a Python-2-only
            # tuple-parameter lambda (removed by PEP 3113, a SyntaxError on
            # Python 3) -- with an equivalent generator expression.
            sumOfSquares = sum(v * v for v in series[:time] if v is not None)
            (sd, sumOfSquares) = doStdDev(sumOfSquares, 0, 0, time, avg)
            stddevs.append(sd)
        else:
            stddevs.append(None)
        # Slide the window one point at a time over the rest of the series.
        for (index, el) in enumerate(series[time:]):
            if el is None:
                continue
            toDrop = series[index]
            if toDrop is None:
                toDrop = 0
            s = safeSum([safeMul(time, avg), el, -toDrop])
            avg = safeDiv(s, time)
            if avg is not None:
                (sd, sumOfSquares) = doStdDev(sumOfSquares, toDrop, series[index + time], time, avg)
                stddevs.append(sd)
            else:
                stddevs.append(None)
        # Left-pad so the output aligns with the input timestamps.
        for i in range(0, time - 1):
            stddevs.insert(0, None)
        seriesList[count] = stddevs
        count = count + 1
    return seriesList
def divideSeries(requestContext, dividendSeriesList, divisorSeriesList):
    """
    Takes a dividend metric and a divisor metric and draws the division
    result.  A constant may *not* be passed. To divide by a constant, use the
    scale() function (which is essentially a multiplication operation) and use
    the inverse of the dividend. (Division by 8 = multiplication by 1/8 or
    0.125)

    Example:

    .. code-block:: none

      &target=asPercent(Series.dividends,Series.divisors)
    """
    if len(divisorSeriesList) != 1:
        raise ValueError("divideSeries second argument must reference exactly 1 series")
    divisorSeries = divisorSeriesList[0]
    results = []
    for dividendSeries in dividendSeriesList:
        name = "divideSeries(%s,%s)" % (dividendSeries.name, divisorSeries.name)
        pair = (dividendSeries, divisorSeries)
        # Consolidate both onto the least common multiple of their steps.
        commonStep = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate(commonStep / s.step)
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % commonStep
        values = (safeDiv(v1, v2) for v1, v2 in izip(*pair))
        quotient = TimeSeries(name, start, end, commonStep, values)
        quotient.pathExpression = name
        results.append(quotient)
    return results
def averageSeries(requestContext, *seriesLists):
    """
    Short Alias: avg()

    Takes one metric or a wildcard seriesList.
    Draws the average value of all metrics passed at each time.

    Example:

    .. code-block:: none

      &target=averageSeries(company.server.*.threads.busy)
    """
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    label = "averageSeries(%s)" % ','.join(uniquePaths)
    averages = (safeDiv(safeSum(row), safeLen(row)) for row in izip(*seriesList))
    result = TimeSeries(label, start, end, step, averages)
    result.pathExpression = label
    return [result]
def centered_mov_avg(requestContext, seriesList, windowSize):
    """Centered moving average: each output point averages a window of
    datapoints straddling (roughly half before, half after) that point.

    windowSize may be a point count or a time-offset string (e.g. "10min").
    """
    windowInterval = None
    if isinstance(windowSize, basestring):
        delta = functions.parseTimeOffset(windowSize)
        windowInterval = abs(delta.seconds + (delta.days * 86400))
    # Fetch extra leading history so the window can be filled at range edges.
    if windowInterval:
        bootstrapSeconds = windowInterval
    else:
        bootstrapSeconds = max([s.step for s in seriesList]) * int(windowSize)
    bootstrapList = functions._fetchWithBootstrap(requestContext, seriesList, seconds=bootstrapSeconds)
    result = []
    for bootstrap, series in zip(bootstrapList, seriesList):
        if windowInterval:
            # Integer division under Python 2 -- windowPoints is a point count.
            windowPoints = windowInterval / series.step
        else:
            windowPoints = int(windowSize)
        if isinstance(windowSize, basestring):
            newName = 'centeredMovingAverage(%s,"%s")' % (series.name, windowSize)
        else:
            newName = "centeredMovingAverage(%s,%s)" % (series.name, windowSize)
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        # offset = number of bootstrap-only points preceding the real range.
        offset = len(bootstrap) - len(series)
        logging.info("Offset: %s", offset)
        logging.info("windowPoints: %s", windowPoints)
        for i in range(len(series)):
            # Window centered on point i: extends windowPoints/2 past it and
            # the remainder before it (py2 integer division on windowPoints/2).
            window = bootstrap[i + offset - windowPoints + windowPoints / 2:i + offset + windowPoints / 2]
            logging.info("window: %s", len(window))
            newSeries.append(functions.safeAvg(window))
        result.append(newSeries)
    return result
def asPercent(requestContext, seriesList1, seriesList2orNumber):
    """
    Takes exactly two metrics, or a metric and a constant.
    Draws the first metric as a percent of the second.

    Example:

    .. code-block:: none

      &target=asPercent(Server01.connections.failed,Server01.connections,total)
      &target=asPercent(apache01.threads.busy,1500)
    """
    assert len(seriesList1) == 1, "asPercent series arguments must reference *exactly* 1 series"
    series1 = seriesList1[0]
    if type(seriesList2orNumber) is list:
        # Series / series: consolidate to a shared step, then divide pointwise.
        assert len(seriesList2orNumber) == 1, "asPercent series arguments must reference *exactly* 1 series"
        series2 = seriesList2orNumber[0]
        name = "asPercent(%s,%s)" % (series1.name, series2.name)
        both = (series1, series2)
        step = reduce(lcm, [s.step for s in both])
        for s in both:
            s.consolidate(step / s.step)
        start = min([s.start for s in both])
        end = max([s.end for s in both])
        end -= (end - start) % step
        values = (safeMul(safeDiv(v1, v2), 100.0) for v1, v2 in izip(*both))
    else:
        # Series / constant: geometry is taken straight from the series.
        number = float(seriesList2orNumber)
        name = "asPercent(%s,%.1f)" % (series1.name, number)
        step = series1.step
        start = series1.start
        end = series1.end
        values = (safeMul(safeDiv(v, number), 100.0) for v in series1)
    resultSeries = TimeSeries(name, start, end, step, values)
    resultSeries.pathExpression = name
    return [resultSeries]
def summarize(requestContext, seriesList, intervalString):
    # Re-bucket each series into fixed-width intervals, summing datapoints.
    results = []
    delta = parseTimeOffset(intervalString)
    interval = delta.seconds + (delta.days * 86400)
    for series in seriesList:
        buckets = {}
        timestamps = range(int(series.start), int(series.end), int(series.step))
        for timestamp, value in zip(timestamps, series):
            # Round the timestamp down to its bucket boundary.
            bucketStart = timestamp - (timestamp % interval)
            buckets.setdefault(bucketStart, [])
            if value is not None:
                buckets[bucketStart].append(value)
        newStart = series.start - (series.start % interval)
        newEnd = series.end - (series.end % interval) + interval
        newValues = []
        for timestamp in range(newStart, newEnd, interval):
            bucket = buckets.get(timestamp, [])
            newValues.append(sum(bucket) if bucket else None)
        newName = "summarize(%s, \"%s\")" % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, newEnd, interval, newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)
    return results
def summarize(requestContext, seriesList, intervalString):
    # Group each series' datapoints into interval-sized buckets, then sum.
    output = []
    offset = parseTimeOffset(intervalString)
    bucketSize = offset.seconds + (offset.days * 86400)
    for series in seriesList:
        collected = {}
        pointTimes = range(int(series.start), int(series.end), int(series.step))
        for when, value in zip(pointTimes, series):
            anchor = when - (when % bucketSize)
            if anchor not in collected:
                collected[anchor] = []
            if value is not None:
                collected[anchor].append(value)
        alignedStart = series.start - (series.start % bucketSize)
        alignedEnd = series.end - (series.end % bucketSize) + bucketSize
        summed = []
        for anchor in range(alignedStart, alignedEnd, bucketSize):
            points = collected.get(anchor, [])
            if points:
                summed.append(sum(points))
            else:
                summed.append(None)
        label = "summarize(%s, \"%s\")" % (series.name, intervalString)
        summary = TimeSeries(label, alignedStart, alignedEnd, bucketSize, summed)
        summary.pathExpression = label
        output.append(summary)
    return output
def stacked(requestContext, seriesLists):
    """
    Takes one metric or a wildcard seriesList and change them so they are
    stacked. This is a way of stacking just a couple of metrics without having
    to use the stacked area mode (that stacks everything). By means of this a
    mixed stacked and non stacked graph can be made

    Example:

    .. code-block:: none

      &target=stacked(company.server.application01.ifconfig.TXPackets)
    """
    # The running totals live in the request context so that several
    # stacked() targets in one render stack on top of each other.
    if 'totalStack' in requestContext:
        totalStack = requestContext['totalStack']
    else:
        totalStack = []
    results = []
    for series in seriesLists:
        stackedValues = []
        for i in range(len(series)):
            # Grow the accumulator lazily to cover this series' length.
            if len(totalStack) <= i:
                totalStack.append(0)
            if series[i] is None:
                stackedValues.append(None)
            else:
                totalStack[i] += series[i]
                stackedValues.append(totalStack[i])
        label = "stacked(%s)" % series.name
        stackedSeries = TimeSeries(label, series.start, series.end, series.step, stackedValues)
        stackedSeries.options['stacked'] = True
        stackedSeries.pathExpression = label
        results.append(stackedSeries)
    requestContext['totalStack'] = totalStack
    return results
def movingAverage(requestContext, seriesList, windowSize):
    """Graphs the average of the previous `windowSize` datapoints for each
    series; the first windowSize-1 points are None (not enough history)."""
    for seriesIndex, series in enumerate(seriesList):
        newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName
        windowIndex = windowSize - 1
        for i in range(len(series)):
            if i < windowIndex:
                # Pad the beginning with None's since we don't have enough data
                newSeries.append(None)
            else:
                window = series[i - windowIndex:i + 1]
                nonNull = [v for v in window if v is not None]
                if nonNull:
                    # BUG FIX: force float division -- with all-integer
                    # datapoints the previous sum/len truncated under
                    # Python 2 integer division.
                    newSeries.append(float(sum(nonNull)) / len(nonNull))
                else:
                    newSeries.append(None)
        seriesList[seriesIndex] = newSeries
    return seriesList
def hitcount(requestContext, seriesList, intervalString):
    """
    Estimate hit counts from a list of time series.

    This function assumes the values in each time series represent hits per
    second.  It calculates hits per some larger interval such as per day or
    per hour.  This function is like summarize(), except that it compensates
    automatically for different time scales (so that a similar graph results
    from using either fine-grained or coarse-grained records) and handles
    rarely-occurring events gracefully.
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = int(delta.seconds + (delta.days * 86400))
    for series in seriesList:
        length = len(series)
        step = int(series.step)
        # Buckets are anchored to the *end* of the series, so any partial
        # leading data lands in a (possibly negative-indexed) bucket that is
        # simply discarded via the start_bucket >= 0 guards below.
        bucket_count = int(math.ceil(float(series.end - series.start) / interval))
        buckets = [[] for _ in range(bucket_count)]
        newStart = int(series.end - bucket_count * interval)
        for i, value in enumerate(series):
            if value is None:
                continue
            # Each datapoint covers [start_time, end_time); its contribution
            # to a bucket is value (hits/sec) times the overlap in seconds.
            start_time = int(series.start + i * step)
            start_bucket, start_mod = divmod(start_time - newStart, interval)
            end_time = start_time + step
            end_bucket, end_mod = divmod(end_time - newStart, interval)
            if end_bucket >= bucket_count:
                # Clamp overhang past the last bucket.
                end_bucket = bucket_count - 1
                end_mod = interval
            if start_bucket == end_bucket:
                # All of the hits go to a single bucket.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (end_mod - start_mod))
            else:
                # Spread the hits among 2 or more buckets.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (interval - start_mod))
                hits_per_bucket = value * interval
                for j in range(start_bucket + 1, end_bucket):
                    buckets[j].append(hits_per_bucket)
                if end_mod > 0:
                    buckets[end_bucket].append(value * end_mod)
        newValues = []
        for bucket in buckets:
            if bucket:
                newValues.append(sum(bucket))
            else:
                newValues.append(None)
        newName = 'hitcount(%s, "%s")' % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, series.end, interval, newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)
    return results
def hitcount(requestContext, seriesList, intervalString):
    """Estimate hit counts from a list of time series.

    This function assumes the values in each time series represent
    hits per second.  It calculates hits per some larger interval
    such as per day or per hour.  This function is like summarize(),
    except that it compensates automatically for different time scales
    (so that a similar graph results from using either fine-grained
    or coarse-grained records) and handles rarely-occurring events
    gracefully.
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = int(delta.seconds + (delta.days * 86400))
    for series in seriesList:
        length = len(series)
        step = int(series.step)
        # Buckets are anchored to the *end* of the series; partial leading
        # data falls into negative-indexed buckets that the start_bucket >= 0
        # guards below silently drop.
        bucket_count = int(math.ceil(float(series.end - series.start) / interval))
        buckets = [[] for _ in range(bucket_count)]
        newStart = int(series.end - bucket_count * interval)
        for i, value in enumerate(series):
            if value is None:
                continue
            # Datapoint i covers [start_time, end_time); its contribution to
            # each bucket is value (hits/sec) times the seconds of overlap.
            start_time = int(series.start + i * step)
            start_bucket, start_mod = divmod(start_time - newStart, interval)
            end_time = start_time + step
            end_bucket, end_mod = divmod(end_time - newStart, interval)
            if end_bucket >= bucket_count:
                # Clamp overhang past the final bucket.
                end_bucket = bucket_count - 1
                end_mod = interval
            if start_bucket == end_bucket:
                # All of the hits go to a single bucket.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (end_mod - start_mod))
            else:
                # Spread the hits among 2 or more buckets.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (interval - start_mod))
                hits_per_bucket = value * interval
                for j in range(start_bucket + 1, end_bucket):
                    buckets[j].append(hits_per_bucket)
                if end_mod > 0:
                    buckets[end_bucket].append(value * end_mod)
        newValues = []
        for bucket in buckets:
            if bucket:
                newValues.append(sum(bucket))
            else:
                newValues.append(None)
        newName = 'hitcount(%s, "%s")' % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, series.end, interval, newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)
    return results
def summarize(requestContext, seriesList, intervalString, func='sum', alignToFrom=False):
    """
    Summarize the data into interval buckets of a certain size.

    By default, the contents of each interval bucket are summed together.
    This is useful for counters where each increment represents a discrete
    event and retrieving a "per X" value requires summing all the events in
    that interval.

    Specifying 'avg' instead will return the mean for each bucket, which can
    be more useful when the value is a gauge that represents a certain value
    in time.  'max', 'min' or 'last' can also be specified.

    By default, buckets are caculated by rounding to the nearest interval.
    This works well for intervals smaller than a day.  For example, 22:32
    will end up in the bucket 22:00-23:00 when the interval=1hour.

    Passing alignToFrom=true will instead create buckets starting at the from
    time.  In this case, the bucket for 22:32 depends on the from time.  If
    from=6:30 then the 1hour bucket for 22:32 is 22:30-23:30.

    Example:

    .. code-block:: none

      &target=summarize(counter.errors, "1hour") # total errors per hour
      &target=summarize(nonNegativeDerivative(gauge.num_users), "1week") # new users per week
      &target=summarize(queue.size, "1hour", "avg") # average queue size per hour
      &target=summarize(queue.size, "1hour", "max") # maximum queue size during each hour
      &target=summarize(metric, "13week", "avg", true)&from=midnight+20100101 # 2010 Q1-4
    """
    results = []
    delta = parseTimeOffset(intervalString)
    interval = delta.seconds + (delta.days * 86400)
    for series in seriesList:
        buckets = {}
        timestamps = range(int(series.start), int(series.end), int(series.step))
        datapoints = zip(timestamps, series)
        # Phase 1: distribute every datapoint into its bucket.
        for (timestamp, value) in datapoints:
            if alignToFrom:
                # Bucket index counted from the series start.
                bucketInterval = int((timestamp - series.start) / interval)
            else:
                # Bucket keyed by the nearest interval boundary.
                bucketInterval = timestamp - (timestamp % interval)
            if bucketInterval not in buckets:
                buckets[bucketInterval] = []
            if value is not None:
                buckets[bucketInterval].append(value)
        if alignToFrom:
            newStart = series.start
            newEnd = series.end
        else:
            # Snap the output range outward to interval boundaries.
            newStart = series.start - (series.start % interval)
            newEnd = series.end - (series.end % interval) + interval
        # Phase 2: reduce each bucket with the requested aggregation.
        newValues = []
        for timestamp in range(newStart, newEnd, interval):
            if alignToFrom:
                # Track the last emitted timestamp; the true end is fixed up
                # (+= interval) after the loop.
                newEnd = timestamp
                bucketInterval = int((timestamp - series.start) / interval)
            else:
                bucketInterval = timestamp - (timestamp % interval)
            bucket = buckets.get(bucketInterval, [])
            if bucket:
                if func == 'avg':
                    newValues.append(float(sum(bucket)) / float(len(bucket)))
                elif func == 'last':
                    newValues.append(bucket[len(bucket)-1])
                elif func == 'max':
                    newValues.append(max(bucket))
                elif func == 'min':
                    newValues.append(min(bucket))
                else:
                    newValues.append(sum(bucket))
            else:
                newValues.append(None)
        if alignToFrom:
            newEnd += interval
        newName = "summarize(%s, \"%s\")" % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, newEnd, interval, newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)
    return results
def ASAP(requestContext, seriesList, resolution=1000):
    '''
    use the ASAP smoothing on a series

    https://arxiv.org/pdf/1703.00983.pdf
    https://raw.githubusercontent.com/stanford-futuredata/ASAP/master/ASAP.py

    :param requestContext: render request context dict (must contain
        'startTime' and the parsed 'args' -- used to re-fetch with preview)
    :param seriesList: list of TimeSeries to smooth
    :param resolution: either number of points to keep or a time resolution
    :return: smoothed(seriesList)
    '''
    if not seriesList:
        return []
    # When resolution is a string (e.g. "10min") treat it as a time window;
    # otherwise it is a target point count.
    windowInterval = None
    if isinstance(resolution, six.string_types):
        delta = parseTimeOffset(resolution)
        windowInterval = abs(delta.seconds + (delta.days * 86400))
    if windowInterval:
        previewSeconds = windowInterval
    else:
        previewSeconds = max([s.step for s in seriesList]) * int(resolution)
    # ignore original data and pull new, including our preview
    # data from earlier is needed to calculate the early results
    newContext = requestContext.copy()
    newContext['startTime'] = (requestContext['startTime'] - timedelta(seconds=previewSeconds))
    previewList = evaluateTokens(newContext, requestContext['args'][0])
    result = []
    for series in previewList:
        if windowInterval:
            # the resolution here is really the number of points to maintain
            # so we need to convert the "seconds" to num points
            windowPoints = round((series.end - series.start) / windowInterval)
        else:
            # Cap the requested point count at the series length.
            use_res = int(resolution)
            if len(series) < use_res:
                use_res = len(series)
            windowPoints = use_res
        if isinstance(resolution, six.string_types):
            newName = 'asap(%s,"%s")' % (series.name, resolution)
        else:
            newName = "asap(%s,%s)" % (series.name, resolution)
        # Provisional step; corrected below once smooth() fixes the length.
        step_guess = (series.end - series.start) // windowPoints
        newSeries = TimeSeries(newName, series.start, series.end, step_guess, [])
        newSeries.pathExpression = newName
        # detect "none" lists: with <= 1 real datapoint there is nothing to
        # smooth, so pass the series through unchanged.
        if len([v for v in series if v is not None]) <= 1:
            newSeries.extend(series)
        else:
            # the "resolution" is a suggestion,
            # the algo will alter it some inorder
            # to get the best view for things
            new_s = smooth(series, windowPoints)
            # steps need to be ints, so we must force the issue
            new_step = round((series.end - series.start) / len(new_s))
            newSeries.step = new_step
            newSeries.extend(new_s)
        result.append(newSeries)
    return result