Ejemplo n.º 1
0
    def test_linearRegression(self):
        """linearRegression() should ignore the input series' values and
        return points on the regression line fitted to the reference window
        (minutes 3 through 8 of the stubbed series)."""
        # Swap in a stub evaluateTarget so linearRegression fetches our
        # fixture instead of hitting real storage; restored in finally.
        original = functions.evaluateTarget
        try:
            # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
            # steps are every 60 seconds
            # NOTE(review): the trailing comma makes savedSeries a 1-tuple of
            # TimeSeries; evaluateTarget is expected to return an iterable of
            # series, so this works, but confirm the tuple is intentional.
            savedSeries = TimeSeries('test.value',180,480,60,[3,None,5,6,None,8]),
            functions.evaluateTarget = lambda x, y: savedSeries

            # input values will be ignored and replaced by regression function
            inputSeries = TimeSeries('test.value',1200,1500,60,[123,None,None,456,None,None,None])
            inputSeries.pathExpression = 'test.value'
            # Minimal request context; the '00:03'/'00:08' arguments select
            # the reference window used to fit the regression.
            results = functions.linearRegression({
                'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'localOnly': False,
                'data': [],
            }, [ inputSeries ], '00:03 19700101', '00:08 19700101')

            # regression function calculated from datapoints on minutes 3 to 8
            expectedResult = [
                TimeSeries('linearRegression(test.value, 180, 480)',1200,1500,60,[20.0,21.0,22.0,23.0,24.0,25.0,26.0])
            ]

            self.assertEqual(results, expectedResult)
        finally:
            functions.evaluateTarget = original
Ejemplo n.º 2
0
    def test_linearRegression(self):
        """linearRegression() should ignore the input series' values and
        return points on the regression line fitted to the reference window
        (minutes 3 through 8 of the stubbed series)."""
        # Swap in a stub evaluateTarget so linearRegression fetches our
        # fixture instead of hitting real storage; restored in finally.
        original = functions.evaluateTarget
        try:
            # series starts at 60 seconds past the epoch and continues for 600 seconds (ten minutes)
            # steps are every 60 seconds
            # NOTE(review): the trailing comma makes savedSeries a 1-tuple of
            # TimeSeries; evaluateTarget is expected to return an iterable of
            # series, so this works, but confirm the tuple is intentional.
            savedSeries = TimeSeries('test.value',180,480,60,[3,None,5,6,None,8]),
            functions.evaluateTarget = lambda x, y: savedSeries

            # input values will be ignored and replaced by regression function
            inputSeries = TimeSeries('test.value',1200,1500,60,[123,None,None,456,None,None,None])
            inputSeries.pathExpression = 'test.value'
            # Minimal request context; the '00:03'/'00:08' arguments select
            # the reference window used to fit the regression.
            results = functions.linearRegression({
                'startTime': datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'endTime': datetime(1970, 1, 1, 0, 25, 0, 0, pytz.timezone(settings.TIME_ZONE)),
                'localOnly': False,
                'data': [],
            }, [ inputSeries ], '00:03 19700101', '00:08 19700101')

            # regression function calculated from datapoints on minutes 3 to 8
            expectedResult = [
                TimeSeries('linearRegression(test.value, 180, 480)',1200,1500,60,[20.0,21.0,22.0,23.0,24.0,25.0,26.0])
            ]

            self.assertEqual(results, expectedResult)
        finally:
            functions.evaluateTarget = original
Ejemplo n.º 3
0
def derivative(requestContext, seriesList):
  """
  Graph the point-to-point change of each series.

  This is the opposite of the integral function, and is useful for turning a
  running-total metric into a per-interval rate.  The first datapoint, and
  any pair that includes a None, yields None.

  Example:

  .. code-block:: none

    &target=derivative(company.server.application01.ifconfig.TXPackets)

  Each time you run ifconfig, RXPackets/TXPackets only ever grow; applying
  derivative shows the per-interval change even though only the running
  total is recorded.
  """
  out = []
  for series in seriesList:
    deltas = []
    previous = None
    for current in series:
      if previous is None or current is None:
        deltas.append(None)
      else:
        deltas.append(current - previous)
      previous = current
    name = "derivative(%s)" % series.name
    result = TimeSeries(name, series.start, series.end, series.step, deltas)
    result.pathExpression = name
    out.append(result)
  return out
Ejemplo n.º 4
0
def log(requestContext, seriesList, base=10):
  """
  Draw each series with its values replaced by log(value, base).

  Takes one metric or a wildcard seriesList and an optional base (default
  10).  None datapoints, and values for which the logarithm is undefined
  (<= 0), become None.

  Example:

  .. code-block:: none

    &target=log(carbon.agents.hostname.avgUpdateTime,2)

  """
  transformed = []
  for series in seriesList:
    mapped = [math.log(v, base) if v is not None and v > 0 else None
              for v in series]
    name = "log(%s, %s)" % (series.name, base)
    result = TimeSeries(name, series.start, series.end, series.step, mapped)
    result.pathExpression = name
    transformed.append(result)
  return transformed
Ejemplo n.º 5
0
def movingAverage(requestContext, seriesList, windowSize):
  """

  Takes one metric or a wildcard seriesList followed by a number N of datapoints and graphs 
  the average of N previous datapoints.  N-1 datapoints are set to None at the
  beginning of the graph.  None values inside a window are ignored; a window
  with no data yields None.  Each entry of seriesList is replaced in place
  and the list is returned.

  .. code-block:: none

    &target=movingAverage(Server.instance01.threads.busy,10)

  """
  for seriesIndex, series in enumerate(seriesList):
    newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
    newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
    newSeries.pathExpression = newName

    # A full window of windowSize points is first available at this index.
    windowIndex = windowSize - 1

    for i in range( len(series) ):
      if i < windowIndex: # Pad the beginning with None's since we don't have enough data
        newSeries.append( None )

      else:
        window = series[i - windowIndex : i + 1]
        nonNull = [ v for v in window if v is not None ]
        if nonNull:
          # BUG FIX: float() prevents integer truncation under Python 2's
          # classic division (e.g. the average of [1, 2] was 1, not 1.5).
          newSeries.append( float(sum(nonNull)) / len(nonNull) )
        else:
          newSeries.append(None)

    # Replace in place so callers holding the original list see the result.
    seriesList[ seriesIndex ] = newSeries

  return seriesList
Ejemplo n.º 6
0
def asPercent(requestContext, seriesList1, seriesList2orNumber):
    """Draw the single series in seriesList1 as a percentage of either the
    single series in seriesList2orNumber or a numeric constant."""
    assert len(seriesList1) == 1, \
        "asPercent series arguments must reference *exactly* 1 series"
    numerator = seriesList1[0]
    if type(seriesList2orNumber) is list:
        assert len(seriesList2orNumber) == 1, \
            "asPercent series arguments must reference *exactly* 1 series"
        denominator = seriesList2orNumber[0]
        name = "asPercent(%s,%s)" % (numerator.name, denominator.name)
        pair = (numerator, denominator)
        # Consolidate both series onto their coarsest common step so the
        # values can be zipped point-for-point.
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate(step / s.step)
        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step
        values = (safeMul(safeDiv(num, den), 100.0)
                  for num, den in izip(*pair))
    else:
        constant = float(seriesList2orNumber)
        name = "asPercent(%s,%.1f)" % (numerator.name, constant)
        step = numerator.step
        start = numerator.start
        end = numerator.end
        values = (safeMul(safeDiv(v, constant), 100.0) for v in numerator)
    result = TimeSeries(name, start, end, step, values)
    result.pathExpression = name
    return [result]
Ejemplo n.º 7
0
def sumSeries(requestContext, *seriesLists):
  """
  Short form: sum()

  This will add metrics together and return the sum at each datapoint. (See 
  integral for a sum over time)

  Example:

  .. code-block:: none

    &target=sum(company.server.application*.requestsHandled)

  This would show the sum of all requests handled per minute (provided 
  requestsHandled are collected once a minute).   If metrics with different 
  retention rates are combined, the coarsest metric is graphed, and the sum
  of the other metrics is averaged for the metrics with finer retention rates.

  Returns [] when the inputs cannot be normalized (e.g. no usable series).
  """

  try:
    (seriesList,start,end,step) = normalize(seriesLists)
  except Exception:
    # Narrowed from a bare `except:` — any normalize() failure is still
    # treated as "no data", but KeyboardInterrupt/SystemExit are no longer
    # swallowed.
    return []
  name = "sumSeries(%s)" % ','.join(set([s.pathExpression for s in seriesList]))
  values = ( safeSum(row) for row in izip(*seriesList) )
  series = TimeSeries(name,start,end,step,values)
  series.pathExpression = name
  return [series]
Ejemplo n.º 8
0
def divideSeries(requestContext, dividendSeriesList, divisorSeriesList):
    """Divide each dividend series by the single divisor series, after
    consolidating both onto a common step.

    Raises ValueError unless divisorSeriesList holds exactly one series.
    """
    if len(divisorSeriesList) != 1:
        raise ValueError(
            "divideSeries second argument must reference exactly 1 series")

    divisorSeries = divisorSeriesList[0]
    quotients = []

    for dividendSeries in dividendSeriesList:
        name = "divideSeries(%s,%s)" % (dividendSeries.name,
                                        divisorSeries.name)
        pair = (dividendSeries, divisorSeries)
        # Coarsest common step of the two series.
        step = reduce(lcm, [s.step for s in pair])
        for s in pair:
            s.consolidate(step / s.step)

        start = min([s.start for s in pair])
        end = max([s.end for s in pair])
        end -= (end - start) % step

        ratios = (safeDiv(num, den) for num, den in izip(*pair))

        result = TimeSeries(name, start, end, step, ratios)
        result.pathExpression = name
        quotients.append(result)

    return quotients
Ejemplo n.º 9
0
def nonNegativeDerivative(requestContext, seriesList, maxValue=None):
    """Like derivative(), but a negative delta (counter reset/wrap) becomes
    None — or, when maxValue is supplied and the new value does not exceed
    it, is reconstructed as the wrap-around distance."""
    out = []
    for series in seriesList:
        deltas = []
        previous = None
        for current in series:
            if previous is None or current is None:
                deltas.append(None)
            elif current >= previous:
                deltas.append(current - previous)
            elif maxValue is not None and maxValue >= current:
                # Counter wrapped: distance up to maxValue plus the climb
                # back from zero to the new value.
                deltas.append((maxValue - previous) + current + 1)
            else:
                deltas.append(None)
            previous = current

        newName = "nonNegativeDerivative(%s)" % series.name
        rebuilt = TimeSeries(newName, series.start, series.end, series.step,
                             deltas)
        rebuilt.pathExpression = newName
        out.append(rebuilt)

    return out
Ejemplo n.º 10
0
def integral(requestContext, seriesList):
  """
  Graph each series as its running total — a continuous addition function.
  Useful for finding totals or trends in metrics collected per minute.

  Example: 

  .. code-block:: none

    &target=integral(company.sales.perMinute)

  This starts at zero on the left of the graph, adds each minute's sales,
  and ends at the total for the selected period on the right (time now, or
  the time given by '&until=').  None datapoints stay None and do not
  advance the accumulator.
  """
  out = []
  for series in seriesList:
    runningTotal = 0.0
    accumulated = []
    for value in series:
      if value is None:
        accumulated.append(None)
      else:
        runningTotal += value
        accumulated.append(runningTotal)
    name = "integral(%s)" % series.name
    result = TimeSeries(name, series.start, series.end, series.step, accumulated)
    result.pathExpression = name
    out.append(result)
  return out
Ejemplo n.º 11
0
def percentileOfSeries(requestContext, *args):
    """Compute a per-timestamp percentile over a set of series.

    Arguments may be mixed: numbers are percentile levels, strings are
    ';'-separated lists of levels, and anything else is taken as a list of
    series to add to the input set.  Returns one result series per level.

    Raises ValueError for any level <= 0.
    """
    levels = []
    seriesList = []
    for arg in args:
        logging.info("Arg: %s", arg)
        if isinstance(arg, (int, long, float)):
            levels.append(arg)
        elif isinstance(arg, basestring):
            levels += [float(x) for x in arg.split(";")]
        else:
            seriesList += arg

    logging.info("Levels: %s", levels)
    logging.info("Series: %s", seriesList)

    result = []
    for level in levels:
        # BUG FIX: previously compared the whole `levels` list against 0
        # (always False under Python 2's list/int ordering), so invalid
        # levels were never rejected.  Validate the individual level.
        if level <= 0:
            raise ValueError('The requested percent is required to be greater than 0')

        name = 'percentilesOfSeries(%s,%g)' % (seriesList[0].pathExpression, level)
        (start, end, step) = functions.normalize([seriesList])[1:]
        values = [functions._getPercentile(row, level, False) for row in functions.izip(*seriesList)]
        resultSeries = TimeSeries(name, start, end, step, values)
        resultSeries.pathExpression = name
        result.append(resultSeries)

    return result
Ejemplo n.º 12
0
def maxSeries(requestContext, *seriesLists):
    """For each timestamp, plot the maximum value across all input series."""
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    name = "maxSeries(%s)" % ','.join(uniquePaths)
    maxima = (safeMax(column) for column in izip(*seriesList))
    result = TimeSeries(name, start, end, step, maxima)
    result.pathExpression = name
    return [result]
Ejemplo n.º 13
0
def averageSeries(requestContext, *seriesLists):
    """For each timestamp, plot the mean of the non-null values across all
    input series (after normalizing them to a common step)."""
    (seriesList, start, end, step) = normalize(seriesLists)
    uniquePaths = set([s.pathExpression for s in seriesList])
    name = "averageSeries(%s)" % ','.join(uniquePaths)
    means = (safeDiv(safeSum(column), safeLen(column))
             for column in izip(*seriesList))
    result = TimeSeries(name, start, end, step, means)
    result.pathExpression = name
    return [result]
Ejemplo n.º 14
0
def sumSeries(requestContext, *seriesLists):
    """Add all input series together, returning one series with the sum at
    each datapoint.  Returns [] when the inputs cannot be normalized
    (e.g. no usable series)."""
    try:
        (seriesList, start, end, step) = normalize(seriesLists)
    except Exception:
        # Narrowed from a bare `except:` — any normalize() failure is still
        # treated as "no data", but KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        return []
    name = "sumSeries(%s)" % ','.join(
        set([s.pathExpression for s in seriesList]))
    values = (safeSum(row) for row in izip(*seriesList))
    series = TimeSeries(name, start, end, step, values)
    series.pathExpression = name
    return [series]
Ejemplo n.º 15
0
def stdev(requestContext, seriesList, time):
  """

  Takes one metric or a wildcard seriesList followed by an integer N.
  Draw the Standard Deviation of all metrics passed for the past N datapoints.

  Each entry of seriesList is replaced in place with its stddev series and
  the list is returned; the first N-1 points are padded with None.

  Example:

  .. code-block:: none

    &target=stddev(server*.instance*.threads.busy,30)

  """
  
  # Rolling standard deviation over a window of `time` points, maintained
  # incrementally: the running average and sum-of-squares are updated as
  # each new point enters the window and the oldest one leaves it.
  count = 0
  for series in seriesList:
    stddevs = TimeSeries("stddev(%s,%.1f)" % (series.name, float(time)), series.start, series.end, series.step, [])
    stddevs.pathExpression = "stddev(%s,%.1f)" % (series.name, float(time))
    # Seed with the average of the first full window.
    avg = safeDiv(safeSum(series[:time]), time)

    if avg is not None:
      # Sum of squares over the non-null points of the initial window.
      # (Python 2-only `lambda(x):` tuple-parameter syntax.)
      sumOfSquares = sum(map(lambda(x): x * x, [v for v in series[:time] if v is not None]))
      (sd, sumOfSquares) = doStdDev(sumOfSquares, 0, 0, time, avg)
      stddevs.append(sd)
    else:
      stddevs.append(None)

    # Slide the window: `el` enters, series[index] (the oldest point) leaves.
    for (index, el) in enumerate(series[time:]):
      if el is None:
        continue

      toDrop = series[index]
      if toDrop is None:
        # A missing outgoing point contributes 0 to the rolling sums.
        toDrop = 0

      s = safeSum([safeMul(time, avg), el, -toDrop])
      avg = safeDiv(s, time)

      if avg is not None:
        (sd, sumOfSquares) = doStdDev(sumOfSquares, toDrop, series[index+time], time, avg)
        stddevs.append(sd)
      else:
        stddevs.append(None)

    # Pad the front where a full window was not yet available.
    for i in range(0, time-1):
      stddevs.insert(0, None)

    # Replace the input series in place with its stddev series.
    seriesList[count] = stddevs
    count = count + 1

  return seriesList
Ejemplo n.º 16
0
def log(requestContext, seriesList, base=10):
    """Draw each series with its values replaced by log(value, base);
    None and non-positive values (log undefined) become None."""
    out = []
    for series in seriesList:
        mapped = [math.log(v, base) if v is not None and v > 0 else None
                  for v in series]
        name = "log(%s, %s)" % (series.name, base)
        result = TimeSeries(name, series.start, series.end, series.step,
                            mapped)
        result.pathExpression = name
        out.append(result)
    return out
Ejemplo n.º 17
0
def integral(requestContext, seriesList):
    """Return each series transformed into its running total; None
    datapoints stay None and do not advance the accumulator."""
    out = []
    for series in seriesList:
        runningTotal = 0.0
        accumulated = []
        for value in series:
            if value is None:
                accumulated.append(None)
            else:
                runningTotal += value
                accumulated.append(runningTotal)
        name = "integral(%s)" % series.name
        result = TimeSeries(name, series.start, series.end, series.step,
                            accumulated)
        result.pathExpression = name
        out.append(result)
    return out
Ejemplo n.º 18
0
def derivative(requestContext, seriesList):
    """Return each series as point-to-point deltas; the first point and any
    pair involving None produce None."""
    out = []
    for series in seriesList:
        deltas = []
        previous = None
        for current in series:
            if previous is None or current is None:
                deltas.append(None)
            else:
                deltas.append(current - previous)
            previous = current
        name = "derivative(%s)" % series.name
        result = TimeSeries(name, series.start, series.end, series.step,
                            deltas)
        result.pathExpression = name
        out.append(result)
    return out
Ejemplo n.º 19
0
def minSeries(requestContext, *seriesLists):
  """
  Takes one metric or a wildcard seriesList.
  For each datapoint across the input metrics, pick the minimum value and
  graph it.

  Example:

  .. code-block:: none

    &target=minSeries(Server*.connections.total)
  """
  (seriesList, start, end, step) = normalize(seriesLists)
  uniquePaths = set([s.pathExpression for s in seriesList])
  name = "minSeries(%s)" % ','.join(uniquePaths)
  minima = (safeMin(column) for column in izip(*seriesList))
  result = TimeSeries(name, start, end, step, minima)
  result.pathExpression = name
  return [result]
Ejemplo n.º 20
0
def diffSeries(requestContext, *seriesLists):
  """
  Can take two or more metrics, or a single metric and a constant.
  Subtracts parameters 2 through n from parameter 1 at each datapoint.

  Example:

  .. code-block:: none

    &target=diffSeries(service.connections.total,service.connections.failed)
    &target=diffSeries(service.connections.total,5)

  """
  (seriesList, start, end, step) = normalize(seriesLists)
  uniquePaths = set([s.pathExpression for s in seriesList])
  name = "diffSeries(%s)" % ','.join(uniquePaths)
  differences = (safeDiff(column) for column in izip(*seriesList))
  result = TimeSeries(name, start, end, step, differences)
  result.pathExpression = name
  return [result]
Ejemplo n.º 21
0
def nonNegativeDerivative(requestContext, seriesList, maxValue=None):
  """

  Same as the derivative function, but datapoints that trend down are
  ignored (become None).  Useful for counters that increase for a long
  time, then wrap or reset — such as when a network interface is destroyed
  and recreated by unloading and re-loading a kernel module, common with
  USB / WiFi cards.  When maxValue is supplied and the new value does not
  exceed it, a wrap is reconstructed instead of dropped.

  Example:

  .. code-block:: none

    &target=nonNegativeDerivative(company.server.application01.ifconfig.TXPackets)

  """
  out = []

  for series in seriesList:
    deltas = []
    previous = None

    for current in series:
      if previous is None or current is None:
        deltas.append(None)
      elif current >= previous:
        deltas.append(current - previous)
      elif maxValue is not None and maxValue >= current:
        # Counter wrapped: distance up to maxValue plus the climb back
        # from zero to the new value.
        deltas.append((maxValue - previous) + current + 1)
      else:
        deltas.append(None)
      previous = current

    newName = "nonNegativeDerivative(%s)" % series.name
    rebuilt = TimeSeries(newName, series.start, series.end, series.step, deltas)
    rebuilt.pathExpression = newName
    out.append(rebuilt)

  return out
Ejemplo n.º 22
0
def stdev(requestContext, seriesList, time):
    """Rolling standard deviation of each series over a window of `time`
    datapoints.

    Each entry of seriesList is replaced in place with its stddev series and
    the list is returned; the first time-1 points are padded with None.
    """
    # The running average and sum-of-squares are maintained incrementally as
    # each new point enters the window and the oldest one leaves it.
    count = 0
    for series in seriesList:
        stddevs = TimeSeries("stddev(%s,%.1f)" % (series.name, float(time)),
                             series.start, series.end, series.step, [])
        stddevs.pathExpression = "stddev(%s,%.1f)" % (series.name, float(time))
        # Seed with the average of the first full window.
        avg = safeDiv(safeSum(series[:time]), time)

        if avg is not None:
            # Sum of squares over the non-null points of the initial window.
            # (Python 2-only `lambda (x):` tuple-parameter syntax.)
            sumOfSquares = sum(
                map(lambda (x): x * x,
                    [v for v in series[:time] if v is not None]))
            (sd, sumOfSquares) = doStdDev(sumOfSquares, 0, 0, time, avg)
            stddevs.append(sd)
        else:
            stddevs.append(None)

        # Slide the window: `el` enters, series[index] (the oldest) leaves.
        for (index, el) in enumerate(series[time:]):
            if el is None:
                continue

            toDrop = series[index]
            if toDrop is None:
                # A missing outgoing point contributes 0 to the rolling sums.
                toDrop = 0

            s = safeSum([safeMul(time, avg), el, -toDrop])
            avg = safeDiv(s, time)

            if avg is not None:
                (sd, sumOfSquares) = doStdDev(sumOfSquares, toDrop,
                                              series[index + time], time, avg)
                stddevs.append(sd)
            else:
                stddevs.append(None)

        # Pad the front where a full window was not yet available.
        for i in range(0, time - 1):
            stddevs.insert(0, None)

        # Replace the input series in place with its stddev series.
        seriesList[count] = stddevs
        count = count + 1

    return seriesList
Ejemplo n.º 23
0
def divideSeries(requestContext, dividendSeriesList, divisorSeriesList):
  """

  Takes a dividend metric and a divisor metric and draws the division result.
  A constant may *not* be passed. To divide by a constant, use the scale() 
  function (which is essentially a multiplication operation) and use the
  inverse of the dividend. (Division by 8 = multiplication by 1/8 or 0.125)

  Example:

  .. code-block:: none

    &target=divideSeries(Series.dividends,Series.divisors)

  Raises ValueError unless the divisor argument references exactly one
  series.
  """
  if len(divisorSeriesList) != 1:
    raise ValueError("divideSeries second argument must reference exactly 1 series")

  divisorSeries = divisorSeriesList[0]
  quotients = []

  for dividendSeries in dividendSeriesList:
    name = "divideSeries(%s,%s)" % (dividendSeries.name, divisorSeries.name)
    pair = (dividendSeries, divisorSeries)
    # Coarsest common step of the two series.
    step = reduce(lcm, [s.step for s in pair])

    for s in pair:
      s.consolidate(step / s.step)

    start = min([s.start for s in pair])
    end = max([s.end for s in pair])
    end -= (end - start) % step

    ratios = (safeDiv(num, den) for num, den in izip(*pair))

    result = TimeSeries(name, start, end, step, ratios)
    result.pathExpression = name
    quotients.append(result)

  return quotients
Ejemplo n.º 24
0
def averageSeries(requestContext, *seriesLists):
  """
  Short Alias: avg()

  Takes one metric or a wildcard seriesList and draws the mean of the
  non-null values across all metrics at each point in time.

  Example:

  .. code-block:: none

    &target=averageSeries(company.server.*.threads.busy)

  """
  (seriesList, start, end, step) = normalize(seriesLists)
  uniquePaths = set([s.pathExpression for s in seriesList])
  name = "averageSeries(%s)" % ','.join(uniquePaths)
  means = (safeDiv(safeSum(column), safeLen(column))
           for column in izip(*seriesList))
  result = TimeSeries(name, start, end, step, means)
  result.pathExpression = name
  return [result]
Ejemplo n.º 25
0
def centered_mov_avg(requestContext, seriesList, windowSize):
    """Centered moving average: each output point averages a window of
    datapoints centered on it (roughly half before, half after), using
    bootstrapped history so early points can have a full window.

    windowSize may be an integer number of points or a time-offset string
    such as "10min".
    """
    windowInterval = None
    if isinstance(windowSize, basestring):
        delta = functions.parseTimeOffset(windowSize)
        windowInterval = abs(delta.seconds + (delta.days * 86400))

    if windowInterval:
        bootstrapSeconds = windowInterval
    else:
        bootstrapSeconds = max([s.step for s in seriesList]) * int(windowSize)

    # Re-fetch each series with extra leading history.
    bootstrapList = functions._fetchWithBootstrap(requestContext, seriesList, seconds=bootstrapSeconds)
    result = []

    for bootstrap, series in zip(bootstrapList, seriesList):
        if windowInterval:
            # NOTE(review): integer division under Python 2 — presumably
            # windowInterval is a multiple of series.step; confirm.
            windowPoints = windowInterval / series.step
        else:
            windowPoints = int(windowSize)

        if isinstance(windowSize, basestring):
            newName = 'centeredMovingAverage(%s,"%s")' % (series.name, windowSize)
        else:
            newName = "centeredMovingAverage(%s,%s)" % (series.name, windowSize)
        newSeries = TimeSeries(newName, series.start, series.end, series.step, [])
        newSeries.pathExpression = newName

        # Number of extra leading points supplied by the bootstrap fetch.
        offset = len(bootstrap) - len(series)
        logging.info("Offset: %s", offset)
        logging.info("windowPoints: %s", windowPoints)

        for i in range(len(series)):
            # Window centered on point i: integer half-window on each side
            # (the "- windowPoints + windowPoints / 2" start places the left
            # edge half a window back from the current point).
            window = bootstrap[i + offset - windowPoints + windowPoints / 2:i + offset + windowPoints / 2]
            logging.info("window: %s", len(window))
            newSeries.append(functions.safeAvg(window))

        result.append(newSeries)

    return result
Ejemplo n.º 26
0
def asPercent(requestContext, seriesList1, seriesList2orNumber):
  """

  Takes exactly two metrics, or a metric and a constant.
  Draws the first metric as a percent of the second.

  Example:

  .. code-block:: none

    &target=asPercent(Server01.connections.failed,Server01.connections,total)
    &target=asPercent(apache01.threads.busy,1500)

  """
  assert len(seriesList1) == 1, "asPercent series arguments must reference *exactly* 1 series"
  numerator = seriesList1[0]
  if type(seriesList2orNumber) is list:
    assert len(seriesList2orNumber) == 1, "asPercent series arguments must reference *exactly* 1 series"
    denominator = seriesList2orNumber[0]
    name = "asPercent(%s,%s)" % (numerator.name, denominator.name)
    pair = (numerator, denominator)
    # Align both series on their coarsest common step before zipping.
    step = reduce(lcm, [s.step for s in pair])
    for s in pair:
      s.consolidate(step / s.step)
    start = min([s.start for s in pair])
    end = max([s.end for s in pair])
    end -= (end - start) % step
    values = (safeMul(safeDiv(num, den), 100.0) for num, den in izip(*pair))
  else:
    constant = float(seriesList2orNumber)
    name = "asPercent(%s,%.1f)" % (numerator.name, constant)
    step = numerator.step
    start = numerator.start
    end = numerator.end
    values = (safeMul(safeDiv(v, constant), 100.0) for v in numerator)
  result = TimeSeries(name, start, end, step, values)
  result.pathExpression = name
  return [result]
Ejemplo n.º 27
0
def summarize(requestContext, seriesList, intervalString):
    """Re-bucket each series into fixed intervals (e.g. "1hour"), summing
    the datapoints that fall into each bucket; empty buckets yield None.
    Output start/end are aligned to the interval."""
    out = []
    delta = parseTimeOffset(intervalString)
    interval = delta.seconds + (delta.days * 86400)

    for series in seriesList:
        buckets = {}
        timestamps = range(int(series.start), int(series.end),
                           int(series.step))
        for timestamp, value in zip(timestamps, series):
            # Floor each timestamp to its bucket's start.
            bucketStart = timestamp - (timestamp % interval)
            bucket = buckets.setdefault(bucketStart, [])
            if value is not None:
                bucket.append(value)

        alignedStart = series.start - (series.start % interval)
        alignedEnd = series.end - (series.end % interval) + interval
        summed = []
        for timestamp in range(alignedStart, alignedEnd, interval):
            bucket = buckets.get(timestamp, [])
            summed.append(sum(bucket) if bucket else None)

        newName = "summarize(%s, \"%s\")" % (series.name, intervalString)
        result = TimeSeries(newName, alignedStart, alignedEnd, interval,
                            summed)
        result.pathExpression = newName
        out.append(result)

    return out
Ejemplo n.º 28
0
def summarize(requestContext, seriesList, intervalString):
  """Re-bucket each series into fixed intervals (e.g. "1hour"), summing the
  datapoints that fall into each bucket; empty buckets yield None.  Output
  start/end are aligned to the interval."""
  out = []
  delta = parseTimeOffset(intervalString)
  interval = delta.seconds + (delta.days * 86400)

  for series in seriesList:
    buckets = {}
    timestamps = range(int(series.start), int(series.end), int(series.step))
    for timestamp, value in zip(timestamps, series):
      # Floor each timestamp to its bucket's start.
      bucketStart = timestamp - (timestamp % interval)
      bucket = buckets.setdefault(bucketStart, [])
      if value is not None:
        bucket.append(value)

    alignedStart = series.start - (series.start % interval)
    alignedEnd = series.end - (series.end % interval) + interval
    summed = []
    for timestamp in range(alignedStart, alignedEnd, interval):
      bucket = buckets.get(timestamp, [])
      summed.append(sum(bucket) if bucket else None)

    newName = "summarize(%s, \"%s\")" % (series.name, intervalString)
    result = TimeSeries(newName, alignedStart, alignedEnd, interval, summed)
    result.pathExpression = newName
    out.append(result)

  return out
Ejemplo n.º 29
0
def stacked(requestContext,seriesLists):
  """
  Takes one metric or a wildcard seriesList and changes them so they are
  stacked.  This lets you stack just a couple of metrics without using the
  stacked-area mode (which stacks everything), allowing a mixed stacked and
  non-stacked graph.  The running totals are carried in the request context
  ('totalStack') so successive calls keep stacking.

  Example:

  .. code-block:: none

    &target=stacked(company.server.application01.ifconfig.TXPackets)

  """
  totalStack = requestContext.get('totalStack', [])
  results = []
  for series in seriesLists:
    stackedValues = []
    for i, value in enumerate(series):
      # Grow the running-total vector lazily as longer series appear.
      if len(totalStack) <= i:
        totalStack.append(0)
      if value is None:
        stackedValues.append(None)
      else:
        totalStack[i] += value
        stackedValues.append(totalStack[i])

    newName = "stacked(%s)" % series.name
    result = TimeSeries(newName, series.start, series.end, series.step, stackedValues)
    result.options['stacked'] = True
    result.pathExpression = newName
    results.append(result)
  requestContext['totalStack'] = totalStack
  return results
Ejemplo n.º 30
0
def movingAverage(requestContext, seriesList, windowSize):
    """Graph the average of the previous windowSize datapoints of each
    series; the first windowSize-1 points are None.  None values inside a
    window are ignored; a window with no data yields None.  Each entry of
    seriesList is replaced in place and the list is returned."""
    for seriesIndex, series in enumerate(seriesList):
        newName = "movingAverage(%s,%.1f)" % (series.name, float(windowSize))
        newSeries = TimeSeries(newName, series.start, series.end, series.step,
                               [])
        newSeries.pathExpression = newName

        # A full window of windowSize points is first available here.
        windowIndex = windowSize - 1

        for i in range(len(series)):
            if i < windowIndex:  # Pad the beginning with None's since we don't have enough data
                newSeries.append(None)

            else:
                window = series[i - windowIndex:i + 1]
                nonNull = [v for v in window if v is not None]
                if nonNull:
                    # BUG FIX: float() prevents integer truncation under
                    # Python 2's classic division (avg of [1, 2] was 1).
                    newSeries.append(float(sum(nonNull)) / len(nonNull))
                else:
                    newSeries.append(None)

        seriesList[seriesIndex] = newSeries

    return seriesList
Ejemplo n.º 31
0
def hitcount(requestContext, seriesList, intervalString):
  """
  Estimate hit counts from a list of time series.

  This function assumes the values in each time series represent
  hits per second.  It calculates hits per some larger interval
  such as per day or per hour.  This function is like summarize(),
  except that it compensates automatically for different time scales
  (so that a similar graph results from using either fine-grained
  or coarse-grained records) and handles rarely-occurring events
  gracefully.
  """
  results = []
  delta = parseTimeOffset(intervalString)
  interval = int(delta.seconds + (delta.days * 86400))

  for series in seriesList:
    length = len(series)  # NOTE(review): unused — candidate for removal
    step = int(series.step)
    # Number of whole output buckets needed to cover the series span.
    bucket_count = int(math.ceil(float(series.end - series.start) / interval))
    buckets = [[] for _ in range(bucket_count)]
    # Align the new start so the last bucket ends exactly at series.end.
    newStart = int(series.end - bucket_count * interval)

    for i, value in enumerate(series):
      if value is None:
        continue

      # The datapoint covers [start_time, end_time); find the bucket index
      # and in-bucket offset of each edge.
      start_time = int(series.start + i * step)
      start_bucket, start_mod = divmod(start_time - newStart, interval)
      end_time = start_time + step
      end_bucket, end_mod = divmod(end_time - newStart, interval)

      if end_bucket >= bucket_count:
        # Clamp datapoints that extend past the final bucket.
        end_bucket = bucket_count - 1
        end_mod = interval

      if start_bucket == end_bucket:
        # All of the hits go to a single bucket.
        if start_bucket >= 0:
          buckets[start_bucket].append(value * (end_mod - start_mod))

      else:
        # Spread the hits among 2 or more buckets, proportional to the time
        # the datapoint overlaps each bucket (value is hits/second).
        if start_bucket >= 0:
          buckets[start_bucket].append(value * (interval - start_mod))
        hits_per_bucket = value * interval
        for j in range(start_bucket + 1, end_bucket):
          buckets[j].append(hits_per_bucket)
        if end_mod > 0:
          buckets[end_bucket].append(value * end_mod)

    # Buckets that collected nothing become None rather than 0.
    newValues = []
    for bucket in buckets:
      if bucket:
        newValues.append( sum(bucket) )
      else:
        newValues.append(None)

    newName = 'hitcount(%s, "%s")' % (series.name, intervalString)
    newSeries = TimeSeries(newName, newStart, series.end, interval, newValues)
    newSeries.pathExpression = newName
    results.append(newSeries)

  return results
Ejemplo n.º 32
0
def hitcount(requestContext, seriesList, intervalString):
    """Estimate hit counts from a list of time series.

    This function assumes the values in each time series represent
    hits per second.  It calculates hits per some larger interval
    such as per day or per hour.  This function is like summarize(),
    except that it compensates automatically for different time scales
    (so that a similar graph results from using either fine-grained
    or coarse-grained records) and handles rarely-occurring events
    gracefully.
    """
    results = []
    delta = parseTimeOffset(intervalString)
    # Bucket width in seconds (e.g. "1hour" -> 3600).
    interval = int(delta.seconds + (delta.days * 86400))

    for series in seriesList:
        step = int(series.step)
        bucket_count = int(
            math.ceil(float(series.end - series.start) / interval))
        buckets = [[] for _ in range(bucket_count)]
        # Align the output so its last bucket ends exactly at series.end;
        # newStart may fall before series.start (negative bucket indices
        # are simply dropped below).
        newStart = int(series.end - bucket_count * interval)

        for i, value in enumerate(series):
            if value is None:
                continue

            # Each datapoint covers [start_time, end_time); split its hits
            # proportionally across every bucket that span overlaps.
            start_time = int(series.start + i * step)
            start_bucket, start_mod = divmod(start_time - newStart, interval)
            end_time = start_time + step
            end_bucket, end_mod = divmod(end_time - newStart, interval)

            # Clamp datapoints that spill past the last bucket.
            if end_bucket >= bucket_count:
                end_bucket = bucket_count - 1
                end_mod = interval

            if start_bucket == end_bucket:
                # All of the hits go to a single bucket.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value * (end_mod - start_mod))

            else:
                # Spread the hits among 2 or more buckets.
                if start_bucket >= 0:
                    buckets[start_bucket].append(value *
                                                 (interval - start_mod))
                hits_per_bucket = value * interval
                for j in range(start_bucket + 1, end_bucket):
                    buckets[j].append(hits_per_bucket)
                if end_mod > 0:
                    buckets[end_bucket].append(value * end_mod)

        # An empty bucket means "no data", not "zero hits".
        newValues = [sum(bucket) if bucket else None for bucket in buckets]

        newName = 'hitcount(%s, "%s")' % (series.name, intervalString)
        newSeries = TimeSeries(newName, newStart, series.end, interval,
                               newValues)
        newSeries.pathExpression = newName
        results.append(newSeries)

    return results
# Ejemplo n.º 33
# 0
def summarize(requestContext, seriesList, intervalString, func='sum', alignToFrom=False):
  """
  Summarize the data into interval buckets of a certain size.

  By default, the contents of each interval bucket are summed together. This is
  useful for counters where each increment represents a discrete event and
  retrieving a "per X" value requires summing all the events in that interval.

  Specifying 'avg' instead will return the mean for each bucket, which can be more
  useful when the value is a gauge that represents a certain value in time.

  'max', 'min' or 'last' can also be specified.

  By default, buckets are calculated by rounding to the nearest interval. This
  works well for intervals smaller than a day. For example, 22:32 will end up
  in the bucket 22:00-23:00 when the interval=1hour.

  Passing alignToFrom=true will instead create buckets starting at the from
  time. In this case, the bucket for 22:32 depends on the from time. If
  from=6:30 then the 1hour bucket for 22:32 is 22:30-23:30.

  Example:

  .. code-block:: none

    &target=summarize(counter.errors, "1hour") # total errors per hour
    &target=summarize(nonNegativeDerivative(gauge.num_users), "1week") # new users per week
    &target=summarize(queue.size, "1hour", "avg") # average queue size per hour
    &target=summarize(queue.size, "1hour", "max") # maximum queue size during each hour
    &target=summarize(metric, "13week", "avg", true)&from=midnight+20100101 # 2010 Q1-4
  """
  results = []
  delta = parseTimeOffset(intervalString)
  # Bucket width in seconds.
  interval = delta.seconds + (delta.days * 86400)

  for series in seriesList:
    buckets = {}

    timestamps = range( int(series.start), int(series.end), int(series.step) )
    datapoints = zip(timestamps, series)

    # Group raw values into their buckets, keyed either by offset from the
    # series start (alignToFrom) or by the interval-aligned timestamp.
    for (timestamp, value) in datapoints:
      if alignToFrom:
        bucketInterval = int((timestamp - series.start) / interval)
      else:
        bucketInterval = timestamp - (timestamp % interval)

      if bucketInterval not in buckets:
        buckets[bucketInterval] = []

      if value is not None:
        buckets[bucketInterval].append(value)

    if alignToFrom:
      newStart = series.start
      newEnd = series.end
    else:
      # Snap the output range outward to interval boundaries.
      newStart = series.start - (series.start % interval)
      newEnd = series.end - (series.end % interval) + interval

    newValues = []
    # int() casts keep range() valid even when series.start/end are floats,
    # matching the cast already applied to `timestamps` above.
    for timestamp in range(int(newStart), int(newEnd), interval):
      if alignToFrom:
        newEnd = timestamp
        bucketInterval = int((timestamp - series.start) / interval)
      else:
        bucketInterval = timestamp - (timestamp % interval)

      bucket = buckets.get(bucketInterval, [])

      if bucket:
        if func == 'avg':
          newValues.append( float(sum(bucket)) / float(len(bucket)) )
        elif func == 'last':
          newValues.append( bucket[-1] )
        elif func == 'max':
          newValues.append( max(bucket) )
        elif func == 'min':
          newValues.append( min(bucket) )
        else:
          newValues.append( sum(bucket) )
      else:
        newValues.append( None )

    if alignToFrom:
      # The loop left newEnd at the final bucket's start; push it one
      # interval further so it marks the exclusive end of the series.
      newEnd += interval

    newName = "summarize(%s, \"%s\")" % (series.name, intervalString)
    newSeries = TimeSeries(newName, newStart, newEnd, interval, newValues)
    newSeries.pathExpression = newName
    results.append(newSeries)

  return results
# Ejemplo n.º 34
# 0
def ASAP(requestContext, seriesList, resolution=1000):
    '''
    use the ASAP smoothing on a series
    https://arxiv.org/pdf/1703.00983.pdf
    https://raw.githubusercontent.com/stanford-futuredata/ASAP/master/ASAP.py
    :param requestContext:
    :param seriesList:
    :param resolution: either number of points to keep or a time resolution
    :return: smoothed(seriesList)
    '''
    if not seriesList:
        return []

    # A string resolution (e.g. "1hour") means a time window; a plain
    # number means a target point count.
    windowInterval = None
    if isinstance(resolution, six.string_types):
        offset = parseTimeOffset(resolution)
        windowInterval = abs(offset.seconds + (offset.days * 86400))

    if windowInterval:
        previewSeconds = windowInterval
    else:
        previewSeconds = max(s.step for s in seriesList) * int(resolution)

    # Discard the original data and re-fetch with extra lead time, so the
    # earliest smoothed points have enough history behind them.
    shiftedContext = requestContext.copy()
    shiftedContext['startTime'] = (requestContext['startTime'] -
                                   timedelta(seconds=previewSeconds))
    fetched = evaluateTokens(shiftedContext, requestContext['args'][0])

    result = []
    for series in fetched:
        if windowInterval:
            # Translate the window in seconds into a point count to keep.
            pointTarget = round((series.end - series.start) / windowInterval)
        else:
            pointTarget = min(int(resolution), len(series))

        if isinstance(resolution, six.string_types):
            label = 'asap(%s,"%s")' % (series.name, resolution)
        else:
            label = "asap(%s,%s)" % (series.name, resolution)

        outSeries = TimeSeries(
            label,
            series.start,
            series.end,
            (series.end - series.start) // pointTarget,
            []
        )
        outSeries.pathExpression = label

        # With at most one real datapoint there is nothing to smooth.
        if len([v for v in series if v is not None]) <= 1:
            outSeries.extend(series)
        else:
            # The point count is only a suggestion; the algorithm may pick
            # a slightly different output length for the best view, so the
            # step is recomputed (and forced to an int) afterwards.
            smoothedValues = smooth(series, pointTarget)
            outSeries.step = round(
                (series.end - series.start) / len(smoothedValues))
            outSeries.extend(smoothedValues)
        result.append(outSeries)

    return result