Example #1
    def testMovingAverage(self):
        """
    Test that the (internal) moving average maintains the averages correctly,
    even for null initial condition and when the number of values goes over
    windowSize.  Pass in integers and floats.
    """
        historicalValues = []
        total = 0
        windowSize = 3
        newAverage, historicalValues, total = MovingAverage.compute(historicalValues, total, 3, windowSize)

        self.assertEqual(newAverage, 3.0)
        self.assertEqual(historicalValues, [3.0])
        self.assertEqual(total, 3.0)

        newAverage, historicalValues, total = MovingAverage.compute(historicalValues, total, 4, windowSize)
        self.assertEqual(newAverage, 3.5)
        self.assertListEqual(historicalValues, [3.0, 4.0])
        self.assertEqual(total, 7.0)

        newAverage, historicalValues, total = MovingAverage.compute(historicalValues, total, 5.0, windowSize)
        self.assertEqual(newAverage, 4.0)
        self.assertListEqual(historicalValues, [3.0, 4.0, 5.0])
        self.assertEqual(total, 12.0)

        # Ensure the first value gets popped
        newAverage, historicalValues, total = MovingAverage.compute(historicalValues, total, 6.0, windowSize)
        self.assertEqual(newAverage, 5.0)
        self.assertListEqual(historicalValues, [4.0, 5.0, 6.0])
        self.assertEqual(total, 15.0)
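The assertions above pin down the contract of MovingAverage.compute. A minimal sketch of a compatible helper, reconstructed from the test alone (an assumption, not NuPIC's actual implementation):

# Minimal sketch of a MovingAverage.compute-compatible helper; the signature is
# inferred from the test above, not copied from NuPIC.
def compute_moving_average(historicalValues, total, newVal, windowSize):
    """Return (newAverage, historicalValues, total) after folding in newVal."""
    if len(historicalValues) == windowSize:
        # Window is full: drop the oldest value before appending the new one.
        total -= historicalValues.pop(0)
    historicalValues.append(float(newVal))
    total += newVal
    return float(total) / len(historicalValues), historicalValues, total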
Example #2
  def testMovingAverageInstance(self):
    """
    Test that the (internal) moving average maintains the averages correctly,
    even for null initial condition and when the number of values goes over
    windowSize.  Pass in integers and floats.
    This is for the instance method next().
    """
    ma = MovingAverage(windowSize=3)

    newAverage = ma.next(3)
    self.assertEqual(newAverage, 3.0)
    self.assertListEqual(ma.getSlidingWindow(), [3.0])
    self.assertEqual(ma.total, 3.0)

    newAverage = ma.next(4)
    self.assertEqual(newAverage, 3.5)
    self.assertListEqual(ma.getSlidingWindow(), [3.0, 4.0])
    self.assertEqual(ma.total, 7.0)

    newAverage = ma.next(5)
    self.assertEqual(newAverage, 4.0)
    self.assertListEqual(ma.getSlidingWindow(), [3.0, 4.0, 5.0])
    self.assertEqual(ma.total, 12.0)

    # Ensure the first value gets popped
    newAverage = ma.next(6)
    self.assertEqual(newAverage, 5.0)
    self.assertListEqual(ma.getSlidingWindow(), [4.0, 5.0, 6.0])
    self.assertEqual(ma.total, 15.0)
Example #3
  def __init__(self, slidingWindowSize=None, mode=MODE_PURE, binaryAnomalyThreshold=None):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - if set (a float in (0, 1)), the anomaly
         score will be discretized to 1/0 (1 if >= binaryAnomalyThreshold).
         The transformation is applied after the moving average is computed and updated.
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and ( 
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or 
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")
Example #4
    def testMovingAverage(self):
        """
    Test that the (internal) moving average maintains the averages correctly,
    even for null initial condition and when the number of values goes over
    windowSize.  Pass in integers and floats.
    """
        historicalValues = []
        total = 0
        windowSize = 3
        newAverage, historicalValues, total = (MovingAverage.compute(
            historicalValues, total, 3, windowSize))

        self.assertEqual(newAverage, 3.0)
        self.assertEqual(historicalValues, [3.0])
        self.assertEqual(total, 3.0)

        newAverage, historicalValues, total = (MovingAverage.compute(
            historicalValues, total, 4, windowSize))
        self.assertEqual(newAverage, 3.5)
        self.assertListEqual(historicalValues, [3.0, 4.0])
        self.assertEqual(total, 7.0)

        newAverage, historicalValues, total = (MovingAverage.compute(
            historicalValues, total, 5.0, windowSize))
        self.assertEqual(newAverage, 4.0)
        self.assertListEqual(historicalValues, [3.0, 4.0, 5.0])
        self.assertEqual(total, 12.0)

        # Ensure the first value gets popped
        newAverage, historicalValues, total = (MovingAverage.compute(
            historicalValues, total, 6.0, windowSize))
        self.assertEqual(newAverage, 5.0)
        self.assertListEqual(historicalValues, [4.0, 5.0, 6.0])
        self.assertEqual(total, 15.0)
Example #5
    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if (self._mode == Anomaly.MODE_LIKELIHOOD
                or self._mode == Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        else:
            self._likelihood = None

        if not self._mode in self._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None and (
                not isinstance(binaryAnomalyThreshold, float)
                or binaryAnomalyThreshold >= 1.0
                or binaryAnomalyThreshold <= 0.0):
            raise ValueError(
                "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                "or None if disabled.")
Example #6
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
Example #7
    def __init__(self,
                 w,
                 minval=None,
                 maxval=None,
                 periodic=False,
                 n=0,
                 radius=0,
                 resolution=0,
                 name=None,
                 verbosity=0,
                 clipInput=True,
                 forced=False):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
        self._learningEnabled = True
        if periodic:
            #Adaptive scalar encoders take non-periodic inputs only
            raise Exception(
                'Adaptive scalar encoder does not encode periodic inputs')
        assert n != 0  # An adaptive encoder can only be initialized using n

        super(AdaptiveScalarEncoder, self).__init__(w=w,
                                                    n=n,
                                                    minval=minval,
                                                    maxval=maxval,
                                                    clipInput=True,
                                                    name=name,
                                                    verbosity=verbosity,
                                                    forced=forced)
        self.recordNum = 0  #how many inputs have been sent to the encoder?
        self.slidingWindow = MovingAverage(300)
Example #8
    def testSerialization(self):
        """serialization using pickle"""
        ma = MovingAverage(windowSize=3)

        ma.next(3)
        ma.next(4.5)
        ma.next(5)

        stored = pickle.dumps(ma)
        restored = pickle.loads(stored)
        self.assertEqual(restored, ma)
        self.assertEqual(ma.next(6), restored.next(6))
Example #9
def _anomalyScoreMovingAverage(anomalyScores, windowSize=10, verbosity=0):
    """
  Given a list of anomaly scores return a list of averaged records.
  anomalyScores is assumed to be a list of records of the form:
                [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0]

  Each record in the returned list contains:
      [datetime, value, averagedScore]

  *Note:* we only average the anomaly score.
  """

    historicalValues = []
    total = 0.0
    averagedRecordList = []  # Aggregated records
    for record in anomalyScores:

        # Skip (but log) records without correct number of entries
        if not isinstance(record, (list, tuple)) or len(record) != 3:
            if verbosity >= 1:
                print("Malformed record:", record)
            continue

        avg, historicalValues, total = MovingAverage.compute(historicalValues, total, record[2], windowSize)

        averagedRecordList.append([record[0], record[1], avg])

        if verbosity > 2:
            print("Aggregating input record:", record)
            print("Result:", [record[0], record[1], avg])

    return averagedRecordList, historicalValues, total
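A short, hypothetical invocation of the helper above, showing the [timestamp, value, score] record format it expects and that only the score column is averaged:

# Hypothetical usage of _anomalyScoreMovingAverage as defined above.
import datetime

scores = [
    [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0],
    [datetime.datetime(2013, 8, 10, 23, 5), 7.0, 0.0],
    ["malformed"],  # wrong length: skipped (and logged when verbosity >= 1)
]
averagedRecords, historicalValues, total = _anomalyScoreMovingAverage(scores, windowSize=2)
# averagedRecords -> [[<timestamp>, 6.0, 1.0], [<timestamp>, 7.0, 0.5]]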
Example #10
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
Example #11
  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    if not self._mode in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")
Example #12
  def testSerialization(self):
    """serialization using pickle"""
    ma = MovingAverage(windowSize=3)

    ma.next(3)
    ma.next(4.5)
    ma.next(5)

    stored = pickle.dumps(ma)
    restored = pickle.loads(stored)
    self.assertEqual(restored, ma) 
    self.assertEqual(ma.next(6), restored.next(6))
Example #13
  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
                resolution=0, name=None, verbosity=0, clipInput=True, forced=False):
    self._learningEnabled = True
    if periodic:
      #Adaptive scalar encoders take non-periodic inputs only
      raise Exception('Adaptive scalar encoder does not encode periodic inputs')
    assert n != 0           # An adaptive encoder can only be initialized using n

    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval, maxval=maxval,
                                clipInput=True, name=name, verbosity=verbosity, forced=forced)
    self.recordNum=0    #how many inputs have been sent to the encoder?
    self.slidingWindow = MovingAverage(300)
Example #14
  def testMovingAverageReadWrite(self):
    ma = MovingAverage(windowSize=3)

    ma.next(3)
    ma.next(4)
    ma.next(5)

    proto1 = MovingAverageProto.new_message()
    ma.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = MovingAverageProto.read(f)

    resurrectedMa = MovingAverage.read(proto2)

    newAverage = ma.next(6)
    self.assertEqual(newAverage, resurrectedMa.next(6))
    self.assertListEqual(ma.getSlidingWindow(),
                         resurrectedMa.getSlidingWindow())
    self.assertEqual(ma.total, resurrectedMa.total)
Example #15
  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
                resolution=0, name=None, verbosity=0, clipInput=True, forced=False):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
    self._learningEnabled = True
    if periodic:
      #Adaptive scalar encoders take non-periodic inputs only
      raise Exception('Adaptive scalar encoder does not encode periodic inputs')        # check_later: is there any instance where the adaptive input is periodic?
    assert n != 0           # An adaptive encoder can only be initialized using n

    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval, maxval=maxval,
                                clipInput=True, name=name, verbosity=verbosity, forced=forced)    # to_note: access ScalarEncoder's __init__
    self.recordNum=0    #how many inputs have been sent to the encoder?
    self.slidingWindow = MovingAverage(300)
Example #16
  def testMovingAverageSlidingWindowInit(self):
    """
    Test the slidingWindow value is correctly assigned when initializing a
    new MovingAverage object.
    """
    # With existing historical values; same values as tested in testMovingAverage()
    ma = MovingAverage(windowSize=3, existingHistoricalValues=[3.0, 4.0, 5.0])
    self.assertListEqual(ma.getSlidingWindow(), [3.0, 4.0, 5.0])

    # Without existing historical values
    ma = MovingAverage(windowSize=3)
    self.assertListEqual(ma.getSlidingWindow(), [])
Example #17
def _anomalyScoreMovingAverage(anomalyScores,
                               windowSize=10,
                               verbosity=0,
                              ):
  """
  Given a list of anomaly scores return a list of averaged records.
  anomalyScores is assumed to be a list of records of the form:
                [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0]

  Each record in the returned list contains:
      [datetime, value, averagedScore]

  *Note:* we only average the anomaly score.
  """

  historicalValues = []
  total = 0.0
  averagedRecordList = []    # Aggregated records
  for record in anomalyScores:

    # Skip (but log) records without correct number of entries
    if not isinstance(record, (list, tuple)) or len(record) != 3:
      if verbosity >= 1:
        print "Malformed record:", record
      continue

    avg, historicalValues, total = (
      MovingAverage.compute(historicalValues, total, record[2], windowSize)
      )

    averagedRecordList.append( [record[0], record[1], avg] )

    if verbosity > 2:
      print "Aggregating input record:", record
      print "Result:", [record[0], record[1], avg]

  return averagedRecordList, historicalValues, total
Example #18
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder)
                       (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
                      (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
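A hypothetical use of the class above in its default "pure" mode (the column indices are made up for illustration):

# Hypothetical usage of Anomaly.compute(); indices would normally come from the
# spatial pooler / temporal memory output.
detector = Anomaly(slidingWindowSize=10, mode=Anomaly.MODE_PURE)

activeColumns = [2, 3, 5, 7]
predictedColumns = [3, 5, 8]

score = detector.compute(activeColumns, predictedColumns)
# 2 of the 4 active columns were not predicted, so the raw score is 0.5; with a
# single sample in the 10-element moving average the returned score is also 0.5.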
Example #19
class AdaptiveScalarEncoder(ScalarEncoder):
    """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework.

  Initialization of an adaptive encoder using resolution or radius is not supported;
  it must be initialized with n. This n is kept constant while the min and max of the
  encoder change.

  The adaptive encoder must have periodic set to False.

  The adaptive encoder may be initialized with a minval and maxval or with `None`
  for each of these. In the latter case, the min and max are set as the 1st and 99th
  percentile over a window of the past 100 records.

  **Note:** the sliding window may record duplicates of the values in the dataset,
  and therefore does not reflect the statistical distribution of the input data
  and may not be used to calculate the median, mean etc.
  """

    ############################################################################
    def __init__(self,
                 w,
                 minval=None,
                 maxval=None,
                 periodic=False,
                 n=0,
                 radius=0,
                 resolution=0,
                 name=None,
                 verbosity=0,
                 clipInput=True,
                 forced=False):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
        self._learningEnabled = True
        if periodic:
            #Adaptive scalar encoders take non-periodic inputs only
            raise Exception(
                'Adaptive scalar encoder does not encode periodic inputs')
        assert n != 0  # An adaptive encoder can only be initialized using n

        super(AdaptiveScalarEncoder, self).__init__(w=w,
                                                    n=n,
                                                    minval=minval,
                                                    maxval=maxval,
                                                    clipInput=True,
                                                    name=name,
                                                    verbosity=verbosity,
                                                    forced=forced)
        self.recordNum = 0  #how many inputs have been sent to the encoder?
        self.slidingWindow = MovingAverage(300)

    ############################################################################
    def _setEncoderParams(self):
        """
    Set the radius, resolution and range. These values are updated when minval
    and/or maxval change.
    """

        self.rangeInternal = float(self.maxval - self.minval)

        self.resolution = float(self.rangeInternal) / (self.n - self.w)
        self.radius = self.w * self.resolution
        self.range = self.rangeInternal + self.resolution

        # nInternal represents the output area excluding the possible padding on each side
        self.nInternal = self.n - 2 * self.padding

        # Invalidate the bucket values cache so that they get recomputed
        self._bucketValues = None

    ############################################################################
    def setFieldStats(self, fieldName, fieldStats):
        """
    TODO: document
    """
        #If the stats are not fully formed, ignore.
        if fieldStats[fieldName]['min'] is None or \
          fieldStats[fieldName]['max'] is None:
            return
        self.minval = fieldStats[fieldName]['min']
        self.maxval = fieldStats[fieldName]['max']
        if self.minval == self.maxval:
            self.maxval += 1
        self._setEncoderParams()

    ############################################################################
    def _setMinAndMax(self, input, learn):
        """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """

        self.slidingWindow.next(input)

        if self.minval is None and self.maxval is None:
            self.minval = input
            self.maxval = input + 1  # When the min and max are unspecified and only one record has been encoded
            self._setEncoderParams()

        elif learn:
            sorted = self.slidingWindow.getSlidingWindow()
            sorted.sort()

            minOverWindow = sorted[0]
            maxOverWindow = sorted[len(sorted) - 1]

            if minOverWindow < self.minval:
                #initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement minval more aggressively in the beginning
                if self.verbosity >= 2:
                    print "Input %s=%.2f smaller than minval %.2f. Adjusting minval to %.2f"\
                                    % (self.name, input, self.minval, minOverWindow)
                self.minval = minOverWindow  #-initialBump
                self._setEncoderParams()

            if maxOverWindow > self.maxval:
                #initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2     #decrement maxval more aggressively in the beginning
                if self.verbosity >= 2:
                    print "Input %s=%.2f greater than maxval %.2f. Adjusting maxval to %.2f" \
                                    % (self.name, input, self.maxval, maxOverWindow)
                self.maxval = maxOverWindow  #+initialBump
                self._setEncoderParams()

    ############################################################################
    def getBucketIndices(self, input, learn=None):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """

        self.recordNum += 1
        if learn is None:
            learn = self._learningEnabled

        if type(input) is float and math.isnan(input):
            input = SENTINEL_VALUE_FOR_MISSING_DATA

        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return [None]
        else:
            self._setMinAndMax(input, learn)
            return super(AdaptiveScalarEncoder, self).getBucketIndices(input)

    ############################################################################
    def encodeIntoArray(self, input, output, learn=None):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """

        self.recordNum += 1
        if learn is None:
            learn = self._learningEnabled
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:self.n] = 0
        elif not math.isnan(input):
            self._setMinAndMax(input, learn)

        super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

    ############################################################################
    def getBucketInfo(self, buckets):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """

        if self.minval is None or self.maxval is None:
            return [
                EncoderResult(value=0, scalar=0, encoding=numpy.zeros(self.n))
            ]

        return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)

    ############################################################################
    def topDownCompute(self, encoded):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """

        if self.minval is None or self.maxval is None:
            return [
                EncoderResult(value=0, scalar=0, encoding=numpy.zeros(self.n))
            ]
        return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)

    ############################################################################
    def dump(self):
        """
    Prints details about current state to stdout.
    """
        print "AdaptiveScalarEncoder:"
        print "  min: %f" % self.minval
        print "  max: %f" % self.maxval
        print "  w:   %d" % self.w
        print "  n:   %d" % self.n
        print "  resolution: %f" % self.resolution
        print "  radius:     %f" % self.radius
        print "  periodic: %s" % self.periodic
        print "  nInternal: %d" % self.nInternal
        print "  rangeInternal: %f" % self.rangeInternal
        print "  padding: %d" % self.padding

    @classmethod
    def read(cls, proto):
        encoder = super(AdaptiveScalarEncoder, cls).read(proto)
        encoder.recordNum = proto.recordNum
        encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
        return encoder

    def write(self, proto):
        super(AdaptiveScalarEncoder, self).write(proto)
        proto.recordNum = self.recordNum
        self.slidingWindow.write(proto.slidingWindow)
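A hypothetical way to exercise the adaptive behaviour shown above: feed values outside the current range and let _setMinAndMax widen minval/maxval (the constructor arguments are made up; encode() is assumed to be inherited from the base encoder class):

# Hypothetical usage of AdaptiveScalarEncoder; parameters chosen only for
# illustration.
encoder = AdaptiveScalarEncoder(w=21, n=100, forced=True)

for value in [10.0, 12.5, 50.0, -3.0]:
    bits = encoder.encode(value)   # minval/maxval adapt as new extremes arrive
    print(value, encoder.minval, encoder.maxval)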
Example #20
 def read(cls, proto):
     encoder = super(AdaptiveScalarEncoder, cls).read(proto)
     encoder.recordNum = proto.recordNum
     encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
     return encoder
Example #21
def updateAnomalyLikelihoods(anomalyScores,
                             params,
                             verbosity=0): # pylint: disable=W0613
  """
  Compute updated probabilities for anomalyScores using the given params.

  :param anomalyScores: a list of records. Each record is a list with the
                        following three elements: [timestamp, value, score]

                        Example::

                            [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0]

  :param params: the JSON dict returned by estimateAnomalyLikelihoods
  :param verbosity: integer controlling extent of printouts for debugging
  :type verbosity: int

  :returns: 3-tuple consisting of:

            - likelihoods

              numpy array of likelihoods, one for each aggregated point

            - avgRecordList

              list of averaged input records

            - params

              an updated JSON object containing the state of this metric.

  """
  if verbosity > 3:
    print "In updateAnomalyLikelihoods."
    print "Number of anomaly scores:", len(anomalyScores)
    print "First 20:", anomalyScores[0:min(20, len(anomalyScores))]
    print "Params:", params

  if len(anomalyScores) == 0:
    raise ValueError("Must have at least one anomalyScore")

  if not isValidEstimatorParams(params):
    raise ValueError("'params' is not a valid params structure")

  # For backward compatibility.
  if not params.has_key("historicalLikelihoods"):
    params["historicalLikelihoods"] = [1.0]

  # Compute moving averages of these new scores using the previous values
  # as well as likelihood for these scores using the old estimator
  historicalValues  = params["movingAverage"]["historicalValues"]
  total             = params["movingAverage"]["total"]
  windowSize        = params["movingAverage"]["windowSize"]

  aggRecordList = numpy.zeros(len(anomalyScores), dtype=float)
  likelihoods = numpy.zeros(len(anomalyScores), dtype=float)
  for i, v in enumerate(anomalyScores):
    newAverage, historicalValues, total = (
      MovingAverage.compute(historicalValues, total, v[2], windowSize)
    )
    aggRecordList[i] = newAverage
    likelihoods[i]   = normalProbability(newAverage, params["distribution"])

  # Filter the likelihood values. First we prepend the historical likelihoods
  # to the current set. Then we filter the values.  We peel off the likelihoods
  # to return and the last windowSize values to store for later.
  likelihoods2 = params["historicalLikelihoods"] + list(likelihoods)
  filteredLikelihoods = _filterLikelihoods(likelihoods2)
  likelihoods[:] = filteredLikelihoods[-len(likelihoods):]
  historicalLikelihoods = likelihoods2[-min(windowSize, len(likelihoods2)):]

  # Update the estimator
  newParams = {
    "distribution": params["distribution"],
    "movingAverage": {
      "historicalValues": historicalValues,
      "total": total,
      "windowSize": windowSize,
    },
    "historicalLikelihoods": historicalLikelihoods,
  }

  assert len(newParams["historicalLikelihoods"]) <= windowSize

  if verbosity > 3:
    print "Number of likelihoods:", len(likelihoods)
    print "First 20 likelihoods:", likelihoods[0:min(20, len(likelihoods))]
    print "Leaving updateAnomalyLikelihoods."

  return (likelihoods, aggRecordList, newParams)
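A hedged sketch of the assumed calling pattern for the function above: bootstrap the params dict once (estimateAnomalyLikelihoods is assumed to live in the same anomaly_likelihood module and to return such a dict), then update incrementally as new records arrive:

# Sketch of the assumed estimate-then-update calling pattern; record values are
# synthetic and the helper names are taken from the surrounding module.
import datetime

start = datetime.datetime(2013, 8, 10, 23, 0)
records = [[start + datetime.timedelta(minutes=5 * i), float(i), (i % 10) / 10.0]
           for i in range(400)]

_, _, params = estimateAnomalyLikelihoods(records[:300])
likelihoods, avgRecords, params = updateAnomalyLikelihoods(records[300:], params)
# Low likelihood values correspond to unusually high (averaged) anomaly scores.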
Example #22
def updateAnomalyLikelihoods(anomalyScores,
                             params,
                             verbosity=0):
  """
  Compute updated probabilities for anomalyScores using the given params.

  :param anomalyScores: a list of records. Each record is a list with the
                        following three elements: [timestamp, value, score]

                        Example::

                            [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0]

  :param params: the JSON dict returned by estimateAnomalyLikelihoods
  :param verbosity: integer controlling extent of printouts for debugging
  :type verbosity: int

  :returns: 3-tuple consisting of:

            - likelihoods

              numpy array of likelihoods, one for each aggregated point

            - avgRecordList

              list of averaged input records

            - params

              an updated JSON object containing the state of this metric.

  """
  if verbosity > 3:
    print "In updateAnomalyLikelihoods."
    print "Number of anomaly scores:", len(anomalyScores)
    print "First 20:", anomalyScores[0:min(20, len(anomalyScores))]
    print "Params:", params

  if len(anomalyScores) == 0:
    raise ValueError("Must have at least one anomalyScore")

  if not isValidEstimatorParams(params):
    raise ValueError("'params' is not a valid params structure")

  # For backward compatibility.
  if not params.has_key("historicalLikelihoods"):
    params["historicalLikelihoods"] = [1.0]

  # Compute moving averages of these new scores using the previous values
  # as well as likelihood for these scores using the old estimator
  historicalValues  = params["movingAverage"]["historicalValues"]
  total             = params["movingAverage"]["total"]
  windowSize        = params["movingAverage"]["windowSize"]

  aggRecordList = numpy.zeros(len(anomalyScores), dtype=float)
  likelihoods = numpy.zeros(len(anomalyScores), dtype=float)
  for i, v in enumerate(anomalyScores):
    newAverage, historicalValues, total = (
      MovingAverage.compute(historicalValues, total, v[2], windowSize)
    )
    aggRecordList[i] = newAverage
    likelihoods[i]   = normalProbability(newAverage, params["distribution"])

  # Filter the likelihood values. First we prepend the historical likelihoods
  # to the current set. Then we filter the values.  We peel off the likelihoods
  # to return and the last windowSize values to store for later.
  likelihoods2 = params["historicalLikelihoods"] + list(likelihoods)
  filteredLikelihoods = _filterLikelihoods(likelihoods2)
  likelihoods[:] = filteredLikelihoods[-len(likelihoods):]
  historicalLikelihoods = likelihoods2[-min(windowSize, len(likelihoods2)):]

  # Update the estimator
  newParams = {
    "distribution": params["distribution"],
    "movingAverage": {
      "historicalValues": historicalValues,
      "total": total,
      "windowSize": windowSize,
    },
    "historicalLikelihoods": historicalLikelihoods,
  }

  assert len(newParams["historicalLikelihoods"]) <= windowSize

  if verbosity > 3:
    print "Number of likelihoods:", len(likelihoods)
    print "First 20 likelihoods:", likelihoods[0:min(20, len(likelihoods))]
    print "Leaving updateAnomalyLikelihoods."

  return (likelihoods, aggRecordList, newParams)
Example #23
  def testEquals(self):
    ma = MovingAverage(windowSize=3)
    maP = MovingAverage(windowSize=3)
    self.assertEqual(ma, maP)
    
    maN = MovingAverage(windowSize=10)
    self.assertNotEqual(ma, maN)

    ma = MovingAverage(windowSize=2, existingHistoricalValues=[3.0, 4.0, 5.0])
    maP = MovingAverage(windowSize=2, existingHistoricalValues=[3.0, 4.0, 5.0])
    self.assertEqual(ma, maP)
    maP.next(6)
    self.assertNotEqual(ma, maP)
    ma.next(6)
    self.assertEqual(ma, maP)
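For these assertions to pass, MovingAverage needs value-based equality. A minimal sketch that is consistent with the test (assumed attribute names; not a copy of the library code):

# Minimal MovingAverage-like sketch with value-based __eq__, just enough to
# satisfy the assertions in testEquals above.
class MovingAverageSketch(object):
    def __init__(self, windowSize, existingHistoricalValues=None):
        self.windowSize = windowSize
        self.slidingWindow = list(existingHistoricalValues or [])
        self.total = float(sum(self.slidingWindow))

    def next(self, value):
        if len(self.slidingWindow) == self.windowSize:
            self.total -= self.slidingWindow.pop(0)
        self.slidingWindow.append(float(value))
        self.total += value
        return self.total / len(self.slidingWindow)

    def __eq__(self, other):
        return (isinstance(other, MovingAverageSketch) and
                self.windowSize == other.windowSize and
                self.slidingWindow == other.slidingWindow and
                self.total == other.total)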
Example #24
  def testMovingAverageReadWrite(self):
    ma = MovingAverage(windowSize=3)

    ma.next(3)
    ma.next(4)
    ma.next(5)

    proto1 = MovingAverageProto.new_message()
    ma.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = MovingAverageProto.read(f)

    resurrectedMa = MovingAverage.read(proto2)

    newAverage = ma.next(6)
    self.assertEqual(newAverage, resurrectedMa.next(6))
    self.assertListEqual(ma.getSlidingWindow(),
                         resurrectedMa.getSlidingWindow())
    self.assertEqual(ma.total, resurrectedMa.total)
    self.assertEqual(ma, resurrectedMa)  # uses the __eq__ method
Example #25
 def read(cls, proto):
   encoder = super(AdaptiveScalarEncoder, cls).read(proto)
   encoder.recordNum = proto.recordNum
   encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
   return encoder
Example #26
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - if set (a float in (0, 1)), the anomaly
         score will be discretized to 1/0 (1 if >= binaryAnomalyThreshold).
         The transformation is applied after the moving average is computed and updated.
    """
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)
        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None and (
                not isinstance(binaryAnomalyThreshold, float)
                or binaryAnomalyThreshold >= 1.0
                or binaryAnomalyThreshold <= 0.0):
            raise ValueError(
                "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                "or None if disabled.")

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder)
                       (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
                      (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")

            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        # apply binary discretization if required
        if self._binaryThreshold is not None:
            if score >= self._binaryThreshold:
                score = 1.0
            else:
                score = 0.0

        return score

    def __str__(self):
        windowSize = 0
        if self._movingAverage is not None:
            windowSize = self._movingAverage.windowSize
        return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)

    def __setstate__(self, state):
        """deserialization"""
        self.__dict__.update(state)

        if not hasattr(self, '_mode'):
            self._mode = Anomaly.MODE_PURE
        if not hasattr(self, '_movingAverage'):
            self._movingAverage = None
        if not hasattr(self, '_binaryThreshold'):
            self._binaryThreshold = None
Example #27
class AdaptiveScalarEncoder(ScalarEncoder):
  """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework.

  Initialization of an adaptive encoder using resolution or radius is not supported;
  it must be initialized with n. This n is kept constant while the min and max of the
  encoder change.

  The adaptive encoder must have periodic set to False.

  The adaptive encoder may be initialized with a minval and maxval or with `None`
  for each of these. In the latter case, the min and max are set as the 1st and 99th
  percentile over a window of the past 100 records.

  **Note:** the sliding window may record duplicates of the values in the dataset,
  and therefore does not reflect the statistical distribution of the input data
  and may not be used to calculate the median, mean etc.
  """


  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
                resolution=0, name=None, verbosity=0, clipInput=True, forced=False):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
    self._learningEnabled = True
    if periodic:
      #Adaptive scalar encoders take non-periodic inputs only
      raise Exception('Adaptive scalar encoder does not encode periodic inputs')
    assert n != 0           # An adaptive encoder can only be initialized using n

    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval, maxval=maxval,
                                clipInput=True, name=name, verbosity=verbosity, forced=forced)
    self.recordNum=0    #how many inputs have been sent to the encoder?
    self.slidingWindow = MovingAverage(300)


  def _setEncoderParams(self):
    """
    Set the radius, resolution and range. These values are updated when minval
    and/or maxval change.
    """

    self.rangeInternal = float(self.maxval - self.minval)

    self.resolution = float(self.rangeInternal) / (self.n - self.w)
    self.radius = self.w * self.resolution
    self.range = self.rangeInternal + self.resolution

    # nInternal represents the output area excluding the possible padding on each side
    self.nInternal = self.n - 2 * self.padding

    # Invalidate the bucket values cache so that they get recomputed
    self._bucketValues = None


  def setFieldStats(self, fieldName, fieldStats):
    """
    TODO: document
    """
    #If the stats are not fully formed, ignore.
    if fieldStats[fieldName]['min'] is None or \
      fieldStats[fieldName]['max'] is None:
        return
    self.minval = fieldStats[fieldName]['min']
    self.maxval = fieldStats[fieldName]['max']
    if self.minval == self.maxval:
      self.maxval+=1
    self._setEncoderParams()


  def _setMinAndMax(self, input, learn):
    """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """

    self.slidingWindow.next(input)

    if self.minval is None and self.maxval is None:
      self.minval = input
      self.maxval = input + 1   # When the min and max are unspecified and only one record has been encoded
      self._setEncoderParams()

    elif learn:
      sorted = self.slidingWindow.getSlidingWindow()
      sorted.sort()

      minOverWindow = sorted[0]
      maxOverWindow = sorted[len(sorted)-1]

      if minOverWindow < self.minval:
        #initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement minval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} smaller than minval {2:.2f}. Adjusting minval to {3:.2f}".format(self.name, input, self.minval, minOverWindow)
        self.minval = minOverWindow       #-initialBump
        self._setEncoderParams()

      if maxOverWindow > self.maxval:
        #initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2     #decrement maxval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} greater than maxval {2:.2f}. Adjusting maxval to {3:.2f}".format(self.name, input, self.maxval, maxOverWindow)
        self.maxval = maxOverWindow       #+initialBump
        self._setEncoderParams()


  def getBucketIndices(self, input, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """

    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled

    if type(input) is float and math.isnan(input):
      input = SENTINEL_VALUE_FOR_MISSING_DATA

    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    else:
      self._setMinAndMax(input, learn)
      return super(AdaptiveScalarEncoder, self).getBucketIndices(input)


  def encodeIntoArray(self, input, output, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """

    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
        output[0:self.n] = 0
    elif not math.isnan(input):
      self._setMinAndMax(input, learn)

    super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

  def getBucketInfo(self, buckets):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """

    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]

    return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)


  def topDownCompute(self, encoded):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """

    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]
    return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)


  def dump(self):
    """
    Prints details about current state to stdout.
    """
    print "AdaptiveScalarEncoder:"
    print "  min: {0:f}".format(self.minval)
    print "  max: {0:f}".format(self.maxval)
    print "  w:   {0:d}".format(self.w)
    print "  n:   {0:d}".format(self.n)
    print "  resolution: {0:f}".format(self.resolution)
    print "  radius:     {0:f}".format(self.radius)
    print "  periodic: {0!s}".format(self.periodic)
    print "  nInternal: {0:d}".format(self.nInternal)
    print "  rangeInternal: {0:f}".format(self.rangeInternal)
    print "  padding: {0:d}".format(self.padding)


  @classmethod
  def read(cls, proto):
    encoder = super(AdaptiveScalarEncoder, cls).read(proto)
    encoder.recordNum = proto.recordNum
    encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
    return encoder


  def write(self, proto):
    super(AdaptiveScalarEncoder, self).write(proto)
    proto.recordNum = self.recordNum
    self.slidingWindow.write(proto.slidingWindow)
Example #28
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if self._mode == Anomaly.MODE_LIKELIHOOD:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder)
                       (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
                      (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")

            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        return score
Example #29
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set (a float in (0, 1)), the anomaly
       score will be discretized to 1/0 (1 if >= binaryAnomalyThreshold).
       The transformation is applied after the moving average is computed.

  """


  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which models
  probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was used to
  generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    if not self._mode in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occurred
                                 (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # apply binary discretization if required
    if self._binaryThreshold is not None:
      if score >= self._binaryThreshold:
        score = 1.0
      else:
        score = 0.0

    return score


  def __str__(self):
    windowSize = 0
    if self._movingAverage is not None:
      windowSize = self._movingAverage.windowSize
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __eq__(self, other):
    return (isinstance(other, Anomaly) and
            other._mode == self._mode and
            other._binaryThreshold == self._binaryThreshold and
            other._movingAverage == self._movingAverage and
            other._likelihood == self._likelihood)


  def __setstate__(self, state):
    """deserialization"""
    self.__dict__.update(state)

    if not hasattr(self, '_mode'):
      self._mode = Anomaly.MODE_PURE
    if not hasattr(self, '_movingAverage'):
      self._movingAverage = None
    if not hasattr(self, '_binaryThreshold'):
      self._binaryThreshold = None
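Finally, a hypothetical run of the binary-threshold path in compute() from the class above (column indices made up for illustration):

# Hypothetical usage of the binary discretization branch of Anomaly.compute().
detector = Anomaly(mode=Anomaly.MODE_PURE, binaryAnomalyThreshold=0.4)

high = detector.compute(activeColumns=[1, 2, 3, 4], predictedColumns=[1])
# 3 of 4 active columns unpredicted -> raw score 0.75 >= 0.4 -> returned as 1.0

low = detector.compute(activeColumns=[1, 2, 3, 4], predictedColumns=[1, 2, 3])
# 1 of 4 active columns unpredicted -> raw score 0.25 < 0.4 -> returned as 0.0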