Ejemplo n.º 1
0
  def testMovingAverageInstance(self):
    """
    Verify that MovingAverage.next() keeps the sliding window, running total
    and returned average consistent as values arrive one at a time, covering
    the empty initial state and the point where the window exceeds
    windowSize.  Exercises the instance method next().
    """
    ma = MovingAverage(windowSize=3)

    # (input, expected average, expected window contents, expected total)
    expectations = [
        (3, 3.0, [3.0], 3.0),
        (4, 3.5, [3.0, 4.0], 7.0),
        (5, 4.0, [3.0, 4.0, 5.0], 12.0),
        # Fourth value overflows windowSize=3: the oldest value (3) must be
        # popped from the window and subtracted from the total.
        (6, 5.0, [4.0, 5.0, 6.0], 15.0),
    ]

    for value, expectedAvg, expectedWindow, expectedTotal in expectations:
      self.assertEqual(ma.next(value), expectedAvg)
      self.assertListEqual(ma.getSlidingWindow(), expectedWindow)
      self.assertEqual(ma.total, expectedTotal)
Ejemplo n.º 2
0
  def testSerialization(self):
    """Round-trip a MovingAverage through pickle and verify equivalence."""
    original = MovingAverage(windowSize=3)
    for value in (3, 4, 5):
      original.next(value)

    clone = pickle.loads(pickle.dumps(original))
    self.assertEqual(clone, original)
    # Both instances must keep evolving identically after restoration.
    self.assertEqual(original.next(6), clone.next(6))
Ejemplo n.º 3
0
  def testEquals(self):
    """Two MovingAverage instances compare equal iff their state matches."""
    first = MovingAverage(windowSize=3)
    second = MovingAverage(windowSize=3)
    self.assertEqual(first, second)

    # A different window size must break equality.
    self.assertNotEqual(first, MovingAverage(windowSize=10))

    first = MovingAverage(windowSize=2, existingHistoricalValues=[3.0, 4.0, 5.0])
    second = MovingAverage(windowSize=2, existingHistoricalValues=[3.0, 4.0, 5.0])
    self.assertEqual(first, second)
    # Feeding a value to only one instance breaks equality...
    second.next(6)
    self.assertNotEqual(first, second)
    # ...until the other instance catches up with the same value.
    first.next(6)
    self.assertEqual(first, second)
Ejemplo n.º 4
0
    def testMovingAverageReadWrite(self):
        """Round-trip a MovingAverage through its capnp proto serialization."""
        ma = MovingAverage(windowSize=3)
        for value in (3, 4, 5):
            ma.next(value)

        outProto = MovingAverageProto.new_message()
        ma.write(outProto)

        # Serialize to a temp file, then rehydrate a fresh proto from it.
        with tempfile.TemporaryFile() as f:
            outProto.write(f)
            f.seek(0)
            inProto = MovingAverageProto.read(f)

        resurrectedMa = MovingAverage.read(inProto)

        # The restored instance must track the original exactly.
        self.assertEqual(ma.next(6), resurrectedMa.next(6))
        self.assertListEqual(ma.getSlidingWindow(),
                             resurrectedMa.getSlidingWindow())
        self.assertEqual(ma.total, resurrectedMa.total)
Ejemplo n.º 5
0
  def testMovingAverageReadWrite(self):
    """Check that capnp write()/read() preserves MovingAverage state."""
    ma = MovingAverage(windowSize=3)

    ma.next(3)
    ma.next(4)
    ma.next(5)

    # Serialize into a message, push it through a temp file, and read back.
    proto = MovingAverageProto.new_message()
    ma.write(proto)
    with tempfile.TemporaryFile() as tmp:
      proto.write(tmp)
      tmp.seek(0)
      restoredProto = MovingAverageProto.read(tmp)

    restoredMa = MovingAverage.read(restoredProto)

    # Original and restored instances must advance in lockstep.
    self.assertEqual(ma.next(6), restoredMa.next(6))
    self.assertListEqual(ma.getSlidingWindow(),
                         restoredMa.getSlidingWindow())
    self.assertEqual(ma.total, restoredMa.total)
Ejemplo n.º 6
0
class AdaptiveScalarEncoder(ScalarEncoder):
    """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework.

  Initialization of an adapive encoder using resolution or radius is not supported;
  it must be intitialized with n. This n is kept constant while the min and max of the
  encoder changes.

  The adaptive encoder must be have periodic set to false.

  The adaptive encoder may be initialized with a minval and maxval or with `None`
  for each of these. In the latter case, the min and max are set as the 1st and 99th
  percentile over a window of the past 100 records.

  **Note:** the sliding window may record duplicates of the values in the dataset,
  and therefore does not reflect the statistical distribution of the input data
  and may not be used to calculate the median, mean etc.
  """

    ############################################################################
    def __init__(self,
                 w,
                 minval=None,
                 maxval=None,
                 periodic=False,
                 n=0,
                 radius=0,
                 resolution=0,
                 name=None,
                 verbosity=0,
                 clipInput=True,
                 forced=False):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
        self._learningEnabled = True
        if periodic:
            #Adaptive scalar encoders take non-periodic inputs only
            raise Exception(
                'Adaptive scalar encoder does not encode periodic inputs')
        assert n != 0  #An adaptive encoder can only be intialized using n

        super(AdaptiveScalarEncoder, self).__init__(w=w,
                                                    n=n,
                                                    minval=minval,
                                                    maxval=maxval,
                                                    clipInput=True,
                                                    name=name,
                                                    verbosity=verbosity,
                                                    forced=forced)
        self.recordNum = 0  #how many inputs have been sent to the encoder?
        self.slidingWindow = MovingAverage(300)

    ############################################################################
    def _setEncoderParams(self):
        """
    Set the radius, resolution and range. These values are updated when minval
    and/or maxval change.
    """

        self.rangeInternal = float(self.maxval - self.minval)

        self.resolution = float(self.rangeInternal) / (self.n - self.w)
        self.radius = self.w * self.resolution
        self.range = self.rangeInternal + self.resolution

        # nInternal represents the output area excluding the possible padding on each side
        self.nInternal = self.n - 2 * self.padding

        # Invalidate the bucket values cache so that they get recomputed
        self._bucketValues = None

    ############################################################################
    def setFieldStats(self, fieldName, fieldStats):
        """
    TODO: document
    """
        #If the stats are not fully formed, ignore.
        if fieldStats[fieldName]['min'] == None or \
          fieldStats[fieldName]['max'] == None:
            return
        self.minval = fieldStats[fieldName]['min']
        self.maxval = fieldStats[fieldName]['max']
        if self.minval == self.maxval:
            self.maxval += 1
        self._setEncoderParams()

    ############################################################################
    def _setMinAndMax(self, input, learn):
        """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """

        self.slidingWindow.next(input)

        if self.minval is None and self.maxval is None:
            self.minval = input
            self.maxval = input + 1  #When the min and max and unspecified and only one record has been encoded
            self._setEncoderParams()

        elif learn:
            sorted = self.slidingWindow.getSlidingWindow()
            sorted.sort()

            minOverWindow = sorted[0]
            maxOverWindow = sorted[len(sorted) - 1]

            if minOverWindow < self.minval:
                #initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement minval more aggressively in the beginning
                if self.verbosity >= 2:
                    print "Input %s=%.2f smaller than minval %.2f. Adjusting minval to %.2f"\
                                    % (self.name, input, self.minval, minOverWindow)
                self.minval = minOverWindow  #-initialBump
                self._setEncoderParams()

            if maxOverWindow > self.maxval:
                #initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2     #decrement maxval more aggressively in the beginning
                if self.verbosity >= 2:
                    print "Input %s=%.2f greater than maxval %.2f. Adjusting maxval to %.2f" \
                                    % (self.name, input, self.maxval, maxOverWindow)
                self.maxval = maxOverWindow  #+initialBump
                self._setEncoderParams()

    ############################################################################
    def getBucketIndices(self, input, learn=None):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """

        self.recordNum += 1
        if learn is None:
            learn = self._learningEnabled

        if type(input) is float and math.isnan(input):
            input = SENTINEL_VALUE_FOR_MISSING_DATA

        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return [None]
        else:
            self._setMinAndMax(input, learn)
            return super(AdaptiveScalarEncoder, self).getBucketIndices(input)

    ############################################################################
    def encodeIntoArray(self, input, output, learn=None):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """

        self.recordNum += 1
        if learn is None:
            learn = self._learningEnabled
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:self.n] = 0
        elif not math.isnan(input):
            self._setMinAndMax(input, learn)

        super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

    ############################################################################
    def getBucketInfo(self, buckets):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """

        if self.minval is None or self.maxval is None:
            return [
                EncoderResult(value=0, scalar=0, encoding=numpy.zeros(self.n))
            ]

        return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)

    ############################################################################
    def topDownCompute(self, encoded):
        """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """

        if self.minval is None or self.maxval is None:
            return [
                EncoderResult(value=0, scalar=0, encoding=numpy.zeros(self.n))
            ]
        return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)

    ############################################################################
    def dump(self):
        """
    Prints details about current state to stdout.
    """
        print "AdaptiveScalarEncoder:"
        print "  min: %f" % self.minval
        print "  max: %f" % self.maxval
        print "  w:   %d" % self.w
        print "  n:   %d" % self.n
        print "  resolution: %f" % self.resolution
        print "  radius:     %f" % self.radius
        print "  periodic: %s" % self.periodic
        print "  nInternal: %d" % self.nInternal
        print "  rangeInternal: %f" % self.rangeInternal
        print "  padding: %d" % self.padding

    @classmethod
    def read(cls, proto):
        encoder = super(AdaptiveScalarEncoder, cls).read(proto)
        encoder.recordNum = proto.recordNum
        encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
        return encoder

    def write(self, proto):
        super(AdaptiveScalarEncoder, self).write(proto)
        proto.recordNum = self.recordNum
        self.slidingWindow.write(proto.slidingWindow)
Ejemplo n.º 7
0
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly; one of
        "pure", "likelihood", "weighted" (see class docstring)
    @param binaryAnomalyThreshold (optional) - if set, must be a float in
        (0,1); the score is discretized to 1/0 (1 if >= threshold) after the
        moving average is computed and updated.
    """
        self._mode = mode
        self._movingAverage = (MovingAverage(windowSize=slidingWindowSize)
                               if slidingWindowSize is not None else None)

        # Both likelihood-based modes need the probabilistic model.
        if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if self._mode not in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)
        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None:
            validThreshold = (isinstance(binaryAnomalyThreshold, float)
                              and 0.0 < binaryAnomalyThreshold < 1.0)
            if not validThreshold:
                raise ValueError(
                    "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                    "or None if disabled.")

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder);
                       used in anomaly-likelihood
    @param timestamp: (optional) date timestamp when the sample occurred;
                      used in anomaly-likelihood
    @return the computed anomaly score; float 0..1
    """
        # Raw score is always the starting point, regardless of mode.
        rawScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        if self._mode == Anomaly.MODE_PURE:
            score = rawScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")
            probability = self._likelihood.anomalyProbability(
                inputValue, rawScore, timestamp)
            # low likelihood -> high anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, rawScore, timestamp)
            score = rawScore * (1 - probability)

        # Smooth via moving average when a window was configured.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        # Discretize to binary 0/1 when a threshold was configured.
        if self._binaryThreshold is not None:
            score = 1.0 if score >= self._binaryThreshold else 0.0

        return score

    def __str__(self):
        windowSize = 0
        if self._movingAverage is not None:
            windowSize = self._movingAverage.windowSize
        return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)

    def __setstate__(self, state):
        """Deserialization: backfill attributes added after older pickles."""
        self.__dict__.update(state)

        for attr, default in (('_mode', Anomaly.MODE_PURE),
                              ('_movingAverage', None),
                              ('_binaryThreshold', None)):
            if not hasattr(self, attr):
                setattr(self, attr, default)
Ejemplo n.º 8
0
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly; one of
        "pure", "likelihood", "weighted" (see class docstring)
    """
    self._mode = mode
    self._movingAverage = None
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)

    # NOTE(review): only MODE_LIKELIHOOD initializes _likelihood here, yet
    # compute() also reads it in MODE_WEIGHTED — confirm whether weighted
    # mode is expected to work in this version.
    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder);
                       used in anomaly-likelihood
    @param timestamp: (optional) date timestamp when the sample occurred;
                      used in anomaly-likelihood
    @return the computed anomaly score; float 0..1
    """
    # Raw score is always the starting point, regardless of mode.
    rawScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    if self._mode == Anomaly.MODE_PURE:
      score = rawScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      # low likelihood -> high anomaly
      score = 1 - self._likelihood.anomalyProbability(
          inputValue, rawScore, timestamp)
    elif self._mode == Anomaly.MODE_WEIGHTED:
      score = rawScore * (1 - self._likelihood.anomalyProbability(
          inputValue, rawScore, timestamp))

    # Smooth via moving average when a window was configured.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
Ejemplo n.º 9
0
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set [0,1] anomaly score
       will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
       The transformation is applied after moving average is computed.

  """


  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which models
  probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was used to
  generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    self._movingAverage = (MovingAverage(windowSize=slidingWindowSize)
                           if slidingWindowSize is not None else None)

    # Both likelihood-based modes need the probabilistic model; keep the
    # attribute present (as None) otherwise so __eq__ can always read it.
    needsLikelihood = self._mode in (Anomaly.MODE_LIKELIHOOD,
                                     Anomaly.MODE_WEIGHTED)
    self._likelihood = AnomalyLikelihood() if needsLikelihood else None

    if self._mode not in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None:
      validThreshold = (isinstance(binaryAnomalyThreshold, float) and
                        0.0 < binaryAnomalyThreshold < 1.0)
      if not validThreshold:
        raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                         "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occured
                                 (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    """
    # Raw score is always the starting point, regardless of mode.
    rawScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    if self._mode == Anomaly.MODE_PURE:
      score = rawScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      # low likelihood -> high anomaly
      score = 1 - self._likelihood.anomalyProbability(
          inputValue, rawScore, timestamp)
    elif self._mode == Anomaly.MODE_WEIGHTED:
      score = rawScore * (1 - self._likelihood.anomalyProbability(
          inputValue, rawScore, timestamp))

    # Smooth via moving average when a window was configured.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # Discretize to binary 0/1 when a threshold was configured.
    if self._binaryThreshold is not None:
      score = 1.0 if score >= self._binaryThreshold else 0.0

    return score


  def __str__(self):
    windowSize = (0 if self._movingAverage is None
                  else self._movingAverage.windowSize)
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __eq__(self, other):
    if not isinstance(other, Anomaly):
      return False
    return (self._mode == other._mode and
            self._binaryThreshold == other._binaryThreshold and
            self._movingAverage == other._movingAverage and
            self._likelihood == other._likelihood)


  def __setstate__(self, state):
    """Deserialization: backfill attributes added after older pickles."""
    self.__dict__.update(state)

    for attr, default in (('_mode', Anomaly.MODE_PURE),
                          ('_movingAverage', None),
                          ('_binaryThreshold', None)):
      if not hasattr(self, attr):
        setattr(self, attr, default)
Ejemplo n.º 10
0
class AdaptiveScalarEncoder(ScalarEncoder):
  """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework.

  Initialization of an adaptive encoder using resolution or radius is not
  supported; it must be initialized with n. This n is kept constant while
  the min and max of the encoder change.

  The adaptive encoder must have periodic set to False.

  The adaptive encoder may be initialized with a minval and maxval or with
  `None` for each of these. In the latter case, min and max are adapted
  from the smallest and largest values observed over a sliding window of
  the most recent 300 records (see _setMinAndMax).

  **Note:** the sliding window may record duplicates of the values in the
  dataset, and therefore does not reflect the statistical distribution of
  the input data and may not be used to calculate the median, mean etc.
  """


  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
                resolution=0, name=None, verbosity=0, clipInput=True, forced=False):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]

    Only initialization via n is supported; radius and resolution are
    accepted for signature compatibility but must be left at 0.
    """
    self._learningEnabled = True
    if periodic:
      # Adaptive scalar encoders take non-periodic inputs only
      raise Exception('Adaptive scalar encoder does not encode periodic inputs')
    assert n!=0           # An adaptive encoder can only be initialized using n

    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval, maxval=maxval,
                                clipInput=True, name=name, verbosity=verbosity, forced=forced)
    self.recordNum=0    # how many inputs have been sent to the encoder?
    # Window of the most recent raw inputs; used to adapt minval/maxval.
    self.slidingWindow = MovingAverage(300)


  def _setEncoderParams(self):
    """
    Set the radius, resolution and range. These values are updated when minval
    and/or maxval change.
    """

    self.rangeInternal = float(self.maxval - self.minval)

    # n is fixed, so a wider range directly coarsens the resolution.
    self.resolution = float(self.rangeInternal) / (self.n - self.w)
    self.radius = self.w * self.resolution
    self.range = self.rangeInternal + self.resolution

    # nInternal represents the output area excluding the possible padding on each side
    self.nInternal = self.n - 2 * self.padding

    # Invalidate the bucket values cache so that they get recomputed
    self._bucketValues = None


  def setFieldStats(self, fieldName, fieldStats):
    """
    Initialize minval/maxval from pre-computed field statistics and refresh
    the derived encoder parameters.

    fieldName: key identifying this encoder's field within fieldStats.
    fieldStats: dict of per-field dicts carrying 'min' and 'max' entries.
    """
    # If the stats are not fully formed, ignore.
    if fieldStats[fieldName]['min'] is None or \
      fieldStats[fieldName]['max'] is None:
        return
    self.minval = fieldStats[fieldName]['min']
    self.maxval = fieldStats[fieldName]['max']
    if self.minval == self.maxval:
      # Avoid a zero-width range, which would yield a zero resolution.
      self.maxval+=1
    self._setEncoderParams()


  def _setMinAndMax(self, input, learn):
    """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """

    self.slidingWindow.next(input)

    if self.minval is None and self.maxval is None:
      # First record with unspecified bounds: seed a width-1 range.
      self.minval = input
      self.maxval = input+1   # When the min and max are unspecified and only one record has been encoded
      self._setEncoderParams()

    elif learn:
      # NOTE(review): `sorted` shadows the builtin here, and sort() mutates
      # the list returned by getSlidingWindow() in place — confirm that
      # getSlidingWindow() returns a copy.
      sorted = self.slidingWindow.getSlidingWindow()
      sorted.sort()

      minOverWindow = sorted[0]
      maxOverWindow = sorted[len(sorted)-1]

      if minOverWindow < self.minval:
        #initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement minval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} smaller than minval {2:.2f}. Adjusting minval to {3:.2f}".format(self.name, input, self.minval, minOverWindow)
        self.minval = minOverWindow       #-initialBump
        self._setEncoderParams()

      if maxOverWindow > self.maxval:
        #initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2     #decrement maxval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} greater than maxval {2:.2f}. Adjusting maxval to {3:.2f}".format(self.name, input, self.maxval, maxOverWindow)
        self.maxval = maxOverWindow       #+initialBump
        self._setEncoderParams()


  def getBucketIndices(self, input, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """

    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled

    # Treat NaN inputs the same as explicitly-missing data.
    if type(input) is float and math.isnan(input):
      input = SENTINEL_VALUE_FOR_MISSING_DATA

    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    else:
      self._setMinAndMax(input, learn)
      return super(AdaptiveScalarEncoder, self).getBucketIndices(input)


  def encodeIntoArray(self, input, output,learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """

    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
        # Missing data encodes as an all-zero output.
        output[0:self.n] = 0
    elif not math.isnan(input):
      self._setMinAndMax(input, learn)

    super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

  def getBucketInfo(self, buckets):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """

    # Before any record has been seen the bounds are unknown; return a
    # neutral all-zero result instead of delegating.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]

    return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)


  def topDownCompute(self, encoded):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """

    # Same guard as getBucketInfo: no bounds yet -> neutral result.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]
    return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)


  def dump(self):
    """
    Prints details about current state to stdout.
    """
    print "AdaptiveScalarEncoder:"
    print "  min: {0:f}".format(self.minval)
    print "  max: {0:f}".format(self.maxval)
    print "  w:   {0:d}".format(self.w)
    print "  n:   {0:d}".format(self.n)
    print "  resolution: {0:f}".format(self.resolution)
    print "  radius:     {0:f}".format(self.radius)
    print "  periodic: {0!s}".format(self.periodic)
    print "  nInternal: {0:d}".format(self.nInternal)
    print "  rangeInternal: {0:f}".format(self.rangeInternal)
    print "  padding: {0:d}".format(self.padding)


  @classmethod
  def read(cls, proto):
    # Deserialize from a capnp proto; restores adaptive-specific state on
    # top of the base-class state.
    encoder = super(AdaptiveScalarEncoder, cls).read(proto)
    encoder.recordNum = proto.recordNum
    encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
    return encoder


  def write(self, proto):
    # Serialize to a capnp proto, including adaptive-specific state.
    super(AdaptiveScalarEncoder, self).write(proto)
    proto.recordNum = self.recordNum
    self.slidingWindow.write(proto.slidingWindow)
Ejemplo n.º 11
0
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if self._mode == Anomaly.MODE_LIKELIHOOD:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

        @param activeColumns: array of active column indices
        @param predictedColumns: array of column indices predicted in this step
            (used for anomaly in step T+1)
        @param inputValue: (optional) value of current input to encoders
            (eg "cat" for category encoder); required by both likelihood-based
            modes (MODE_LIKELIHOOD and MODE_WEIGHTED)
        @param timestamp: (optional) date timestamp when the sample occured
            (used in anomaly-likelihood)
        @return the computed anomaly score; float 0..1
        @raises ValueError if a likelihood-based mode is selected and
            inputValue was not supplied
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        else:
            # MODE_LIKELIHOOD and MODE_WEIGHTED both feed the probabilistic
            # model.  Previously only MODE_LIKELIHOOD enforced inputValue,
            # letting MODE_WEIGHTED silently pass None to the model.
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode %r requires 'inputValue' as "
                    "parameter to compute() method. " % self._mode)

            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            if self._mode == Anomaly.MODE_LIKELIHOOD:
                # low likelihood -> hi anomaly
                score = 1 - probability
            else:  # Anomaly.MODE_WEIGHTED
                score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        return score