def testMovingAverage(self):
  """
  Verify that MovingAverage.compute() maintains the running average,
  window contents, and total correctly -- starting from an empty history,
  accepting both ints and floats, and evicting the oldest value once the
  number of values exceeds windowSize.
  """
  windowSize = 3
  history = []
  runningTotal = 0

  expectations = [
      # (input, expected average, expected window, expected total)
      (3,   3.0, [3.0],            3.0),
      (4,   3.5, [3.0, 4.0],       7.0),
      (5.0, 4.0, [3.0, 4.0, 5.0], 12.0),
      # Window is now full; the first value (3.0) must get popped.
      (6.0, 5.0, [4.0, 5.0, 6.0], 15.0),
  ]
  for value, expectedAvg, expectedWindow, expectedTotal in expectations:
    newAverage, history, runningTotal = MovingAverage.compute(
        history, runningTotal, value, windowSize)
    self.assertEqual(newAverage, expectedAvg)
    self.assertListEqual(list(history), expectedWindow)
    self.assertEqual(runningTotal, expectedTotal)
def testMovingAverageInstance(self):
  """
  Same checks as testMovingAverage, but exercised through the instance
  method next(): the sliding window and total must update correctly, even
  from a null initial condition, and the oldest entry must be evicted
  once windowSize values have been seen.
  """
  ma = MovingAverage(windowSize=3)

  cases = [
      # (input, expected average, expected window, expected total)
      (3, 3.0, [3.0],            3.0),
      (4, 3.5, [3.0, 4.0],       7.0),
      (5, 4.0, [3.0, 4.0, 5.0], 12.0),
      # Fourth value: ensure the first value gets popped.
      (6, 5.0, [4.0, 5.0, 6.0], 15.0),
  ]
  for value, expectedAvg, expectedWindow, expectedTotal in cases:
    self.assertEqual(ma.next(value), expectedAvg)
    self.assertListEqual(ma.getSlidingWindow(), expectedWindow)
    self.assertEqual(ma.total, expectedTotal)
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how much anomal the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  @param binaryAnomalyThreshold (optional) - if set [0,1] anomaly score
       will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
       The transformation is applied after moving average is computed
       and updated.
  @raises ValueError for an unsupported mode or an invalid threshold.
  """
  self._mode = mode
  # Validate the mode up front, before it is used to select behavior.
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  # Both likelihood-based modes need the probabilistic model; explicitly
  # set None otherwise so the attribute always exists (consistent with
  # the other Anomaly.__init__ variants in this file).
  if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                     "or None if disabled.")
def testMovingAverage(self):
  """
  Test that the (internal) moving average maintains the averages
  correctly, even for a null initial condition and when the number of
  values exceeds windowSize.  Both integers and floats are passed in.
  """
  window = 3
  values, runningTotal = [], 0

  avg, values, runningTotal = MovingAverage.compute(values, runningTotal,
                                                    3, window)
  self.assertEqual(avg, 3.0)
  self.assertEqual(values, [3.0])
  self.assertEqual(runningTotal, 3.0)

  avg, values, runningTotal = MovingAverage.compute(values, runningTotal,
                                                    4, window)
  self.assertEqual(avg, 3.5)
  self.assertListEqual(values, [3.0, 4.0])
  self.assertEqual(runningTotal, 7.0)

  avg, values, runningTotal = MovingAverage.compute(values, runningTotal,
                                                    5.0, window)
  self.assertEqual(avg, 4.0)
  self.assertListEqual(values, [3.0, 4.0, 5.0])
  self.assertEqual(runningTotal, 12.0)

  # The window is full, so the oldest value (3.0) must be popped now.
  avg, values, runningTotal = MovingAverage.compute(values, runningTotal,
                                                    6.0, window)
  self.assertEqual(avg, 5.0)
  self.assertListEqual(values, [4.0, 5.0, 6.0])
  self.assertEqual(runningTotal, 15.0)
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  """
  Initialize an Anomaly scorer.

  @param slidingWindowSize (optional) - number of recent scores averaged
      into the final anomaly score; int >= 0, or None to disable.
  @param mode (optional) - (string) how to compute anomaly; one of
      Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED.
  @param binaryAnomalyThreshold (optional) - if set, must be a float in
      (0,1); the (averaged) score is discretized to 1/0
      (1 if >= binaryAnomalyThreshold).
  @raises ValueError for an unsupported mode or an invalid threshold.
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  # Likelihood model only needed for the two probabilistic modes.
  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None

  if self._mode not in self._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError(
        "Anomaly: binaryAnomalyThreshold must be from (0,1) "
        "or None if disabled.")
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how much anomal the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  @raises ValueError if mode is not one of the supported modes.
  """
  self._mode = mode
  # Validate the mode before it is used to select behavior below.
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  # FIX: explicitly set self._likelihood to None for MODE_PURE so the
  # attribute always exists regardless of mode.
  if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None
def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
             resolution=0, name=None, verbosity=0, clipInput=True,
             forced=False):
  """
  [overrides nupic.encoders.scalar.ScalarEncoder.__init__]

  NOTE(review): radius and resolution are accepted for signature
  compatibility but are not used -- an adaptive encoder must be sized
  with n (asserted below).  clipInput is likewise accepted but the
  superclass is always constructed with clipInput=True.
  """
  self._learningEnabled = True
  if periodic:
    # Adaptive scalar encoders take non-periodic inputs only
    raise Exception(
        'Adaptive scalar encoder does not encode periodic inputs')
  assert n != 0  # An adaptive encoder can only be initialized using n

  super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval,
                                              maxval=maxval, clipInput=True,
                                              name=name, verbosity=verbosity,
                                              forced=forced)
  self.recordNum = 0  # how many inputs have been sent to the encoder?
  # Window of recent inputs (size 300), consulted when adapting min/max.
  self.slidingWindow = MovingAverage(300)
def testSerialization(self):
  """serialization using pickle"""
  original = MovingAverage(windowSize=3)
  for value in (3, 4.5, 5):
    original.next(value)

  # Round-trip through pickle and compare against the source object.
  clone = pickle.loads(pickle.dumps(original))
  self.assertEqual(clone, original)

  # Both copies must keep evolving identically after restoration.
  self.assertEqual(original.next(6), clone.next(6))
def _anomalyScoreMovingAverage(anomalyScores, windowSize=10, verbosity=0):
  """
  Given a list of anomaly scores return a list of averaged records.
  anomalyScores is assumed to be a list of records of the form:
                [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0]

  Each record in the returned list contains:
                [datetime, value, averagedScore]

  *Note:* we only average the anomaly score.
  """
  historicalValues = []
  total = 0.0
  averagedRecordList = []  # Aggregated records
  for record in anomalyScores:
    # Skip (but log) records without correct number of entries
    wellFormed = isinstance(record, (list, tuple)) and len(record) == 3
    if not wellFormed:
      if verbosity >= 1:
        print("Malformed record:", record)
      continue

    avg, historicalValues, total = MovingAverage.compute(
        historicalValues, total, record[2], windowSize)

    averagedRecordList.append([record[0], record[1], avg])
    if verbosity > 2:
      print("Aggregating input record:", record)
      print("Result:", [record[0], record[1], avg])

  return averagedRecordList, historicalValues, total
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how much anomal the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  @raises ValueError if mode is not one of the supported modes.
  """
  self._mode = mode
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  # BUG FIX: MODE_WEIGHTED also uses the likelihood model (it weights the
  # raw anomaly by likelihood), but previously self._likelihood was only
  # created for MODE_LIKELIHOOD, so weighted mode raised AttributeError
  # on the first compute().  Also set None explicitly for MODE_PURE.
  if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  """
  Build an Anomaly scorer configured for one of the supported modes.

  @param slidingWindowSize (optional) - how many recent scores are
      averaged into the final anomaly score; int >= 0, or None.
  @param mode (optional) - (string) one of Anomaly.MODE_PURE,
      Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED.
  @param binaryAnomalyThreshold (optional) - float in (0,1); when set the
      (averaged) score is discretized to 1/0 (1 if >= threshold).
  @raises ValueError for an unsupported mode or an invalid threshold.
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  # Only the likelihood-based modes need the probabilistic model.
  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None

  if self._mode not in self._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                     "or None if disabled.")
def __init__(self, w, minval=None, maxval=None, periodic=False, n=0,
             radius=0, resolution=0, name=None, verbosity=0, clipInput=True,
             forced=False):
  """
  [overrides nupic.encoders.scalar.ScalarEncoder.__init__]

  An adaptive encoder must be sized with n (radius/resolution are
  accepted only for signature compatibility), must be non-periodic, and
  always clips its input (the superclass is constructed with
  clipInput=True).
  """
  self._learningEnabled = True
  if periodic:
    # Adaptive scalar encoders take non-periodic inputs only
    raise Exception('Adaptive scalar encoder does not encode periodic inputs')
  assert n != 0  # An adaptive encoder can only be initialized using n

  super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval,
      maxval=maxval, clipInput=True, name=name, verbosity=verbosity,
      forced=forced)
  self.recordNum = 0  # how many inputs have been sent to the encoder?
  # Window of the 300 most recent inputs, consulted to adapt min/max.
  self.slidingWindow = MovingAverage(300)
def testMovingAverageReadWrite(self):
  """Round-trip a MovingAverage through its capnp proto via a temp file."""
  original = MovingAverage(windowSize=3)
  for value in (3, 4, 5):
    original.next(value)

  outProto = MovingAverageProto.new_message()
  original.write(outProto)

  # Write the proto to a temp file and read it back into a new proto
  with tempfile.TemporaryFile() as f:
    outProto.write(f)
    f.seek(0)
    restored = MovingAverage.read(MovingAverageProto.read(f))

  # The restored copy must track the original exactly.
  self.assertEqual(original.next(6), restored.next(6))
  self.assertListEqual(original.getSlidingWindow(),
                       restored.getSlidingWindow())
  self.assertEqual(original.total, restored.total)
def __init__(self, w, minval=None, maxval=None, periodic=False, n=0,
             radius=0, resolution=0, name=None, verbosity=0, clipInput=True,
             forced=False):
  """
  [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
  """
  self._learningEnabled = True

  # Adaptive scalar encoders accept non-periodic inputs only.
  if periodic:
    raise Exception('Adaptive scalar encoder does not encode periodic inputs')

  # An adaptive encoder can only be initialized using n.
  assert n != 0

  # Delegate the core setup to ScalarEncoder.__init__; note clipInput is
  # always passed as True for the adaptive variant.
  super(AdaptiveScalarEncoder, self).__init__(
      w=w, n=n, minval=minval, maxval=maxval, clipInput=True, name=name,
      verbosity=verbosity, forced=forced)

  # How many inputs have been sent to the encoder?
  self.recordNum = 0
  # Window of recent inputs (size 300).
  self.slidingWindow = MovingAverage(300)
def testMovingAverageSlidingWindowInit(self):
  """
  Test that the slidingWindow value is correctly assigned when
  initializing a new MovingAverage object.
  """
  # With existing historical values; same values as in testMovingAverage()
  seeded = MovingAverage(windowSize=3,
                         existingHistoricalValues=[3.0, 4.0, 5.0])
  self.assertListEqual(seeded.getSlidingWindow(), [3.0, 4.0, 5.0])

  # Without existing historical values the window starts empty.
  self.assertListEqual(MovingAverage(windowSize=3).getSlidingWindow(), [])
def _anomalyScoreMovingAverage(anomalyScores, windowSize=10, verbosity=0, ): """ Given a list of anomaly scores return a list of averaged records. anomalyScores is assumed to be a list of records of the form: [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0] Each record in the returned list list contains: [datetime, value, averagedScore] *Note:* we only average the anomaly score. """ historicalValues = [] total = 0.0 averagedRecordList = [] # Aggregated records for record in anomalyScores: # Skip (but log) records without correct number of entries if not isinstance(record, (list, tuple)) or len(record) != 3: if verbosity >= 1: print "Malformed record:", record continue avg, historicalValues, total = ( MovingAverage.compute(historicalValues, total, record[2], windowSize) ) averagedRecordList.append( [record[0], record[1], avg] ) if verbosity > 2: print "Aggregating input record:", record print "Result:", [record[0], record[1], avg] return averagedRecordList, historicalValues, total
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
        score that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @raises ValueError if mode is not one of the supported modes.
    """
    self._mode = mode
    # Validate the mode before it is used to select behavior below.
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # BUG FIX: compute() consults self._likelihood for MODE_WEIGHTED as
    # well, but previously it was only created for MODE_LIKELIHOOD, so
    # weighted mode raised AttributeError.  Create it for both modes and
    # set None otherwise so the attribute always exists.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
    else:
      self._likelihood = None


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this
        step (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
        (eg "cat" for category encoder) (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                         "requires 'inputValue' as parameter to compute() method. ")
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
class AdaptiveScalarEncoder(ScalarEncoder):
  """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework.

  Initialization of an adapive encoder using resolution or radius is not
  supported; it must be intitialized with n. This n is kept constant while
  the min and max of the encoder changes.

  The adaptive encoder must be have periodic set to false.

  The adaptive encoder may be initialized with a minval and maxval or with
  `None` for each of these. In the latter case, the min and max are set as
  the 1st and 99th percentile over a window of the past 100 records.

  **Note:** the sliding window may record duplicates of the values in the
  dataset, and therefore does not reflect the statistical distribution of
  the input data and may not be used to calculate the median, mean etc.
  """

  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0,
               radius=0, resolution=0, name=None, verbosity=0,
               clipInput=True, forced=False):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]

    NOTE(review): radius/resolution/clipInput are accepted only for
    signature compatibility; n must be non-zero and the superclass is
    always built with clipInput=True.
    """
    self._learningEnabled = True
    if periodic:
      # Adaptive scalar encoders take non-periodic inputs only
      raise Exception(
          'Adaptive scalar encoder does not encode periodic inputs')
    assert n != 0  # An adaptive encoder can only be intialized using n

    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval,
        maxval=maxval, clipInput=True, name=name, verbosity=verbosity,
        forced=forced)
    self.recordNum = 0  # how many inputs have been sent to the encoder?
    # Window of recent inputs, consulted by _setMinAndMax() below.
    self.slidingWindow = MovingAverage(300)

  def _setEncoderParams(self):
    """
    Set the radius, resolution and range. These values are updated when
    minval and/or maxval change.
    """
    self.rangeInternal = float(self.maxval - self.minval)
    self.resolution = float(self.rangeInternal) / (self.n - self.w)
    self.radius = self.w * self.resolution
    self.range = self.rangeInternal + self.resolution
    # nInternal represents the output area excluding the possible padding
    # on each side
    self.nInternal = self.n - 2 * self.padding
    # Invalidate the bucket values cache so that they get recomputed
    self._bucketValues = None

  def setFieldStats(self, fieldName, fieldStats):
    """
    Set minval/maxval from externally supplied field statistics and
    recompute the derived encoder parameters.  If min == max, maxval is
    bumped by 1 so the range is never zero.
    """
    # If the stats are not fully formed, ignore.
    if fieldStats[fieldName]['min'] == None or \
       fieldStats[fieldName]['max'] == None:
      return

    self.minval = fieldStats[fieldName]['min']
    self.maxval = fieldStats[fieldName]['max']
    if self.minval == self.maxval:
      self.maxval += 1
    self._setEncoderParams()

  def _setMinAndMax(self, input, learn):
    """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """
    self.slidingWindow.next(input)

    if self.minval is None and self.maxval is None:
      self.minval = input
      self.maxval = input + 1  # When the min and max and unspecified and
                               # only one record has been encoded
      self._setEncoderParams()
    elif learn:
      # min/max over the sliding window of recent inputs.
      sorted = self.slidingWindow.getSlidingWindow()
      sorted.sort()

      minOverWindow = sorted[0]
      maxOverWindow = sorted[len(sorted) - 1]

      if minOverWindow < self.minval:
        # initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2
        # (decrement minval more aggressively in the beginning)
        if self.verbosity >= 2:
          print "Input %s=%.2f smaller than minval %.2f. Adjusting minval to %.2f"\
                % (self.name, input, self.minval, minOverWindow)
        self.minval = minOverWindow  # -initialBump
        self._setEncoderParams()

      if maxOverWindow > self.maxval:
        # initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2
        # (increment maxval more aggressively in the beginning)
        if self.verbosity >= 2:
          print "Input %s=%.2f greater than maxval %.2f. Adjusting maxval to %.2f" \
                % (self.name, input, self.maxval, maxOverWindow)
        self.maxval = maxOverWindow  # +initialBump
        self._setEncoderParams()

  def getBucketIndices(self, input, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """
    self.recordNum += 1
    if learn is None:
      learn = self._learningEnabled

    # NaN inputs are treated as missing data.
    if type(input) is float and math.isnan(input):
      input = SENTINEL_VALUE_FOR_MISSING_DATA

    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    else:
      self._setMinAndMax(input, learn)
      return super(AdaptiveScalarEncoder, self).getBucketIndices(input)

  def encodeIntoArray(self, input, output, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """
    self.recordNum += 1
    if learn is None:
      learn = self._learningEnabled
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      # Missing data encodes to all zeros.
      output[0:self.n] = 0
    elif not math.isnan(input):
      self._setMinAndMax(input, learn)

    super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

  def getBucketInfo(self, buckets):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """
    # Without an established range there is nothing meaningful to return.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                            encoding=numpy.zeros(self.n))]

    return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)

  def topDownCompute(self, encoded):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """
    # Without an established range there is nothing meaningful to return.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                            encoding=numpy.zeros(self.n))]
    return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)

  def dump(self):
    """
    Prints details about current state to stdout.
    """
    print "AdaptiveScalarEncoder:"
    print "  min: %f" % self.minval
    print "  max: %f" % self.maxval
    print "  w:   %d" % self.w
    print "  n:   %d" % self.n
    print "  resolution: %f" % self.resolution
    print "  radius:     %f" % self.radius
    print "  periodic:   %s" % self.periodic
    print "  nInternal: %d" % self.nInternal
    print "  rangeInternal: %f" % self.rangeInternal
    print "  padding: %d" % self.padding

  @classmethod
  def read(cls, proto):
    # Restore superclass state, then the adaptive-specific fields.
    encoder = super(AdaptiveScalarEncoder, cls).read(proto)
    encoder.recordNum = proto.recordNum
    encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
    return encoder

  def write(self, proto):
    # Persist superclass state, then the adaptive-specific fields.
    super(AdaptiveScalarEncoder, self).write(proto)
    proto.recordNum = self.recordNum
    self.slidingWindow.write(proto.slidingWindow)
def read(cls, proto):
  """
  Deserialize an AdaptiveScalarEncoder from a capnp proto.

  Restores the superclass state first, then the adaptive-specific fields:
  recordNum and the sliding window of recent inputs.

  @param proto: capnp proto reader for this encoder
  @return a new AdaptiveScalarEncoder instance
  """
  encoder = super(AdaptiveScalarEncoder, cls).read(proto)
  encoder.recordNum = proto.recordNum
  encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
  return encoder
def updateAnomalyLikelihoods(anomalyScores, params, verbosity=0): # pylint: disable=W0613 """ Compute updated probabilities for anomalyScores using the given params. :param anomalyScores: a list of records. Each record is a list with the following three elements: [timestamp, value, score] Example:: [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0] :param params: the JSON dict returned by estimateAnomalyLikelihoods :param verbosity: integer controlling extent of printouts for debugging :type verbosity: int :returns: 3-tuple consisting of: - likelihoods numpy array of likelihoods, one for each aggregated point - avgRecordList list of averaged input records - params an updated JSON object containing the state of this metric. """ if verbosity > 3: print "In updateAnomalyLikelihoods." print "Number of anomaly scores:", len(anomalyScores) print "First 20:", anomalyScores[0:min(20, len(anomalyScores))] print "Params:", params if len(anomalyScores) == 0: raise ValueError("Must have at least one anomalyScore") if not isValidEstimatorParams(params): raise ValueError("'params' is not a valid params structure") # For backward compatibility. if not params.has_key("historicalLikelihoods"): params["historicalLikelihoods"] = [1.0] # Compute moving averages of these new scores using the previous values # as well as likelihood for these scores using the old estimator historicalValues = params["movingAverage"]["historicalValues"] total = params["movingAverage"]["total"] windowSize = params["movingAverage"]["windowSize"] aggRecordList = numpy.zeros(len(anomalyScores), dtype=float) likelihoods = numpy.zeros(len(anomalyScores), dtype=float) for i, v in enumerate(anomalyScores): newAverage, historicalValues, total = ( MovingAverage.compute(historicalValues, total, v[2], windowSize) ) aggRecordList[i] = newAverage likelihoods[i] = normalProbability(newAverage, params["distribution"]) # Filter the likelihood values. First we prepend the historical likelihoods # to the current set. 
Then we filter the values. We peel off the likelihoods # to return and the last windowSize values to store for later. likelihoods2 = params["historicalLikelihoods"] + list(likelihoods) filteredLikelihoods = _filterLikelihoods(likelihoods2) likelihoods[:] = filteredLikelihoods[-len(likelihoods):] historicalLikelihoods = likelihoods2[-min(windowSize, len(likelihoods2)):] # Update the estimator newParams = { "distribution": params["distribution"], "movingAverage": { "historicalValues": historicalValues, "total": total, "windowSize": windowSize, }, "historicalLikelihoods": historicalLikelihoods, } assert len(newParams["historicalLikelihoods"]) <= windowSize if verbosity > 3: print "Number of likelihoods:", len(likelihoods) print "First 20 likelihoods:", likelihoods[0:min(20, len(likelihoods))] print "Leaving updateAnomalyLikelihoods." return (likelihoods, aggRecordList, newParams)
def updateAnomalyLikelihoods(anomalyScores, params, verbosity=0): """ Compute updated probabilities for anomalyScores using the given params. :param anomalyScores: a list of records. Each record is a list with the following three elements: [timestamp, value, score] Example:: [datetime.datetime(2013, 8, 10, 23, 0), 6.0, 1.0] :param params: the JSON dict returned by estimateAnomalyLikelihoods :param verbosity: integer controlling extent of printouts for debugging :type verbosity: int :returns: 3-tuple consisting of: - likelihoods numpy array of likelihoods, one for each aggregated point - avgRecordList list of averaged input records - params an updated JSON object containing the state of this metric. """ if verbosity > 3: print "In updateAnomalyLikelihoods." print "Number of anomaly scores:", len(anomalyScores) print "First 20:", anomalyScores[0:min(20, len(anomalyScores))] print "Params:", params if len(anomalyScores) == 0: raise ValueError("Must have at least one anomalyScore") if not isValidEstimatorParams(params): raise ValueError("'params' is not a valid params structure") # For backward compatibility. if not params.has_key("historicalLikelihoods"): params["historicalLikelihoods"] = [1.0] # Compute moving averages of these new scores using the previous values # as well as likelihood for these scores using the old estimator historicalValues = params["movingAverage"]["historicalValues"] total = params["movingAverage"]["total"] windowSize = params["movingAverage"]["windowSize"] aggRecordList = numpy.zeros(len(anomalyScores), dtype=float) likelihoods = numpy.zeros(len(anomalyScores), dtype=float) for i, v in enumerate(anomalyScores): newAverage, historicalValues, total = ( MovingAverage.compute(historicalValues, total, v[2], windowSize) ) aggRecordList[i] = newAverage likelihoods[i] = normalProbability(newAverage, params["distribution"]) # Filter the likelihood values. First we prepend the historical likelihoods # to the current set. Then we filter the values. 
We peel off the likelihoods # to return and the last windowSize values to store for later. likelihoods2 = params["historicalLikelihoods"] + list(likelihoods) filteredLikelihoods = _filterLikelihoods(likelihoods2) likelihoods[:] = filteredLikelihoods[-len(likelihoods):] historicalLikelihoods = likelihoods2[-min(windowSize, len(likelihoods2)):] # Update the estimator newParams = { "distribution": params["distribution"], "movingAverage": { "historicalValues": historicalValues, "total": total, "windowSize": windowSize, }, "historicalLikelihoods": historicalLikelihoods, } assert len(newParams["historicalLikelihoods"]) <= windowSize if verbosity > 3: print "Number of likelihoods:", len(likelihoods) print "First 20 likelihoods:", likelihoods[0:min(20, len(likelihoods))] print "Leaving updateAnomalyLikelihoods." return (likelihoods, aggRecordList, newParams)
def testEquals(self):
  """MovingAverage equality: same config and state compare equal."""
  first = MovingAverage(windowSize=3)
  second = MovingAverage(windowSize=3)
  self.assertEqual(first, second)

  # A different window size must break equality.
  self.assertNotEqual(first, MovingAverage(windowSize=10))

  first = MovingAverage(windowSize=2,
                        existingHistoricalValues=[3.0, 4.0, 5.0])
  second = MovingAverage(windowSize=2,
                         existingHistoricalValues=[3.0, 4.0, 5.0])
  self.assertEqual(first, second)

  # Advancing only one instance makes them diverge...
  second.next(6)
  self.assertNotEqual(first, second)
  # ...and feeding the other the same value brings them back in sync.
  first.next(6)
  self.assertEqual(first, second)
def testMovingAverageReadWrite(self):
  """
  Serialization round-trip via the capnp proto: write a MovingAverage,
  read it back through a temp file, and verify the restored copy behaves
  identically to the original.
  """
  ma = MovingAverage(windowSize=3)
  ma.next(3)
  ma.next(4)
  ma.next(5)

  proto1 = MovingAverageProto.new_message()
  ma.write(proto1)

  # Write the proto to a temp file and read it back into a new proto
  with tempfile.TemporaryFile() as f:
    proto1.write(f)
    f.seek(0)
    proto2 = MovingAverageProto.read(f)
    resurrectedMa = MovingAverage.read(proto2)

  newAverage = ma.next(6)
  self.assertEqual(newAverage, resurrectedMa.next(6))
  self.assertListEqual(ma.getSlidingWindow(),
                       resurrectedMa.getSlidingWindow())
  self.assertEqual(ma.total, resurrectedMa.total)
  # BUG FIX: the original used assertTrue(ma, resurrectedMa), which only
  # checks that `ma` is truthy (the second argument is a failure message)
  # and never exercises __eq__.  assertEqual performs the real comparison.
  self.assertEqual(ma, resurrectedMa)
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - if set [0,1] anomaly score
         will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
         The transformation is applied after moving average is computed and
         updated.
    """
    self._mode = mode

    # Validate the mode up front, before it is used below to decide whether
    # the likelihood model must be built (previously an invalid mode was
    # only rejected after that check).
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # Both likelihood-based modes need the probabilistic model.
    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0 or
          binaryAnomalyThreshold <= 0.0):
      raise ValueError(
          "Anomaly: binaryAnomalyThreshold must be from (0,1) "
          "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                                 (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError(
            "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
            "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # apply binary discretization if required
    if self._binaryThreshold is not None:
      if score >= self._binaryThreshold:
        score = 1.0
      else:
        score = 0.0

    return score


  def __str__(self):
    windowSize = 0
    if self._movingAverage is not None:
      windowSize = self._movingAverage.windowSize
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __setstate__(self, state):
    """deserialization"""
    self.__dict__.update(state)

    # Older pickles may predate some attributes; backfill safe defaults.
    if not hasattr(self, '_mode'):
      self._mode = Anomaly.MODE_PURE
    if not hasattr(self, '_movingAverage'):
      self._movingAverage = None
    if not hasattr(self, '_binaryThreshold'):
      self._binaryThreshold = None
class AdaptiveScalarEncoder(ScalarEncoder):
  """
  This is an implementation of the scalar encoder that adapts the min and
  max of the scalar encoder dynamically. This is essential to the streaming
  model of the online prediction framework. Initialization of an adaptive
  encoder using resolution or radius is not supported; it must be
  initialized with n. This n is kept constant while the min and max of the
  encoder changes.

  The adaptive encoder must have periodic set to false.

  The adaptive encoder may be initialized with a minval and maxval or with
  `None` for each of these. In the latter case, the min and max are set as
  the 1st and 99th percentile over a window of the past 100 records.

  **Note:** the sliding window may record duplicates of the values in the
  dataset, and therefore does not reflect the statistical distribution of the
  input data and may not be used to calculate the median, mean etc.
  """

  def __init__(self, w, minval=None, maxval=None, periodic=False, n=0, radius=0,
               resolution=0, name=None, verbosity=0, clipInput=True, forced=False):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.__init__]
    """
    self._learningEnabled = True
    if periodic:
      # Adaptive scalar encoders take non-periodic inputs only
      raise Exception('Adaptive scalar encoder does not encode periodic inputs')
    assert n!=0           # An adaptive encoder can only be initialized using n

    # NOTE(review): clipInput is hard-coded to True in this super() call, so
    # the constructor's clipInput argument is effectively ignored - confirm
    # this is intentional.
    super(AdaptiveScalarEncoder, self).__init__(w=w, n=n, minval=minval,
                maxval=maxval, clipInput=True, name=name, verbosity=verbosity,
                forced=forced)
    self.recordNum=0    #how many inputs have been sent to the encoder?
    self.slidingWindow = MovingAverage(300)

  def _setEncoderParams(self):
    """
    Set the radius, resolution and range. These values are updated when minval
    and/or maxval change.
    """
    self.rangeInternal = float(self.maxval - self.minval)

    # n is fixed, so a wider input range coarsens the resolution.
    self.resolution = float(self.rangeInternal) / (self.n - self.w)
    self.radius = self.w * self.resolution
    self.range = self.rangeInternal + self.resolution

    # nInternal represents the output area excluding the possible padding on
    # each side
    self.nInternal = self.n - 2 * self.padding

    # Invalidate the bucket values cache so that they get recomputed
    self._bucketValues = None

  def setFieldStats(self, fieldName, fieldStats):
    """
    Adopt externally-supplied min/max field statistics for this encoder.
    fieldStats is a dict keyed by field name with 'min'/'max' entries.
    """
    # If the stats are not fully formed, ignore.
    if fieldStats[fieldName]['min'] is None or \
      fieldStats[fieldName]['max'] is None:
      return
    self.minval = fieldStats[fieldName]['min']
    self.maxval = fieldStats[fieldName]['max']
    # Avoid a zero-width range (would divide by zero in _setEncoderParams).
    if self.minval == self.maxval:
      self.maxval+=1
    self._setEncoderParams()

  def _setMinAndMax(self, input, learn):
    """
    Potentially change the minval and maxval using input.
    **The learn flag is currently not supported by cla regions.**
    """
    self.slidingWindow.next(input)

    if self.minval is None and self.maxval is None:
      self.minval = input
      self.maxval = input+1   # When min and max are unspecified and only one
                              # record has been encoded
      self._setEncoderParams()

    elif learn:
      # NOTE(review): this local name shadows the builtin sorted(); harmless
      # here but worth renaming.
      sorted = self.slidingWindow.getSlidingWindow()
      sorted.sort()

      minOverWindow = sorted[0]
      maxOverWindow = sorted[len(sorted)-1]

      # Only ever widen the range: min can move down, max can move up.
      if minOverWindow < self.minval:
        #initialBump = abs(self.minval-minOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement minval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} smaller than minval {2:.2f}. Adjusting minval to {3:.2f}".format(self.name, input, self.minval, minOverWindow)
        self.minval = minOverWindow       #-initialBump
        self._setEncoderParams()

      if maxOverWindow > self.maxval:
        #initialBump = abs(self.maxval-maxOverWindow)*(1-(min(self.recordNum, 200.0)/200.0))*2      #decrement maxval more aggressively in the beginning
        if self.verbosity >= 2:
          print "Input {0!s}={1:.2f} greater than maxval {2:.2f}. Adjusting maxval to {3:.2f}".format(self.name, input, self.maxval, maxOverWindow)
        self.maxval = maxOverWindow       #+initialBump
        self._setEncoderParams()

  def getBucketIndices(self, input, learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketIndices]
    """
    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled

    # NaN is treated as missing data.
    if type(input) is float and math.isnan(input):
      input = SENTINEL_VALUE_FOR_MISSING_DATA

    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    else:
      self._setMinAndMax(input, learn)
      return super(AdaptiveScalarEncoder, self).getBucketIndices(input)

  def encodeIntoArray(self, input, output,learn=None):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.encodeIntoArray]
    """
    self.recordNum +=1
    if learn is None:
      learn = self._learningEnabled
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      # Missing data encodes as all zeros.
      output[0:self.n] = 0
    elif not math.isnan(input):
      self._setMinAndMax(input, learn)

    super(AdaptiveScalarEncoder, self).encodeIntoArray(input, output)

  def getBucketInfo(self, buckets):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.getBucketInfo]
    """
    # Before any value has been seen, return a zero result rather than fail.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]

    return super(AdaptiveScalarEncoder, self).getBucketInfo(buckets)

  def topDownCompute(self, encoded):
    """
    [overrides nupic.encoders.scalar.ScalarEncoder.topDownCompute]
    """
    # Before any value has been seen, return a zero result rather than fail.
    if self.minval is None or self.maxval is None:
      return [EncoderResult(value=0, scalar=0,
                           encoding=numpy.zeros(self.n))]
    return super(AdaptiveScalarEncoder, self).topDownCompute(encoded)

  def dump(self):
    """
    Prints details about current state to stdout.
    """
    print "AdaptiveScalarEncoder:"
    print "  min: {0:f}".format(self.minval)
    print "  max: {0:f}".format(self.maxval)
    print "  w:   {0:d}".format(self.w)
    print "  n:   {0:d}".format(self.n)
    print "  resolution: {0:f}".format(self.resolution)
    print "  radius:     {0:f}".format(self.radius)
    print "  periodic: {0!s}".format(self.periodic)
    print "  nInternal: {0:d}".format(self.nInternal)
    print "  rangeInternal: {0:f}".format(self.rangeInternal)
    print "  padding: {0:d}".format(self.padding)

  @classmethod
  def read(cls, proto):
    """Deserialize from a capnp proto, restoring recordNum and the window."""
    encoder = super(AdaptiveScalarEncoder, cls).read(proto)
    encoder.recordNum = proto.recordNum
    encoder.slidingWindow = MovingAverage.read(proto.slidingWindow)
    return encoder

  def write(self, proto):
    """Serialize to a capnp proto, including the adaptive state."""
    super(AdaptiveScalarEncoder, self).write(proto)
    proto.recordNum = self.recordNum
    self.slidingWindow.write(proto.slidingWindow)
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # Fix: compute()'s MODE_WEIGHTED branch also calls
    # self._likelihood.anomalyProbability(), but the likelihood model was
    # previously created only for MODE_LIKELIHOOD, so weighted mode crashed
    # with AttributeError on the first compute() call.
    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly

    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                                 (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError(
            "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
            "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set [0,1] anomaly score
       will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
       The transformation is applied after moving average is computed.
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which models
  probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was used to
  generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode

    # Validate the mode before it is used to decide whether the likelihood
    # model must be built (previously invalid modes were rejected only after
    # that check).
    if self._mode not in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # Both likelihood-based modes need the probabilistic model; keep the
    # attribute present (None) in pure mode so __eq__ can always compare it.
    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0 or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
                       (eg "cat" for category encoder)
                       (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occured
                      (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # apply binary discretization if required
    if self._binaryThreshold is not None:
      if score >= self._binaryThreshold:
        score = 1.0
      else:
        score = 0.0

    return score


  def __str__(self):
    windowSize = 0
    if self._movingAverage is not None:
      windowSize = self._movingAverage.windowSize
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __eq__(self, other):
    return (isinstance(other, Anomaly) and
            other._mode == self._mode and
            other._binaryThreshold == self._binaryThreshold and
            other._movingAverage == self._movingAverage and
            other._likelihood == self._likelihood)


  def __ne__(self, other):
    # Python 2 does not derive != from __eq__; without this, a != b falls
    # back to identity comparison and can disagree with ==.
    return not self.__eq__(other)


  def __setstate__(self, state):
    """deserialization"""
    self.__dict__.update(state)

    # Older pickles may predate some attributes; backfill safe defaults.
    if not hasattr(self, '_mode'):
      self._mode = Anomaly.MODE_PURE
    if not hasattr(self, '_movingAverage'):
      self._movingAverage = None
    if not hasattr(self, '_binaryThreshold'):
      self._binaryThreshold = None