def compute_scores(y_test, y_pred, normalize=False):
    # Errors
    errors = np.array((y_test - y_pred)**2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(
            y_test[i], errors[i], timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    #   - HIGH: log_likelihood >= 0.5
    #   - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    # medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    # anomalies['medium'][medium_idx] = 1

    return errors, log_likelihoods, anomalies
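# A minimal usage sketch for compute_scores() above, with hypothetical
# inputs; it assumes numpy is imported as np and that y_test/y_pred are
# aligned 1-D arrays of observed and predicted values.
import numpy as np

y_test = np.array([10.0, 10.2, 9.9, 55.0, 10.1])   # one obvious outlier
y_pred = np.array([10.0, 10.1, 10.0, 10.0, 10.0])

errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred,
                                                    normalize=True)
print anomalies['high']  # 1.0 wherever log_likelihood >= 0.5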
class buildmodel:
    def __init__(self):
        #self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0], tmImplementation="cpp")
        with open("model_params.json") as fp:
            self.model_params = json.load(fp)
        print self.model_params
        self.newmodel = ModelFactory.create(self.model_params)
        self.newmodel.enableLearning()
        self.newmodel.enableInference({"predictedField": "value"})
        self.DATE_FORMAT = "%d/%m/%Y %H:%M"
        self.anomalylikelihood = AnomalyLikelihood()

    def processdata(self, data):
        timestamp = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
        ce = float(data[1])
        result = self.newmodel.run({"dttm": timestamp, "value": ce})
        #print result
        anomalyScore = result.inferences["anomalyScore"]
        anomaly = self.anomalylikelihood.anomalyProbability(
            ce, anomalyScore, timestamp)
        logLikelihood = self.anomalylikelihood.computeLogLikelihood(anomaly)
        logLikelihood = logLikelihood * 100
        print logLikelihood
        '''if anomaly > 0.999:
            print "Detected high level anomaly at " + str(timestamp)
        elif anomaly > 0.958:
            print "Detected medium level anomaly at " + str(timestamp)'''
        if logLikelihood > 20:
            print "Detected high level anomaly at " + str(timestamp)
        elif logLikelihood > 15:
            print "Detected medium level anomaly at " + str(timestamp)
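# A minimal driver sketch for buildmodel above; the CSV path and column
# layout are hypothetical. Each row must be [timestamp_string, value], with
# the timestamp formatted as "%d/%m/%Y %H:%M", and model_params.json must
# exist next to the script.
import csv

model = buildmodel()
with open("data.csv") as fin:
    reader = csv.reader(fin)
    reader.next()              # skip the header row
    for row in reader:
        model.processdata(row)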
def runAvogadroAnomaly(metric, options):
    """ Create a new HTM Model, fetch the data from the local DB, process it
    in NuPIC, and save the results to a new CSV output file.

    :param metric: AvogadroAgent metric class
    :param options: CLI Options
    """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultFile = open(
        os.path.join(options.prefix, metric.name + "-result.csv"), "wb")
    csvWriter = csv.writer(resultFile)
    csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                        "anomaly_likelihood", "color"])

    headers = ("timestamp", metric.name)

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
        try:
            value = float(value)
        except (ValueError, TypeError):
            continue

        if not math.isnan(value):
            modelInput = dict(zip(headers, (ts, value)))
            modelInput[metric.name] = float(value)
            modelInput["timestamp"] = datetime.datetime.fromtimestamp(
                float(modelInput["timestamp"]))

            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput[metric.name], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4 and logLikelihood <= .5:
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow([modelInput["timestamp"], float(value),
                                anomalyScore, logLikelihood, color])
        else:
            resultFile.flush()
def testLikelihoodValues(self):
    """ test to see if the region keeps track of state correctly and produces
        the same likelihoods as the AnomalyLikelihood module """
    anomalyLikelihoodRegion = AnomalyLikelihoodRegion()
    anomalyLikelihood = AnomalyLikelihood()

    inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
    outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
    with open(_INPUT_DATA_FILE) as f:
        reader = csv.reader(f)
        reader.next()
        for record in reader:
            consumption = float(record[1])
            anomalyScore = float(record[2])

            likelihood1 = anomalyLikelihood.anomalyProbability(
                consumption, anomalyScore)

            inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
            inputs['metricValue'] = numpy.array([consumption])
            anomalyLikelihoodRegion.compute(inputs, outputs)
            likelihood2 = outputs['anomalyLikelihood'][0]

            self.assertEqual(likelihood1, likelihood2)
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:
        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow(["timestamp", "value", "_raw_score",
                            "likelihood_score", "log_likelihood_score"])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #inputData["dttm"] = datetime.datetime.now()

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], inputData['value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([inputData["dttm"], inputData["value"],
                                anomalyScore, likelihood, logLikelihood])

            # Progress report
            if (i % 1000) == 0:
                print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
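# A small sketch of the two thresholding conventions used across this
# collection (numbers hypothetical): anomalyProbability() saturates very
# close to 1.0 once the model has settled, so clients threshold the raw
# likelihood at e.g. 0.9999 (as above), while computeLogLikelihood()
# rescales the score so thresholds around 0.4-0.5 become practical (as in
# runAvogadroAnomaly earlier).
anomalyLikelihood = AnomalyLikelihood()
likelihood = anomalyLikelihood.anomalyProbability(42.0, 0.25, timestamp=None)
logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
is_anomaly = (likelihood > 0.9999) or (logLikelihood > 0.5)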
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    :param slidingWindowSize: [optional] - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0

    :param mode: (string) [optional] how to compute anomaly, one of:

        - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
        - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
        - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

    :param binaryAnomalyThreshold: [optional] if set [0,1] anomaly score
        will be discretized to 1/0 (1 if >= binaryAnomalyThreshold). The
        transformation is applied after moving average is computed.
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    """
    Default mode. The raw anomaly score as computed by
    :func:`~.anomaly_likelihood.computeRawAnomalyScore`
    """
    MODE_LIKELIHOOD = "likelihood"
    """
    Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which
    models probability of receiving this value and anomalyScore
    """
    MODE_WEIGHTED = "weighted"
    """
    Multiplies the likelihood result with the raw anomaly score that was
    used to generate the likelihood (anomaly * likelihood)
    """

    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if (self._mode == Anomaly.MODE_LIKELIHOOD or
                self._mode == Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        else:
            self._likelihood = None

        if not self._mode in self._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None and (
                not isinstance(binaryAnomalyThreshold, float) or
                binaryAnomalyThreshold >= 1.0 or
                binaryAnomalyThreshold <= 0.0):
            raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                             "or None if disabled.")

    def compute(self, activeColumns, predictedColumns,
                inputValue=None, timestamp=None):
        """Compute the anomaly score as the percent of active columns not
        predicted.

        :param activeColumns: array of active column indices
        :param predictedColumns: array of columns indices predicted in this step
                                 (used for anomaly in step T+1)
        :param inputValue: (optional) value of current input to encoders
                           (eg "cat" for category encoder)
                           (used in anomaly-likelihood)
        :param timestamp: (optional) date timestamp when the sample occurred
                          (used in anomaly-likelihood)
        :returns: the computed anomaly score; float 0..1
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                                 "requires 'inputValue' as parameter to compute() method. ")
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        # apply binary discretization if required
        if self._binaryThreshold is not None:
            if score >= self._binaryThreshold:
                score = 1.0
            else:
                score = 0.0

        return score

    def __str__(self):
        windowSize = 0
        if self._movingAverage is not None:
            windowSize = self._movingAverage.windowSize
        return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)

    def __eq__(self, other):
        return (isinstance(other, Anomaly) and
                other._mode == self._mode and
                other._binaryThreshold == self._binaryThreshold and
                other._movingAverage == self._movingAverage and
                other._likelihood == self._likelihood)

    def __setstate__(self, state):
        """deserialization"""
        self.__dict__.update(state)

        if not hasattr(self, '_mode'):
            self._mode = Anomaly.MODE_PURE
        if not hasattr(self, '_movingAverage'):
            self._movingAverage = None
        if not hasattr(self, '_binaryThreshold'):
            self._binaryThreshold = None
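# A minimal usage sketch for the Anomaly helper above; the column index
# arrays are hypothetical stand-ins for real SP/TM output.
import numpy

anomaly = Anomaly(slidingWindowSize=5, mode=Anomaly.MODE_LIKELIHOOD)

activeColumns = numpy.array([2, 3, 5, 7])       # columns active this step
predictedColumns = numpy.array([3, 5, 7, 11])   # columns predicted last step

score = anomaly.compute(activeColumns, predictedColumns,
                        inputValue=42.0, timestamp=None)
print score  # float 0..1: (1 - likelihood), smoothed over the window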
class _ModelRunner(object):
    """ Use OPF Model to process metric data samples from stdin and emit
    anomaly likelihood results to stdout
    """

    # Input column meta info compatible with parameters generated by
    # getScalarMetricWithTimeOfDayAnomalyParams
    _INPUT_RECORD_SCHEMA = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    def __init__(self, modelId, stats):
        """
        :param str modelId: model identifier
        :param dict stats: Metric data stats per stats_schema.json in the
            unicorn_backend package.
        """
        self._modelId = modelId

        # NOTE: ModelRecordEncoder is implemented in the pull request
        # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
        self._modelRecordEncoder = record_stream.ModelRecordEncoder(
            fields=self._INPUT_RECORD_SCHEMA)

        self._model = self._createModel(stats=stats)

        self._anomalyLikelihood = AnomalyLikelihood()

    @classmethod
    def _createModel(cls, stats):
        """Instantiate and configure an OPF model

        :param dict stats: Metric data stats per stats_schema.json in the
            unicorn_backend package.
        :returns: OPF Model instance
        """
        # Generate swarm params
        swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
            metricData=[0],
            minVal=stats["min"],
            maxVal=stats["max"],
            minResolution=stats.get("minResolution"))

        model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
        model.enableLearning()
        model.enableInference(swarmParams["inferenceArgs"])

        return model

    @classmethod
    def _readInputMessages(cls):
        """Create a generator that waits for and yields input messages from
        stdin

        yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is
        the `datetime.datetime` timestamp of the metric data sample and
        <scalar-value> is the floating point value of the metric data sample.
        """
        while True:
            message = sys.stdin.readline()

            if message:
                timestamp, scalarValue = json.loads(message)
                yield (datetime.utcfromtimestamp(timestamp), scalarValue)
            else:
                # Front End closed the pipe (or died)
                break

    @classmethod
    def _emitOutputMessage(cls, rowIndex, anomalyProbability):
        """Emit output message to stdout

        :param int rowIndex: 0-based index of corresponding input sample
        :param float anomalyProbability: computed anomaly probability value
        """
        message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)
        sys.stdout.write(message)
        sys.stdout.flush()

    def _computeAnomalyProbability(self, inputRow):
        """ Compute anomaly log likelihood score

        :param tuple inputRow: Two-tuple input metric data row
            (<datetime-timestamp>, <float-scalar>)
        :returns: Log-scaled anomaly probability
        :rtype: float
        """
        # Generate raw anomaly score
        inputRecord = self._modelRecordEncoder.encode(inputRow)
        rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

        # Generate anomaly likelihood score
        anomalyProbability = self._anomalyLikelihood.anomalyProbability(
            value=inputRow[1],
            anomalyScore=rawAnomalyScore,
            timestamp=inputRow[0])

        return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)

    def run(self):
        """ Run the model: ingest and process the input metric data and emit
        output messages containing anomaly scores
        """
        g_log.info("Processing model=%s", self._modelId)

        for rowIndex, inputRow in enumerate(self._readInputMessages()):
            anomalyProbability = self._computeAnomalyProbability(inputRow)

            self._emitOutputMessage(rowIndex=rowIndex,
                                    anomalyProbability=anomalyProbability)
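# A sketch of the line protocol _ModelRunner speaks (values hypothetical).
# The parent process writes one JSON array per line to the child's stdin and
# reads one JSON array per line from its stdout.
import json

request = json.dumps([1416000000, 5.3]) + "\n"   # stdin:  [unix_timestamp, metric_value]
response = json.dumps([0, 0.42]) + "\n"          # stdout: [row_index, log_likelihood]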
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Runs the HTM model and generates the anomaly scores.

    Arguments:
    :model: the model created with create_model().
    :a: the beginning of the analyzed signal.
    :b: the end of the analyzed signal.
    :save: if True then the anomalies output will be saved as .txt.
    :string: the string to differentiate the name of the saved .txt files.
    """
    ######################### open the signals ##############################
    if aggregate == True:
        signal, time_vect = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))
    else:
        signal = open_signs()
        signal = signal[a:b, 1]
    #-------------------------------------------------------------------------

    ##################### declare the anomalies lists ########################
    anom_scores = []
    anom_likelihood = []
    anom_loglikelihood = []
    #-------------------------------------------------------------------------

    ##################### declare the predicted lists ########################
    # Predictions are always made one step ahead: the first predicted value
    # corresponds to index 1, so there is no prediction for index 0.
    # Likewise, after seeing the last signal "A" the model predicts "A+1",
    # which has no matching value in the signal array.
    predictions_1 = []
    predictions_5 = []
    predictions_1.append(0)
    for i in range(5):
        predictions_5.append(0)
    #-------------------------------------------------------------------------

    ################ declare the anomaly likelihood class ####################
    likelihood = AnomalyLikelihood(learningPeriod=300)
    #-------------------------------------------------------------------------

    # iterate over each value in the signal array; the counter is used for
    # debugging purposes
    for counter, value in enumerate(signal):

        ######## declare the dict which will be passed to the model ##########
        inputRecords = {}  # the model only accepts data in this dict format
        inputRecords['c1'] = float(value)
        #---------------------------------------------------------------------

        ######## run the HTM model over the inputRecords dict ################
        result = model.run(inputRecords)
        #---------------------------------------------------------------------

        ######## compute the anomaly likelihood and loglikelihood ############
        current_likelihood = likelihood.anomalyProbability(
            value, result.inferences["anomalyScore"], timestamp=None)
        current_loglikelihood = likelihood.computeLogLikelihood(
            current_likelihood)
        #---------------------------------------------------------------------

        ############################ PREDICTIONS #############################
        # obtain the predicted values from the inferences dict and append
        # them to the prediction arrays
        bestPredictions = result.inferences["multiStepBestPredictions"]
        predictions_1.append(bestPredictions[1])
        predictions_5.append(bestPredictions[5])
        #---------------------------------------------------------------------

        ######## add the anomaly values to the respective lists ##############
        anom_scores.append(result.inferences["anomalyScore"])
        anom_likelihood.append(current_likelihood)
        anom_loglikelihood.append(current_loglikelihood)
        #---------------------------------------------------------------------

        ###### print the input and prediction, for debugging purposes ########
        if counter % 1 == 0:
            #print("Actual input [%d]: %f" % (counter, value))
            print('prediction of [{0}]:(input) {1:8} (1-step) {2:8} (5-step) {3:8}'
                  .format(counter, value, predictions_1[counter],
                          predictions_5[counter]))
            #print("Input[%d]: %f" % (counter+1, signal[counter+1]))
            #print("Multi Step Predictions: %s" % (result.inferences["multiStepPredictions"]))
            #print("\n")
        #---------------------------------------------------------------------

    ################# save the anomaly and prediction arrays #################
    if save == True:
        # the "string" suffix differentiates the training and online
        # learning outputs
        np.savetxt("anom_score_" + string + ".txt", anom_scores,
                   delimiter=',')
        np.savetxt("anom_likelihood_" + string + ".txt", anom_likelihood,
                   delimiter=',')
        np.savetxt("anom_logscore_" + string + ".txt", anom_loglikelihood,
                   delimiter=',')
        np.savetxt("anom_prediction_1" + string + ".txt", predictions_1,
                   delimiter=',')
        np.savetxt("anom_prediction_5" + string + ".txt", predictions_5,
                   delimiter=',')
class AnomalyLikelihoodRegion(PyRegion):
    """Region for computing the anomaly likelihoods."""

    @classmethod
    def getSpec(cls):
        return {
            "description": ("Region that computes anomaly likelihoods for "
                            "temporal memory."),
            "singleNodeOnly": True,
            "inputs": {
                "rawAnomalyScore": {
                    "description": "The anomaly score whose likelihood is "
                                   "to be computed",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
                "metricValue": {
                    "description": "The input metric value",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
            },
            "outputs": {
                "anomalyLikelihood": {
                    "description": "The resultant anomaly likelihood",
                    "dataType": "Real32",
                    "count": 1,
                    "isDefaultOutput": True,
                },
            },
            "parameters": {
                "learningPeriod": {
                    "description": "The number of iterations required for the "
                                   "algorithm to learn the basic patterns in "
                                   "the dataset and for the anomaly score to "
                                   "'settle down'.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 288,
                    "accessMode": "ReadWrite"
                },
                "estimationSamples": {
                    "description": "The number of reasonable anomaly scores "
                                   "required for the initial estimate of the "
                                   "Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
                "historicWindowSize": {
                    "description": "Size of sliding window of historical data "
                                   "points to maintain for periodic "
                                   "reestimation of the Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 8640,
                    "accessMode": "ReadWrite"
                },
                "reestimationPeriod": {
                    "description": "How often we re-estimate the Gaussian "
                                   "distribution.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
            },
            "commands": {},
        }

    def __init__(self,
                 learningPeriod=288,
                 estimationSamples=100,
                 historicWindowSize=8640,
                 reestimationPeriod=100):
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            historicWindowSize=historicWindowSize,
            reestimationPeriod=reestimationPeriod)

    def __eq__(self, other):
        return self.anomalyLikelihood == other.anomalyLikelihood

    def __ne__(self, other):
        return not self == other

    @classmethod
    def read(cls, proto):
        anomalyLikelihoodRegion = object.__new__(cls)
        anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)
        return anomalyLikelihoodRegion

    def write(self, proto):
        self.anomalyLikelihood.write(proto)

    def initialize(self):
        pass

    def compute(self, inputs, outputs):
        anomalyScore = inputs["rawAnomalyScore"][0]
        value = inputs["metricValue"][0]
        anomalyProbability = self.anomalyLikelihood.anomalyProbability(
            value, anomalyScore)
        outputs["anomalyLikelihood"][0] = anomalyProbability
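# A minimal standalone sketch of driving AnomalyLikelihoodRegion directly,
# outside a Network; the input values are hypothetical, and the output
# buffer is a plain numpy array shaped per the spec (count=1).
import numpy

region = AnomalyLikelihoodRegion(learningPeriod=288, estimationSamples=100)
inputs = {'rawAnomalyScore': numpy.array([0.25]),
          'metricValue': numpy.array([42.0])}
outputs = {'anomalyLikelihood': numpy.zeros(1)}
region.compute(inputs, outputs)
print outputs['anomalyLikelihood'][0]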
class Monitor(object):
    """ A NuPIC model that saves results to Redis. """

    def __init__(self, config):

        # Instantiate NuPIC model
        model_params = base_model_params.MODEL_PARAMS

        # Set resolution
        model_params['modelParams']['sensorParams']['encoders']['value']['resolution'] = config['resolution']

        # Override other NuPIC parameters:
        model_params['modelParams'] = update_dict(model_params['modelParams'],
                                                  config['nupic_model_params'])

        # Create model and enable inference on it
        self.model = ModelFactory.create(model_params)
        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging
        self.logger = logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(
            os.environ['LOG_DIR'] + "/monitor_%s.log" % self.stream.name,
            maxBytes=1024 * 1024,
            backupCount=4,
        )
        handler.setFormatter(logging.Formatter(
            '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write metadata to Redis
        try:
            # Save in redis with key = 'results:monitor_id' and
            # value = 'time, status, actual, prediction, anomaly'
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
        except Exception:
            self.logger.warn("Could not write results to redis.", exc_info=True)

    def train(self):
        data = self.stream.historic_data()
        for model_input in data:
            self.update(model_input, False)  # Don't post anomalies in training

    def loop(self):
        while True:
            data = self.stream.new_data()
            for model_input in data:
                self.update(model_input, True)  # Post anomalies when online
            sleep(self.seconds_per_request)

    def update(self, model_input, is_to_post):
        # Pass the input to the model
        result = self.model.run(model_input)

        # Shift results
        result = self.shifter.shift(result)

        # Save multi step predictions
        inference = result.inferences['multiStepPredictions']

        # Take the anomaly_score
        anomaly_score = result.inferences['anomalyScore']

        # Compute the Anomaly Likelihood
        likelihood = self.anomalyLikelihood.anomalyProbability(
            model_input['value'], anomaly_score, model_input['time'])

        # Get the predicted value for reporting
        predicted = result.inferences['multiStepBestPredictions'][1]

        # Get timestamp from datetime
        timestamp = calendar.timegm(model_input['time'].timetuple())

        self.logger.info("Processing: %s",
                         strftime("%Y-%m-%d %H:%M:%S", model_input['time'].timetuple()))

        # Save results to Redis
        if inference[1]:
            try:
                # Save in redis with key = 'results:monitor_id' and
                # value = 'time, raw_value, actual, prediction, anomaly'
                #   * actual: the value processed by the NuPIC model, which
                #     can be an average of raw_values
                #   * prediction: prediction based on 'actual' values.
                self.db.rpush('results:%s' % self.stream.id,
                              '%s,%.5f,%.5f,%.5f,%.5f,%.5f' % (timestamp,
                                                               model_input['raw_value'],
                                                               result.rawInput['value'],
                                                               predicted,
                                                               anomaly_score,
                                                               likelihood))
                max_items = 10000
                ln = self.db.llen('results:%s' % self.stream.id)
                if ln > max_items:
                    self.db.ltrim('results:%s' % self.stream.id, ln - max_items, ln)
            except Exception:
                self.logger.warn("Could not write results to redis.", exc_info=True)

        # See if above threshold (in which case anomalous is True)
        anomalous = False
        if self.anomaly_threshold is not None:
            if anomaly_score >= self.anomaly_threshold:
                anomalous = True

        if self.likelihood_threshold is not None:
            if likelihood >= self.likelihood_threshold:
                anomalous = True

        # Post if webhook is not None
        if is_to_post and self.webhook is not None:
            # Check if it was in alert state in the previous time step
            was_alerted = self.alert

            # Update alert state
            self.alert = anomalous

            # Send a notification only when the monitor enters an anomalous
            # state (was not alerted before, is alerted now)
            if not was_alerted and self.alert:
                report = {'anomaly_score': anomaly_score,
                          'likelihood': likelihood,
                          'model_input': {'time': model_input['time'].isoformat(),
                                          'value': model_input['raw_value']}}
                self._send_post(report)

        # Return anomalous state
        return {"likelihood": likelihood,
                "anomalous": anomalous,
                "anomalyScore": anomaly_score,
                "predicted": predicted}

    def delete(self):
        """ Remove this monitor from redis """
        self.db.delete("results:%s" % self.stream.id)
        self.db.delete('name:%s' % self.stream.id)
        self.db.delete('value_label:%s' % self.stream.id)
        self.db.delete('value_unit:%s' % self.stream.id)

    def _send_post(self, report):
        """ Send HTTP POST notification. """
        if "hooks.slack.com" not in self.webhook:
            payload = {'sent_at': datetime.utcnow().isoformat(),
                       'report': report,
                       'monitor': self.stream.name,
                       'source': type(self.stream).__name__,
                       'metric': '%s (%s)' % (self.stream.value_label,
                                              self.stream.value_unit),
                       'chart': 'http://%s?id=%s' % (self.domain, self.stream.id)}
        else:
            payload = {'username': '******',
                       'icon_url': 'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
                       'text': 'Anomalous state in *%s* from _%s_:' % (self.stream.name,
                                                                       type(self.stream).__name__),
                       'attachments': [{'color': 'warning',
                                        'fields': [{'title': 'Chart',
                                                    'value': 'http://%s?id=%s' % (self.domain, self.stream.id),
                                                    'short': False},
                                                   {'title': 'Metric',
                                                    'value': self.stream.value_label,
                                                    'short': True},
                                                   {'title': 'Value',
                                                    'value': str(report['model_input']['value']) + ' ' + self.stream.value_unit,
                                                    'short': True}]}]}

        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(self.webhook, data=json.dumps(payload),
                                     headers=headers)
        except Exception:
            self.logger.warn('Failed to post anomaly.', exc_info=True)
            return

        self.logger.info('Anomaly posted with status code %d: %s',
                         response.status_code, response.text)
        return
def foreach_batch_function(df, epoch_id):
    # Transform and write batchDF
    row = df.collect()
    print "Size of Batch"
    print(len(row))
    if len(row) != 0:
        nb = 0  # running count of processed records (unbound in the original)
        for x in range(len(row)):
            nb = nb + 1
            record = {}
            level = row[x]['level']
            #print(type(level))
            timestamp = row[x]['@timestamp']
            #print(type(timestamp))
            #print(timestamp)
            #timestamp = timestamp.encode("utf-8")
            level = level.encode("utf-8")
            if level == 'INFO' or level == 'info':
                level = 'info'
            elif level == 'ERROR' or level == 'error':
                level = 'error'
            else:
                level = 'warning'
            record = {"timestamp": timestamp, "level": level}
            print(record)
            result = model.run(record)
            anom = result.inferences['anomalyScore']
            #print(anom)
            record_anomalies.append(anom)
            #print "Length of record anomalies"
            #print(len(record_anomalies))
            mean_anomalies = np.mean(record_anomalies)
            std_anomalies = np.std(record_anomalies)
            if std_anomalies == 0:
                std_anomalies = 0.00001
            var_anomalies = np.var(record_anomalies)
            mean_anomalies_short_window = np.mean(record_anomalies[-int(history):])
            likelihood = 1 - ((norm.cdf(anom, mean_anomalies_short_window - mean_anomalies, std_anomalies)) -
                              (norm.cdf(0, mean_anomalies_short_window - mean_anomalies, std_anomalies)))
            likelihood_test = 1 - (anom - (mean_anomalies_short_window - mean_anomalies)) / std_anomalies
            likelihood_test_test = 1 - qfunction((mean_anomalies_short_window - mean_anomalies) / std_anomalies)
            print "Likelihood"
            print(likelihood_test_test)
            anomalyLikelihood = AnomalyLikelihood()
            anomalyProbability = anomalyLikelihood.anomalyProbability(
                record['level'], anom, record['timestamp'])
            # NOTE: matplotlib's FuncAnimation does not accept x/y keyword
            # arguments; extra values should be passed via fargs instead.
            ani = animation.FuncAnimation(fig, animate, interval=1000,
                                          x=nb, y=likelihood_test_test)
            plt.show()
            if likelihood_test_test >= 0.85:
                print "Anomaly detected!"
                print "Probability of being abnormal", likelihood_test_test
            # Disabled region-based detection kept from the original:
            #ibefore = i
            #if ibefore - iafter == 1:
            #    region = region + 1
            #    if region == 20:
            #        print i - 20
            #        print 'Anomaly detected!'
            #        print 'Probability of being abnormal', likelihood_test_test
            #        print 'Probability of being abnormal (nupic)', anomalyProbability
            #        region_anomaly = region_anomaly + 1
            #else:
            #    region = 0
            #iafter = ibefore
        pass
input_event = (numpy.array([x, y, z]), radius)
timestamp = datetime.datetime.strptime(event.time, "%Y-%m-%dT%H:%M:%S.%fZ")
# input_event = (timestamp, input_event)
modelInput = {}
modelInput["event"] = input_event
modelInput["timestamp"] = timestamp

result = model.run(modelInput)
model.save(MODELSTATE)
# print result

if not PREDICT:
    # Anomaly-Stats:
    anomalyScore = result.inferences["anomalyScore"]
    # By default 0.5 for the first 600 iterations!
    likelihood = anomalyLikelihood.anomalyProbability(
        modelInput["event"], anomalyScore, modelInput["timestamp"])
    logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
    AnomalyScores.append(anomalyScore)
    LikelihoodScores.append([modelInput["timestamp"], modelInput["event"],
                             likelihood])
    prediction = 'None'

if PREDICT:
    # Handle Anomaly:
    anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
    pred_result = shifter.shift(result)
    if result.inferences["multiStepBestPredictions"][1]:
        prediction = result.inferences["multiStepBestPredictions"][1]
        print prediction
    else:
        prediction = 'None'
#print "prediction: ", result.inferences["multiStepBestPredictions"][1]
anom = result.inferences['anomalyScore']
#print "anomaly score: ", anom
record_anomalies.append(anom)
mean_anomalies = np.mean(record_anomalies)
std_anomalies = np.std(record_anomalies)
var_anomalies = np.var(record_anomalies)
mean_anomalies_short_window = np.mean(record_anomalies[-int(0.05 * i):])
likelihood = 1 - ((norm.cdf(anom, mean_anomalies_short_window - mean_anomalies, std_anomalies)) -
                  (norm.cdf(0, mean_anomalies_short_window - mean_anomalies, std_anomalies)))
#print "likelihood score: ", likelihood
anomalyLikelihood = AnomalyLikelihood()
anomalyProbability = anomalyLikelihood.anomalyProbability(
    record['level'], anom, record['timestamp'])
#print "anomalyProbability: ", anomalyProbability
if likelihood >= 1:
    cpt = cpt + 1
    print i
    print "Anomaly detected!"
    print "Total nb of anomalies", cpt

# Disabled snippet kept from the original (truncated in the source):
"""
data = getData()
for _ in xrange(5):
    print data.next()

with open('export_dataframe_df2.csv') as inputFile:
    print
    for _ in xrange(8):
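# A standalone sketch of the hand-rolled likelihood used above: it compares
# the short-window mean shift of raw anomaly scores against a Gaussian fit
# to the full history (a simplified version of what AnomalyLikelihood does
# internally). The score history below is synthetic.
import numpy as np
from scipy.stats import norm

record_anomalies = [0.1] * 95 + [0.9] * 5       # history of raw scores
anom = 0.9                                      # latest raw score
mean_all = np.mean(record_anomalies)
std_all = np.std(record_anomalies) or 0.00001   # avoid division by zero
mean_short = np.mean(record_anomalies[-5:])

# Probability mass outside [0, anom] under N(mean_short - mean_all, std_all),
# mirroring the formula in the snippet above.
likelihood = 1 - (norm.cdf(anom, mean_short - mean_all, std_all) -
                  norm.cdf(0, mean_short - mean_all, std_all))
print(likelihood)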
    # Handle Anomaly:
    anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
    pred_result = shifter.shift(result)
    if result.inferences["multiStepBestPredictions"][1]:
        prediction = result.inferences["multiStepBestPredictions"][1]
        print prediction
    else:
        prediction = 'None'

if not PREDICT or prediction == 'None':
    # Anomaly-Stats:
    anomalyScore = result.inferences["anomalyScore"]
    AnomalyScores.append(anomalyScore)
    # By default 0.5 for the first 600 iterations!
    # TODO: Still not quite sure if that's alright...
    likelihood = anomalyLikelihood.anomalyProbability(
        event[0] + numpy.array([event[1]]), anomalyScore,
        modelInput["timestamp"])
    logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
    LikelihoodScores.append([modelInput["timestamp"], modelInput["event"],
                             likelihood])
    prediction = 'None'

# NOTE: change mag to scalar - more general! - Typecasting for DB
data = {"eventType": str(event.type),
        "lat": float(event.latitude),
        "lng": float(event.longitude),
        "depth": float(event.depth),
        "scalar": float(event.mag),
        "timestamp": str(event.time),
        "AnomalyScore": float(anomalyScore),
        "Anomaly_mean": (float(numpy.mean(AnomalyScores)), WINDOWSIZE),
        "AnomalyLikelihood": float(likelihood),
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    Supported modes:
        MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
        MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
            anomaly scores
        MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
            score that was used to generate the likelihood
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int >= 0
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default, how anomalous the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * likelihood)
        """
        self._mode = mode
        self._useMovingAverage = slidingWindowSize > 0
        self._buf = None
        self._i = None

        # Using cumulative anomaly, sliding window
        if self._useMovingAverage:
            self._windowSize = slidingWindowSize
            # Sliding window buffer
            self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
            self._i = 0  # index pointer to actual position
        elif slidingWindowSize is not None:
            raise TypeError(
                "Anomaly: if you define slidingWindowSize, it has to be an "
                "integer > 0; slidingWindowSize=%r" % slidingWindowSize)

        # NOTE: the weighted mode needs the likelihood helper as well; the
        # original constructed it only for MODE_LIKELIHOOD, which raised
        # AttributeError in MODE_WEIGHTED.
        if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    def computeAnomalyScore(self, activeColumns, predictedColumns,
                            value=None, timestamp=None):
        """Compute the anomaly score as the percent of active columns not
        predicted.

        @param activeColumns: array of active column indices
        @param predictedColumns: array of columns indices predicted in this step
                                 (used for anomaly in step T+1)
        @param value: (optional) metric value of current input
                      (used in anomaly-likelihood)
        @param timestamp: (optional) date timestamp when the sample occurred
                          (used in anomaly-likelihood)
        @return the computed anomaly score; float 0..1
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            # TODO add tests for likelihood modes
            probability = self._likelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            score = probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            score = anomalyScore * probability

        # Last, do moving-average if windowSize was specified.
        if self._useMovingAverage:
            score = self._movingAverage(score)

        return score

    def _movingAverage(self, newElement=None):
        """moving average

        @param newValue (optional) add a new element before computing the avg
        @return moving average of self._windowSize last elements
        """
        if newElement is not None:
            self._buf[self._i] = newElement
            self._i = (self._i + 1) % self._windowSize
        return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    Supported modes:
        MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
        MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
            anomaly scores
        MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
            score that was used to generate the likelihood
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int >= 0
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default, how anomalous the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * likelihood)
        """
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        # NOTE: the weighted mode needs the likelihood helper as well; the
        # original constructed it only for MODE_LIKELIHOOD, which raised
        # AttributeError in MODE_WEIGHTED.
        if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    def compute(self, activeColumns, predictedColumns,
                inputValue=None, timestamp=None):
        """Compute the anomaly score as the percent of active columns not
        predicted.

        @param activeColumns: array of active column indices
        @param predictedColumns: array of columns indices predicted in this step
                                 (used for anomaly in step T+1)
        @param inputValue: (optional) value of current input to encoders
                           (eg "cat" for category encoder)
                           (used in anomaly-likelihood)
        @param timestamp: (optional) date timestamp when the sample occurred
                          (used in anomaly-likelihood)
        @return the computed anomaly score; float 0..1
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                                 "requires 'inputValue' as parameter to compute() method. ")
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        return score
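# Note for readers comparing the Anomaly versions in this collection: the
# older implementation above (with computeAnomalyScore) returns the
# likelihood directly in MODE_LIKELIHOOD (score = probability), while the
# newer compute() versions invert it (score = 1 - probability) so that a
# LOW likelihood maps to a HIGH anomaly score. Thresholds must be flipped
# accordingly when moving between them:
old_style_score = 0.03                  # probability from an old MODE_LIKELIHOOD
new_style_score = 1 - old_style_score   # equivalent new-style anomaly score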
def runAnomaly(options):
  """
  Create and run a CLA Model on the given dataset (based on the hotgym
  anomaly client in NuPIC).
  """
  global g_ps_count_dict_unsorted
  global g_abnomal_data_dict_unsorted

  # Load the model params JSON
  with open("model_params.json") as fp:
    modelParams = json.load(fp)

  if options.oswpsDir != "":
    # Get PS dictionary
    osw = OSWData(options.oswpsDir, PS)
    osw.traverse_dir()
    g_ps_count_dict_unsorted = osw.get_ps_dict()
    options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
    options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
    print("Min value: " + str(ps_min_value) + ", Max value: " + str(ps_max_value))

  # Update the resolution value for the encoder
  sensorParams = modelParams['modelParams']['sensorParams']
  numBuckets = sensorParams['encoders']['value'].pop('numBuckets')
  resolution = options.resolution
  if resolution is None:
    resolution = max(0.001, (options.max - options.min) / numBuckets)
  print("Using resolution value: {0}".format(resolution))
  sensorParams['encoders']['value']['resolution'] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': 'value'})

  if options.inputFile != "":
    with open(options.inputFile) as fin:
      # Open file and set up headers.
      # Here we write the log likelihood value as the 'anomaly score';
      # the raw CLA outputs are labeled 'raw anomaly score'.
      reader = csv.reader(fin)
      csvWriter = csv.writer(open(options.outputFile, "wb"))
      csvWriter.writerow(["timestamp", "value", "_raw_score",
                          "likelihood_score", "log_likelihood_score"])
      headers = reader.next()

      # The anomaly likelihood object
      anomalyLikelihood = AnomalyLikelihood()

      # Iterate through each record in the CSV file
      print "Starting processing at", datetime.datetime.now()
      for i, record in enumerate(reader, start=1):
        # Convert input data to a dict so we can pass it into the model
        inputData = dict(zip(headers, record))
        inputData["value"] = float(inputData["value"])
        inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

        # Send it to the CLA and get back the raw anomaly score
        result = model.run(inputData)
        anomalyScore = result.inferences['anomalyScore']

        # Compute the anomaly likelihood and its log-scaled variant
        likelihood = anomalyLikelihood.anomalyProbability(
            inputData["value"], anomalyScore, inputData["dttm"])
        logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

        if likelihood > 0.9999:
          print "Anomaly detected:", inputData['dttm'], inputData['value'], likelihood

        # Write results to the output CSV file
        csvWriter.writerow([inputData["dttm"], inputData["value"],
                            anomalyScore, likelihood, logLikelihood])

        # Progress report
        if (i % 1000) == 0:
          print i, "records processed"

  elif options.oswpsDir != "":
    if options.use_rtm:
      rtm_sensitivity = 2
      rtm = LinearRegressionTemoporalMemory(window=10, interval=10,
                                            min_=options.min,
                                            max_=options.max,
                                            boost=rtm_sensitivity,
                                            leak_detection=0,
                                            critical_region="right_tail",
                                            debug=0)
      g_abnomal_data_dict_unsorted = rtm.analyze(g_ps_count_dict_unsorted)
    else:
      csvWriter = csv.writer(open(options.outputFile, "wb"))
      csvWriter.writerow(["timestamp", "value", "_raw_score",
                          "likelihood_score", "log_likelihood_score"])
      ps_od = collections.OrderedDict(
          sorted(g_ps_count_dict_unsorted.items()))

      # The anomaly likelihood object
      anomalyLikelihood = AnomalyLikelihood()

      # Iterate through each record in the ordered PS dictionary
      print "Starting processing at", datetime.datetime.now()
      for i, timestamp in enumerate(ps_od):
        ps_count = ps_od[timestamp]
        inputData = {}
        inputData["value"] = float(ps_count)
        inputData["dttm"] = dateutil.parser.parse(timestamp)

        # Send it to the CLA and get back the raw anomaly score
        result = model.run(inputData)
        anomalyScore = result.inferences['anomalyScore']

        # Compute the anomaly likelihood and its log-scaled variant
        likelihood = anomalyLikelihood.anomalyProbability(
            inputData["value"], anomalyScore, inputData["dttm"])
        logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

        if likelihood > 0.9999:
          print "Anomaly detected:", inputData['dttm'], inputData['value'], likelihood
          g_abnomal_data_dict_unsorted[timestamp] = ps_count

        # Write results to the output CSV file
        csvWriter.writerow([inputData["dttm"], inputData["value"],
                            anomalyScore, likelihood, logLikelihood])

        # Progress report
        if (i % 1000) == 0:
          print i, "records processed"

  print "Completed processing", i, "records at", datetime.datetime.now()
  print "Anomaly scores for", options.inputFile,
  print "have been written to", options.outputFile
class Monitor(object):
  """ A NuPIC model that saves results to Redis. """

  def __init__(self, config):
    # Instantiate NuPIC model
    model_params = base_model_params.MODEL_PARAMS
    model_params['modelParams']['sensorParams']['encoders']['value'][
        'resolution'] = config['resolution']
    self.model = ModelFactory.create(model_params)
    self.model.enableInference({'predictedField': 'value'})

    # The shifter is used to bring the predictions to the actual time frame
    self.shifter = InferenceShifter()

    # The anomaly likelihood object
    self.anomalyLikelihood = AnomalyLikelihood()

    # Set stream source
    self.stream = config['stream']

    # Setup class variables
    self.db = redis.Redis('localhost')
    self.seconds_per_request = config['seconds_per_request']
    self.webhook = config['webhook']
    self.anomaly_threshold = config['anomaly_threshold']
    self.likelihood_threshold = config['likelihood_threshold']
    self.domain = config['domain']
    self.alert = False  # Toggled when we get above threshold

    # Setup logging
    self.logger = logger or logging.getLogger(__name__)
    handler = logging.handlers.RotatingFileHandler(
        os.environ['LOG_DIR'] + "/monitor_%s.log" % self.stream.name,
        maxBytes=1024 * 1024,
        backupCount=4,
    )
    handler.setFormatter(
        logging.Formatter(
            '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
    handler.setLevel(logging.INFO)
    self.logger.addHandler(handler)
    self.logger.setLevel(logging.INFO)

    self.logger.info("=== Settings ===")
    self.logger.info("Webhook: %s", self.webhook)
    self.logger.info("Domain: %s", self.domain)
    self.logger.info("Seconds per request: %d", self.seconds_per_request)

    # Write metadata to Redis
    try:
      # Save in redis with key = 'results:monitor_id' and
      # value = 'time, status, actual, prediction, anomaly'
      self.db.set('name:%s' % self.stream.id, self.stream.name)
      self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
      self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
    except Exception:
      self.logger.warn("Could not write results to redis.", exc_info=True)

  def train(self):
    data = self.stream.historic_data()
    for model_input in data:
      self.update(model_input, False)  # Don't post anomalies during training

  def loop(self):
    while True:
      data = self.stream.new_data()
      for model_input in data:
        self.update(model_input, True)  # Post anomalies when online
      sleep(self.seconds_per_request)

  def update(self, model_input, is_to_post):
    # Pass the input to the model
    result = self.model.run(model_input)

    # Shift results
    result = self.shifter.shift(result)

    # Save multi step predictions
    inference = result.inferences['multiStepPredictions']

    # Take the anomaly_score
    anomaly_score = result.inferences['anomalyScore']

    # Compute the Anomaly Likelihood
    likelihood = self.anomalyLikelihood.anomalyProbability(
        model_input['value'], anomaly_score, model_input['time'])

    # Get the predicted value for reporting
    predicted = result.inferences['multiStepBestPredictions'][1]

    # Get timestamp from datetime
    timestamp = calendar.timegm(model_input['time'].timetuple())

    self.logger.info(
        "Processing: %s",
        strftime("%Y-%m-%d %H:%M:%S", model_input['time'].timetuple()))

    # Save results to Redis
    if inference[1]:
      try:
        # Save in redis with key = 'results:monitor_id' and
        # value = 'time, raw_value, actual, prediction, anomaly'
        # * actual: the value processed by the NuPIC model, which can be
        #   an average of raw values
        # * prediction: prediction based on 'actual' values
        self.db.rpush(
            'results:%s' % self.stream.id,
            '%s,%.5f,%.5f,%.5f,%.5f,%.5f' %
            (timestamp, model_input['raw_value'], result.rawInput['value'],
             predicted, anomaly_score, likelihood))
        max_items = 10000
        ln = self.db.llen('results:%s' % self.stream.id)
        if ln > max_items:
          self.db.ltrim('results:%s' % self.stream.id, ln - max_items, ln)
      except Exception:
        self.logger.warn("Could not write results to redis.", exc_info=True)

    # See if above threshold (in which case anomalous is True)
    anomalous = False
    if self.anomaly_threshold is not None:
      if anomaly_score >= self.anomaly_threshold:
        anomalous = True
    if self.likelihood_threshold is not None:
      if likelihood >= self.likelihood_threshold:
        anomalous = True

    # Post if webhook is not None
    if is_to_post and self.webhook is not None:
      # Check if it was in alert state in the previous time step
      was_alerted = self.alert

      # Update alert state
      self.alert = anomalous

      # Send a notification only on entering the anomalous state, i.e.
      # it was not alerted before and is alerted now
      if not was_alerted and self.alert:
        report = {
            'anomaly_score': anomaly_score,
            'likelihood': likelihood,
            'model_input': {
                'time': model_input['time'].isoformat(),
                'value': model_input['raw_value']
            }
        }
        self._send_post(report)

    # Return anomalous state
    return {
        "likelihood": likelihood,
        "anomalous": anomalous,
        "anomalyScore": anomaly_score,
        "predicted": predicted
    }

  def delete(self):
    """ Remove this monitor from redis """
    self.db.delete("results:%s" % self.stream.id)
    self.db.delete('name:%s' % self.stream.id)
    self.db.delete('value_label:%s' % self.stream.id)
    self.db.delete('value_unit:%s' % self.stream.id)

  def _send_post(self, report):
    """ Send HTTP POST notification. """
    if "hooks.slack.com" not in self.webhook:
      payload = {
          'sent_at': datetime.datetime.utcnow().isoformat(),
          'report': report,
          'monitor': self.stream.name,
          'source': type(self.stream).__name__,
          'metric': '%s (%s)' % (self.stream.value_label,
                                 self.stream.value_unit),
          'chart': 'http://%s?id=%s' % (self.domain, self.stream.id)
      }
    else:
      payload = {
          'username': '******',
          'icon_url': 'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
          'text': 'Anomalous state in *%s* from _%s_:' % (
              self.stream.name, type(self.stream).__name__),
          'attachments': [{
              'color': 'warning',
              'fields': [{
                  'title': 'Chart',
                  'value': 'http://%s?id=%s' % (self.domain, self.stream.id),
                  'short': False
              }, {
                  'title': 'Metric',
                  'value': self.stream.value_label,
                  'short': True
              }, {
                  'title': 'Value',
                  'value': str(report['model_input']['value']) + ' ' +
                           self.stream.value_unit,
                  'short': True
              }]
          }]
      }

    headers = {'Content-Type': 'application/json'}
    try:
      response = requests.post(self.webhook, data=json.dumps(payload),
                               headers=headers)
    except Exception:
      self.logger.warn('Failed to post anomaly.', exc_info=True)
      return

    self.logger.info('Anomaly posted with status code %d: %s',
                     response.status_code, response.text)
    return
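A hypothetical driver for Monitor, inferred from the config keys read in __init__ above. FakeStream is a made-up stand-in for whatever stream classes the surrounding project provides; it only needs the members Monitor actually touches (id, name, value_label, value_unit, historic_data(), new_data()).

class FakeStream(object):
    # Hypothetical stand-in for the project's stream classes
    id = 'demo'
    name = 'demo-stream'
    value_label = 'Requests'
    value_unit = 'req/s'

    def historic_data(self):
        return []  # would yield dicts with 'time', 'value', 'raw_value'

    def new_data(self):
        return []

monitor = Monitor({
    'resolution': 1.0,
    'stream': FakeStream(),
    'seconds_per_request': 60,
    'webhook': None,            # set to a URL to enable POST alerts
    'anomaly_threshold': 0.9,
    'likelihood_threshold': 0.9999,
    'domain': 'localhost',
})
monitor.train()  # replay history without posting alerts
monitor.loop()   # then poll for new data indefinitely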
class Anomaly(object):
  """Basic class that computes anomaly.

  Anomaly is used to detect strange patterns/behaviors (outliers) by a
  trained CLA model.
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = [MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED]

  def __init__(self, slidingWindowSize=None, anomalyMode=MODE_PURE,
               shiftPredicted=False):
    """
    @param slidingWindowSize (optional) -- enables a moving average over the
        final anomaly score; how many elements are summed up (sliding window
        size); int > 0
    @param anomalyMode (optional) -- (string) how to compute the anomaly;
        possible values are:
          -- "pure" -- the default; how anomalous the value is;
             float 0..1 where 1 = totally unexpected
          -- "likelihood" -- uses the anomaly_likelihood code; models the
             probability of receiving this value and anomalyScore; used in Grok
          -- "weighted" -- "pure" anomaly weighted by "likelihood"
             (anomaly * likelihood)
    @param shiftPredicted (optional) -- boolean [default=False]; normally
        active vs. predicted are compared; if shiftPredicted=True,
        predicted(T-1) vs. active(T) are compared (eg. from TP, CLAModel)
    """
    # cumulative anomaly: sliding window
    if slidingWindowSize is not None:
      if slidingWindowSize > 0:
        self._windowSize = slidingWindowSize
        # sliding window buffer
        self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
        self._i = 0  # index pointer to the actual position
      else:
        raise ValueError(
            "Anomaly: if you define slidingWindowSize, it has to be an "
            "integer > 0; slidingWindowSize=" + str(slidingWindowSize))

    # mode
    self._mode = anomalyMode
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
    if self._mode not in Anomaly._supportedModes:
      raise ValueError(
          'Invalid anomaly mode; only supported modes are: '
          '"Anomaly.MODE_PURE", "Anomaly.MODE_LIKELIHOOD", '
          '"Anomaly.MODE_WEIGHTED"; you used: ' + self._mode)

    if shiftPredicted:
      self._prevPredictedColumns = numpy.array([])

  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None,
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    @param value: (optional) input value, i.e. what activeColumns represent
        (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    if hasattr(self, "_prevPredictedColumns"):  # shiftPredicted == True
      prevPredictedColumns = self._prevPredictedColumns
      self._prevPredictedColumns = predictedColumns  # used in step T+1
    else:
      prevPredictedColumns = predictedColumns

    # 1. Here is the 'classic' anomaly score
    anomalyScore = computeRawAnomalyScore(activeColumns, prevPredictedColumns)

    # Compute the final anomaly based on the selected mode
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # Last, apply the moving average if a window size is set
    if hasattr(self, "_windowSize"):
      score = self._movingAverage(score)

    return score

  def _movingAverage(self, newElement=None):
    """Moving average.

    @param newElement (optional) -- add a new element before computing the
        average
    @return moving average of the last self._windowSize elements
    """
    if newElement is not None:
      self._buf[self._i] = newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
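A short usage sketch for the class above, with toy column indices. With four active columns of which one is unpredicted, the raw ("pure") score works out to 0.25.

# Usage sketch: pure-mode anomaly score on toy column indices.
import numpy

anomaly = Anomaly(anomalyMode=Anomaly.MODE_PURE)
active = numpy.array([2, 3, 5, 7])     # columns active at time T
predicted = numpy.array([2, 3, 4, 7])  # columns that were predicted
score = anomaly.computeAnomalyScore(active, predicted)
print "anomaly score:", score  # 0.25: one of four active columns unpredicted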
def runAnomaly(options):
    # Local parameters
    inputArray = []             # holds all input data
    anomalyArray = []           # holds all output data
    inputThreshold = float(10)  # percentage of initial samples to ignore
    anomCounter = 0             # counts the number of anomalies

    # Interpolate the function
    [timeDataFinal, yvalues] = interpolateFunction(inputFileNameInterpol,
                                                   inputFileNameLocal)

    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # JSON handling: derive the encoder resolution from the value range
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = sensorParams['encoders']['value'].pop('numBuckets')
    resolution = options.resolution  # resolution sets the encoder bucket width
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    with open(options.inputFile) as fin:
        # Open file and set up headers
        reader = csv.reader(fin)
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):
            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputArray.append(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)  # inference call into NuPIC
            anomalyScore = result.inferences['anomalyScore']
            anomalyArray.append(anomalyScore)

            # Compute the likelihood (NuPIC call); note this example only
            # computes it and does not write it out
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])

    # Plot the output
    myPlotFunction(inputArray, anomalyArray, inputThreshold)

    # Write the results file
    interpolBool = False
    writeFunction(outputFileName, timeDataFinal, anomalyArray, interpolBool)
class _ModelRunner(object):
  """ Use an OPF model to process metric data samples from stdin and emit
  anomaly likelihood results to stdout.
  """

  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec
    self._aggSpec = aggSpec
    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"

    inputRecordSchema = (
        fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                                fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                                fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
        aggregationInfo=dict(
            fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                    if aggSpec is not None else []),
            seconds=aggSpec["windowSize"] if aggSpec is not None else 0
        ),
        inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
        fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)

  @staticmethod
  def _createModel(modelSpec):
    """Instantiate and configure an OPF model

    :param dict modelSpec: Model specification per model_opt_schema.json
    :returns: OPF Model instance
    """
    model = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
    model.enableLearning()
    model.enableInference(modelSpec["inferenceArgs"])
    return model

  @staticmethod
  def _createCsvReader(fileObj):
    # We'll be operating on csvs with arbitrarily long fields
    csv.field_size_limit(2**27)

    # Make sure readline() works on windows too
    os.linesep = "\n"

    return csv.reader(fileObj, dialect="excel")

  @classmethod
  def _emitOutputMessage(cls, dataRow, anomalyProbability):
    """Emit output message to stdout

    :param list dataRow: the two-tuple data row on which anomalyProbability
      was computed, whose first element is the datetime timestamp and second
      element is the float scalar value
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([dataRow[0].isoformat(),
                                    dataRow[1],
                                    anomalyProbability]),)
    sys.stdout.write(message)
    sys.stdout.flush()

  def _computeAnomalyProbability(self, fields):
    """ Compute anomaly log likelihood score

    :param tuple fields: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)
    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(fields)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
        value=fields[1],
        anomalyScore=rawAnomalyScore,
        timestamp=fields[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)

  def run(self):
    """ Run the model: ingest and process the input metric data and emit
    output messages containing anomaly scores
    """
    numRowsToSkip = self._inputSpec["rowOffset"]
    datetimeFormat = self._inputSpec["datetimeFormat"]
    inputRowTimestampIndex = self._inputSpec["timestampIndex"]
    inputRowValueIndex = self._inputSpec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      if numRowsToSkip > 0:
        numRowsToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, numRowsToSkip)
        continue

      # Extract timestamp and value
      # NOTE: the order must match the `inputFields` that we passed to the
      # Aggregator constructor
      fields = [
          date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                        datetimeFormat),
          float(inputRow[inputRowValueIndex])
      ]

      # Aggregate
      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      if aggRow is not None:
        self._emitOutputMessage(
            dataRow=aggRow,
            anomalyProbability=self._computeAnomalyProbability(aggRow))

    # Reap remaining data from the aggregator
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    if aggRow is not None:
      self._emitOutputMessage(
          dataRow=aggRow,
          anomalyProbability=self._computeAnomalyProbability(aggRow))
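Given _emitOutputMessage above, each stdout line is a JSON array of [ISO-8601 timestamp, aggregated value, log-scaled likelihood]. A downstream consumer might therefore look like the sketch below; the 0.5 threshold is an arbitrary choice for illustration.

# Sketch of a consumer for _ModelRunner's stdout stream.
import json
import sys

for line in sys.stdin:
    timestamp, value, logLikelihood = json.loads(line)
    if logLikelihood > 0.5:
        print "anomalous sample at", timestamp, "value:", value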
input_event = (numpy.array([x, y, z]), radius)
timestamp = datetime.datetime.strptime(event.time, "%Y-%m-%dT%H:%M:%S.%fZ")

modelInput = {}
modelInput["event"] = input_event
modelInput["timestamp"] = timestamp

result = model.run(modelInput)
model.save(MODELSTATE)

if not PREDICT:
    # Anomaly stats
    anomalyScore = result.inferences["anomalyScore"]
    # By default 0.5 for the first 600 iterations!
    likelihood = anomalyLikelihood.anomalyProbability(
        modelInput["event"], anomalyScore, modelInput["timestamp"])
    logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
    AnomalyScores.append(anomalyScore)
    LikelihoodScores.append(
        [modelInput["timestamp"], modelInput["event"], likelihood])
    prediction = 'None'

if PREDICT:
    # Handle prediction; anomaly stats do not apply here
    anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
    # Shift the result so predictions align with the inputs they predict
    pred_result = shifter.shift(result)
    if pred_result.inferences["multiStepBestPredictions"][1]:
        prediction = pred_result.inferences["multiStepBestPredictions"][1]
        print prediction
    else:
        prediction = 'None'
class AnomalyLikelihoodRegion(PyRegion):
  """Region for computing the anomaly likelihoods."""

  @classmethod
  def getSpec(cls):
    return {
        "description": ("Region that computes anomaly likelihoods for "
                        "temporal memory."),
        "singleNodeOnly": True,
        "inputs": {
            "rawAnomalyScore": {
                "description": ("The anomaly score whose likelihood is "
                                "to be computed"),
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
            "metricValue": {
                "description": "The input metric value",
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
        },
        "outputs": {
            "anomalyLikelihood": {
                "description": "The resultant anomaly likelihood",
                "dataType": "Real32",
                "count": 1,
                "isDefaultOutput": True,
            },
        },
        "parameters": {
            "learningPeriod": {
                "description": ("The number of iterations required for the "
                                "algorithm to learn the basic patterns in "
                                "the dataset and for the anomaly score to "
                                "'settle down'."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 288,
                "accessMode": "ReadWrite"
            },
            "estimationSamples": {
                "description": ("The number of reasonable anomaly scores "
                                "required for the initial estimate of the "
                                "Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
            "historicWindowSize": {
                "description": ("Size of the sliding window of historical "
                                "data points to maintain for periodic "
                                "reestimation of the Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 8640,
                "accessMode": "ReadWrite"
            },
            "reestimationPeriod": {
                "description": ("How often we re-estimate the Gaussian "
                                "distribution."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
        },
        "commands": {},
    }

  def __init__(self,
               learningPeriod=288,
               estimationSamples=100,
               historicWindowSize=8640,
               reestimationPeriod=100):
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        historicWindowSize=historicWindowSize,
        reestimationPeriod=reestimationPeriod)

  def __eq__(self, other):
    return self.anomalyLikelihood == other.anomalyLikelihood

  def __ne__(self, other):
    return not self == other

  @classmethod
  def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)
    return anomalyLikelihoodRegion

  def write(self, proto):
    self.anomalyLikelihood.write(proto)

  def initialize(self):
    pass

  def compute(self, inputs, outputs):
    anomalyScore = inputs["rawAnomalyScore"][0]
    value = inputs["metricValue"][0]
    anomalyProbability = self.anomalyLikelihood.anomalyProbability(
        value, anomalyScore)
    outputs["anomalyLikelihood"][0] = anomalyProbability
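Because compute() above only needs dict-like inputs and outputs holding one-element arrays, the region can be exercised outside a full Network. A minimal sketch:

# Sketch: driving AnomalyLikelihoodRegion.compute() directly.
import numpy

region = AnomalyLikelihoodRegion()
inputs = {"rawAnomalyScore": numpy.array([0.3], dtype=numpy.float32),
          "metricValue": numpy.array([42.0], dtype=numpy.float32)}
outputs = {"anomalyLikelihood": numpy.zeros(1, dtype=numpy.float32)}
region.compute(inputs, outputs)
print "likelihood:", outputs["anomalyLikelihood"][0]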
parser = argparse.ArgumentParser(description='Rescore existing results')
parser.add_argument(
    '--algo',
    help='algorithm name; selects the results/<algo> directory whose '
         'result files will be rescored.')
args = parser.parse_args()
algo = args.algo


def get_all_files_path(root):
    files = [
        val for sublist in
        [[os.path.join(i[0], j) for j in i[2]] for i in os.walk(root)]
        for val in sublist
    ]
    return files


files = get_all_files_path('results/' + algo)

for f in files:
    if '_score' not in f:
        print(f)
        df = pd.read_csv(f)
        a = []
        al = AnomalyLikelihood()  # fresh likelihood model per file
        for i in range(len(df)):
            a.append(
                al.anomalyProbability(df.value.values[i],
                                      df.anomaly_score.values[i],
                                      df.timestamp.values[i]))
        # NOTE: this overwrites the raw anomaly scores with the computed
        # likelihoods and saves the file in place
        df['anomaly_score'] = a
        df.to_csv(f, index=False)
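Assuming the script above is saved as, say, rescore.py (the filename is hypothetical) and that results/<algo>/ holds CSVs with timestamp, value, and anomaly_score columns, it would be invoked as:

# Hypothetical invocation:
#   python rescore.py --algo my_experiment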
try:
    event = (numpy.array([x, y]), int(10 * float(earthquake.mag)))
    modelInput = {}
    modelInput["event"] = event
    modelInput["timestamp"] = (
        datetime.datetime.strptime(earthquake.time, "%Y-%m-%dT%H:%M:%S.%fZ"))
    result = model.run(modelInput)
    anomalyScore = result.inferences["anomalyScore"]
    scores.append(anomalyScore)
    likelihoodScores.append(
        [modelInput["timestamp"], modelInput["event"], anomalyScore])
    likelihood = anomalyLikelihood.anomalyProbability(
        event[0] + numpy.array([event[1]]), anomalyScore,
        modelInput["timestamp"])
    data = {"lat": earthquake.latitude,
            "lng": earthquake.longitude,
            "score": anomalyScore,
            "mag": earthquake.mag,
            "mean": (numpy.mean(scores), WINDOWSIZE),
            "timestamp": earthquake.time,
            "likelihood": likelihood}
    r.publish("nupic", json.dumps(data))
    print data
except ValueError:
    pass
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
        score that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables a moving average on the final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute the anomaly;
        possible values are:
          - "pure" - the default; how anomalous the value is;
            float 0..1 where 1 = totally unexpected
          - "likelihood" - uses the anomaly_likelihood code; models the
            probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    @param inputValue: (optional) value of the current input to the encoders
        (eg. "cat" for a category encoder) (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute the final anomaly based on the selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError(
            "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' requires "
            "'inputValue' as a parameter to the compute() method.")
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> high anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, apply the moving average if a window size was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
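A sketch contrasting this newer interface with the earlier computeAnomalyScore() variant: MODE_LIKELIHOOD inverts the probability (score = 1 - P), so an improbable (value, rawScore) pair yields a high score, and compute() raises ValueError if inputValue is omitted in this mode. The toy inputs below are made up.

# Usage sketch: likelihood-mode anomaly score.
import datetime
import numpy

anomaly = Anomaly(mode=Anomaly.MODE_LIKELIHOOD)
score = anomaly.compute(numpy.array([2, 3, 5]),   # active columns
                        numpy.array([2, 3, 4]),   # predicted columns
                        inputValue=10.0,
                        timestamp=datetime.datetime(2014, 1, 1))
print "likelihood-based anomaly score:", score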