import numpy as np
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood


def compute_scores(y_test, y_pred, normalize=False):
    # Squared errors.
    errors = np.array((y_test - y_pred) ** 2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(
            y_test[i], errors[i], timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    # - HIGH: log_likelihood >= 0.5
    # - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    # medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    # anomalies['medium'][medium_idx] = 1

    return errors, log_likelihoods, anomalies
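A minimal usage sketch for compute_scores, assuming NuPIC and NumPy are installed; the y_test/y_pred arrays below are synthetic stand-ins for real observations and model predictions.

# Hypothetical driver for compute_scores with synthetic data.
import numpy as np

y_test = np.random.rand(500)                               # observed values
y_pred = y_test + np.random.normal(scale=0.05, size=500)   # model predictions

errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred,
                                                    normalize=True)
print "high-level anomalies at indices:", np.flatnonzero(anomalies['high'])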
class buildmodel:
  def __init__(self):
    # self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(
    #     metricData=[0], tmImplementation="cpp")
    with open("model_params.json") as fp:
      self.model_params = json.load(fp)
    print self.model_params
    self.newmodel = ModelFactory.create(self.model_params)
    self.newmodel.enableLearning()
    self.newmodel.enableInference({"predictedField": "value"})
    self.DATE_FORMAT = "%d/%m/%Y %H:%M"
    self.anomalylikelihood = AnomalyLikelihood()

  def processdata(self, data):
    timestamp = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
    ce = float(data[1])
    result = self.newmodel.run({"dttm": timestamp, "value": ce})
    # print result
    anomalyScore = result.inferences["anomalyScore"]
    anomaly = self.anomalylikelihood.anomalyProbability(
        ce, anomalyScore, timestamp)
    logLikelihood = self.anomalylikelihood.computeLogLikelihood(anomaly)
    logLikelihood = logLikelihood * 100
    print logLikelihood
    # if anomaly > 0.999:
    #   print "Detected high level anomaly at " + str(timestamp)
    # elif anomaly > 0.958:
    #   print "Detected medium level anomaly at " + str(timestamp)
    if logLikelihood > 20:
      print "Detected high level anomaly at " + str(timestamp)
    elif logLikelihood > 15:
      print "Detected medium level anomaly at " + str(timestamp)
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None

  if self._mode not in self._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                     "or None if disabled.")
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  @param binaryAnomalyThreshold (optional) - if set [0,1] anomaly score
      will be discretized to 1/0 (1 if >= binaryAnomalyThreshold).
      The transformation is applied after the moving average is computed
      and updated.
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                     "or None if disabled.")
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
def runAvogadroAnomaly(metric, options):
  """ Create a new HTM Model, fetch the data from the local DB, process it
  in NuPIC, and save the results to a new CSV output file.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  """
  model = createModel(metric)
  model.enableInference({"predictedField": metric.name})

  fetched = metric.fetch(prefix=options.prefix, start=None)

  resultFile = open(os.path.join(options.prefix,
                                 metric.name + "-result.csv"), "wb")
  csvWriter = csv.writer(resultFile)
  csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                      "anomaly_likelihood", "color"])

  headers = ("timestamp", metric.name)

  anomalyLikelihood = AnomalyLikelihood()

  for (ts, value) in fetched:
    try:
      value = float(value)
    except (ValueError, TypeError):
      continue

    if not math.isnan(value):
      modelInput = dict(zip(headers, (ts, value)))
      modelInput[metric.name] = float(value)
      modelInput["timestamp"] = datetime.datetime.fromtimestamp(
          float(modelInput["timestamp"]))
      result = model.run(modelInput)
      anomalyScore = result.inferences["anomalyScore"]

      likelihood = anomalyLikelihood.anomalyProbability(
          modelInput[metric.name], anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

      if logLikelihood > .5:
        color = "red"
      elif .4 < logLikelihood <= .5:
        color = "yellow"
      else:
        color = "green"

      csvWriter.writerow([modelInput["timestamp"], float(value),
                          anomalyScore, logLikelihood, color])
    else:
      resultFile.flush()
def __init__(self,
             learningPeriod=288,
             estimationSamples=100,
             historicWindowSize=8640,
             reestimationPeriod=100):
  self.anomalyLikelihood = AnomalyLikelihood(
      learningPeriod=learningPeriod,
      estimationSamples=estimationSamples,
      historicWindowSize=historicWindowSize,
      reestimationPeriod=reestimationPeriod)
def __init__(self, modelId, stats, replaceParams=()):
  """
  :param str modelId: model identifier
  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package.
  :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
  """
  self._modelId = modelId

  self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

  self._model = self._createModel(stats=stats, replaceParams=replaceParams)

  self._anomalyLikelihood = AnomalyLikelihood()
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  """
  self._mode = mode
  self._useMovingAverage = slidingWindowSize > 0
  self._buf = None
  self._i = None

  # Using cumulative anomaly, sliding window
  if self._useMovingAverage:
    self._windowSize = slidingWindowSize
    # Sliding window buffer
    self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
    self._i = 0  # index pointer to actual position
  elif slidingWindowSize is not None:
    raise TypeError(
        "Anomaly: if you define slidingWindowSize, it has to be an "
        "integer > 0; slidingWindowSize=%r" % slidingWindowSize)

  if self._mode == Anomaly.MODE_LIKELIHOOD:
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if self._mode == Anomaly.MODE_LIKELIHOOD:
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
def create_anomaly_likelihood_calc_from_disk(self, metric):
    anomaly_likelihood_calculators_path = self.__model_storage_manager.get_save_path(
        metric["metric_name"], path_element="anomaly_likelihood_calculator")
    with open(
            os.path.join(anomaly_likelihood_calculators_path,
                         self.__anomaly_likelihood_calculator_filename),
            "rb") as anomaly_likelihood_calc_file:
        return AnomalyLikelihood.readFromFile(anomaly_likelihood_calc_file)
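A possible persistence counterpart to the loader above, as a sketch only: it assumes AnomalyLikelihood exposes the writeToFile method matching the readFromFile used above (nupic's Serializable base class provides both), and it reuses the storage-manager helpers from the surrounding class. The method name itself is hypothetical.

def save_anomaly_likelihood_calc_to_disk(self, metric, anomaly_likelihood_calc):
    # Hypothetical counterpart to the loader above; assumes the
    # Serializable writeToFile() API that pairs with readFromFile().
    anomaly_likelihood_calculators_path = self.__model_storage_manager.get_save_path(
        metric["metric_name"], path_element="anomaly_likelihood_calculator")
    with open(
            os.path.join(anomaly_likelihood_calculators_path,
                         self.__anomaly_likelihood_calculator_filename),
            "wb") as anomaly_likelihood_calc_file:
        anomaly_likelihood_calc.writeToFile(anomaly_likelihood_calc_file)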
def get_anomaly_likelihood_calc(self, metric,
                                models_number_below_configured_limit):
    anomaly_likelihood_calc = None
    if not self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
        anomaly_likelihood_calculators_path = \
            self.__model_storage_manager.get_save_path(
                metric["metric_name"],
                path_element="anomaly_likelihood_calculator")
        if os.path.isfile(
                os.path.join(anomaly_likelihood_calculators_path,
                             self._anomaly_likelihood_calculator_filename)):
            if models_number_below_configured_limit:
                try:
                    self.__loaded_models.add_anomaly_calc_for_metric(
                        metric["metric_name"],
                        self.__anomaly_likelihood_calculator_factory.
                        create_anomaly_likelihood_calc_from_disk(metric))
                    self._logger.debug(
                        "get_anomaly_likelihood_calc",
                        "LOADED ANOMALY_LIKELIHOOD_CALC FROM FILE",
                        metric=str(metric["metric_name"]))
                except Exception as ex:
                    self.__loaded_models.add_anomaly_calc_for_metric(
                        metric["metric_name"], AnomalyLikelihood())
                    self._logger.warn(
                        "get_anomaly_likelihood_calc",
                        "Failed to create an anomaly likelihood calc from disk",
                        metric=str(metric["metric_name"]),
                        exception_type=str(type(ex).__name__),
                        exception_message=str(ex.message))
        else:
            if models_number_below_configured_limit:
                self.__loaded_models.add_anomaly_calc_for_metric(
                    metric["metric_name"], AnomalyLikelihood())

    if self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
        anomaly_likelihood_calc = self.__loaded_models.get_anomaly_calc(
            metric["metric_name"])
    return anomaly_likelihood_calc
def testLikelihoodValues(self):
  """ test to see if the region keeps track of state correctly and produces
      the same likelihoods as the AnomalyLikelihood module """
  anomalyLikelihoodRegion = AnomalyLikelihoodRegion()
  anomalyLikelihood = AnomalyLikelihood()

  inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
  outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
  with open(_INPUT_DATA_FILE) as f:
    reader = csv.reader(f)
    reader.next()
    for record in reader:
      consumption = float(record[1])
      anomalyScore = float(record[2])

      likelihood1 = anomalyLikelihood.anomalyProbability(
          consumption, anomalyScore)

      inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
      inputs['metricValue'] = numpy.array([consumption])
      anomalyLikelihoodRegion.compute(inputs, outputs)
      likelihood2 = outputs['anomalyLikelihood'][0]
      self.assertEqual(likelihood1, likelihood2)
def definir_AnomDetect(N_DATA):
    """
    Returns the anomaly-score object, the anomaly-likelihood object, and the
    arrays that will store the anomaly score and the anomaly log-likelihood.
    """
    anom_score_txt = np.zeros((N_DATA + 1,))
    anom_logscore_txt = np.zeros((N_DATA + 1,))

    anomaly_score = Anomaly(slidingWindowSize=25)
    anomaly_likelihood = AnomalyLikelihood(learningPeriod=600,
                                           historicWindowSize=313)
    return anomaly_score, anomaly_likelihood, anom_score_txt, anom_logscore_txt
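A hypothetical driver loop for definir_AnomDetect: `values` and `raw_scores` below stand in for a real metric stream and the model's raw anomaly scores, one pair per record.

# Sketch: fill the preallocated arrays record by record.
_, anomaly_likelihood, anom_score_txt, anom_logscore_txt = \
    definir_AnomDetect(len(values) - 1)

for i, (value, raw) in enumerate(zip(values, raw_scores)):
    prob = anomaly_likelihood.anomalyProbability(value, raw, timestamp=None)
    anom_score_txt[i] = prob
    anom_logscore_txt[i] = anomaly_likelihood.computeLogLikelihood(prob)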
def __init__(self, modelId, stats, replaceParams):
  """
  :param str modelId: model identifier
  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package.
  :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
  """
  self._modelId = modelId

  self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

  self._model = self._createModel(stats=stats, replaceParams=replaceParams)

  self._anomalyLikelihood = AnomalyLikelihood()
def _send_predictions(self, metric_id, metric_envelope):
    if metric_id not in self._models:
        self._models[metric_id] = ModelFactory.create(self.model_params)
        self._models[metric_id].enableInference({'predictedField': 'value'})
        self._shifters[metric_id] = InferenceShifter()
        self._anomaly_likelihood[metric_id] = AnomalyLikelihood()

    model = self._models[metric_id]
    shifter = self._shifters[metric_id]

    modelInput = {
        # 'dttm': value['metric']['timestamp'],
        'dttm': datetime.datetime.now(),
        'value': metric_envelope['metric']['value']
    }
    result = shifter.shift(model.run(modelInput))
    inferences = result.inferences
    inference = inferences['multiStepBestPredictions'][5]

    metric = metric_envelope['metric']
    metric_name = metric['name']

    if inference is not None:
        metric['name'] = metric_name + '.nupic.predicted'
        metric['value'] = inference
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)

    if 'anomalyScore' in inferences:
        metric['name'] = metric_name + '.nupic.anomaly_score'
        metric['value'] = inferences['anomalyScore']
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)

        anomalyLikelihood = self._anomaly_likelihood[metric_id]
        likelihood = anomalyLikelihood.anomalyProbability(
            modelInput['value'],
            inferences['anomalyScore'],
            datetime.datetime.now())

        metric['name'] = metric_name + '.nupic.anomaly_likelihood'
        metric['value'] = likelihood
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)
def main():
    # cluster similar inputs together in SDR space
    s = SpatialPooler()
    print(type(s))

    # powerful sequence memory in SDR space
    t = TemporalMemory()
    print(type(t))

    # computes rolling Gaussian based on raw anomaly scores and then their
    # likelihood
    a = AnomalyLikelihood()
    print(type(a))

    # temporally groups active cell sets from TM
    u = UnionTemporalPooler()
    print(type(u))

    # learning pairings of Union representations and labeled classes
    c = SDRClassifier()
    print(type(c))
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
      fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
        fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=stats["min"],
        maxVal=stats["max"],
        minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from stdin

    yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and
    <scalar-value> is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
        value=inputRow[1],
        anomalyScore=rawAnomalyScore,
        timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit
    output messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
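A sketch of the line-oriented protocol this runner expects, from the driver's side: each stdin line is a JSON [unix-timestamp, value] pair and each stdout line is a JSON [rowIndex, logLikelihood] pair. The "model_runner.py" entry-point name and the sample values are assumptions.

# Hypothetical driver for the _ModelRunner above.
import json
import subprocess

proc = subprocess.Popen(["python", "model_runner.py"],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
proc.stdin.write(json.dumps([1415306700, 42.5]) + "\n")
proc.stdin.flush()
rowIndex, logLikelihood = json.loads(proc.stdout.readline())
proc.stdin.close()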
class AnomalyLikelihoodRegion(PyRegion):
  """Region for computing the anomaly likelihoods."""

  @classmethod
  def getSpec(cls):
    return {
        "description": ("Region that computes anomaly likelihoods for "
                        "temporal memory."),
        "singleNodeOnly": True,
        "inputs": {
            "rawAnomalyScore": {
                "description": ("The anomaly score whose likelihood is "
                                "to be computed"),
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
            "metricValue": {
                "description": "The input metric value",
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
        },
        "outputs": {
            "anomalyLikelihood": {
                "description": "The resultant anomaly likelihood",
                "dataType": "Real32",
                "count": 1,
                "isDefaultOutput": True,
            },
        },
        "parameters": {
            "learningPeriod": {
                "description": ("The number of iterations required for the "
                                "algorithm to learn the basic patterns in "
                                "the dataset and for the anomaly score to "
                                "'settle down'."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 288,
                "accessMode": "ReadWrite"
            },
            "estimationSamples": {
                "description": ("The number of reasonable anomaly scores "
                                "required for the initial estimate of the "
                                "Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
            "historicWindowSize": {
                "description": ("Size of sliding window of historical data "
                                "points to maintain for periodic "
                                "reestimation of the Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 8640,
                "accessMode": "ReadWrite"
            },
            "reestimationPeriod": {
                "description": ("How often we re-estimate the Gaussian "
                                "distribution."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
        },
        "commands": {},
    }


  def __init__(self,
               learningPeriod=288,
               estimationSamples=100,
               historicWindowSize=8640,
               reestimationPeriod=100):
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        historicWindowSize=historicWindowSize,
        reestimationPeriod=reestimationPeriod)


  def __eq__(self, other):
    return self.anomalyLikelihood == other.anomalyLikelihood


  def __ne__(self, other):
    return not self == other


  @classmethod
  def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)

    return anomalyLikelihoodRegion


  def write(self, proto):
    self.anomalyLikelihood.write(proto)


  def initialize(self):
    pass


  def compute(self, inputs, outputs):
    anomalyScore = inputs["rawAnomalyScore"][0]
    value = inputs["metricValue"][0]

    anomalyProbability = self.anomalyLikelihood.anomalyProbability(
        value, anomalyScore)

    outputs["anomalyLikelihood"][0] = anomalyProbability
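The region above is a thin wrapper; the same calls can be made directly. A minimal sketch, where `stream` is a hypothetical iterable of (metricValue, rawAnomalyScore) pairs and the 0.5 threshold is illustrative:

# Direct use of AnomalyLikelihood, mirroring what the region's compute()
# does per record.
likelihood = AnomalyLikelihood(learningPeriod=288, estimationSamples=100)
for value, rawScore in stream:
    prob = likelihood.anomalyProbability(value, rawScore)
    logScore = likelihood.computeLogLikelihood(prob)
    if logScore > 0.5:
        print "flagged: value=%s logLikelihood=%.4f" % (value, logScore)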
class Monitor(object):
    """ A NuPIC model that saves results to Redis. """

    def __init__(self, config):

        # Instantiate NuPIC model
        model_params = base_model_params.MODEL_PARAMS

        # Set resolution
        model_params['modelParams']['sensorParams']['encoders']['value']['resolution'] = config['resolution']

        # Override other NuPIC parameters:
        model_params['modelParams'] = update_dict(model_params['modelParams'],
                                                  config['nupic_model_params'])

        # Create model and enable inference on it
        self.model = ModelFactory.create(model_params)
        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging
        self.logger = logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(
            os.environ['LOG_DIR'] + "/monitor_%s.log" % self.stream.name,
            maxBytes=1024 * 1024,
            backupCount=4,
        )
        handler.setFormatter(logging.Formatter(
            '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write metadata to Redis
        try:
            # Save in redis with key = 'results:monitor_id' and
            # value = 'time, status, actual, prediction, anomaly'
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
        except Exception:
            self.logger.warn("Could not write results to redis.", exc_info=True)

    def train(self):
        data = self.stream.historic_data()
        for model_input in data:
            self.update(model_input, False)  # Don't post anomalies in training

    def loop(self):
        while True:
            data = self.stream.new_data()
            for model_input in data:
                self.update(model_input, True)  # Post anomalies when online
            sleep(self.seconds_per_request)

    def update(self, model_input, is_to_post):
        # Pass the input to the model
        result = self.model.run(model_input)

        # Shift results
        result = self.shifter.shift(result)

        # Save multi step predictions
        inference = result.inferences['multiStepPredictions']

        # Take the anomaly_score
        anomaly_score = result.inferences['anomalyScore']

        # Compute the Anomaly Likelihood
        likelihood = self.anomalyLikelihood.anomalyProbability(
            model_input['value'], anomaly_score, model_input['time'])

        # Get the predicted value for reporting
        predicted = result.inferences['multiStepBestPredictions'][1]

        # Get timestamp from datetime
        timestamp = calendar.timegm(model_input['time'].timetuple())

        self.logger.info("Processing: %s",
                         strftime("%Y-%m-%d %H:%M:%S",
                                  model_input['time'].timetuple()))

        # Save results to Redis
        if inference[1]:
            try:
                # Save in redis with key = 'results:monitor_id' and value =
                # 'time, raw_value, actual, prediction, anomaly'
                #  * actual: the value processed by the NuPIC model, which can
                #    be an average of raw_values
                #  * prediction: prediction based on 'actual' values
                self.db.rpush('results:%s' % self.stream.id,
                              '%s,%.5f,%.5f,%.5f,%.5f,%.5f' %
                              (timestamp,
                               model_input['raw_value'],
                               result.rawInput['value'],
                               predicted,
                               anomaly_score,
                               likelihood))
                max_items = 10000
                ln = self.db.llen('results:%s' % self.stream.id)
                if ln > max_items:
                    self.db.ltrim('results:%s' % self.stream.id,
                                  ln - max_items, ln)
            except Exception:
                self.logger.warn("Could not write results to redis.",
                                 exc_info=True)

        # See if above threshold (in which case anomalous is True)
        anomalous = False
        if self.anomaly_threshold is not None:
            if anomaly_score >= self.anomaly_threshold:
                anomalous = True

        if self.likelihood_threshold is not None:
            if likelihood >= self.likelihood_threshold:
                anomalous = True

        # Post if webhook is not None
        if is_to_post and self.webhook is not None:
            # Check if it was in alert state in previous time step
            was_alerted = self.alert

            # Update alert state
            self.alert = anomalous

            # Send notification if webhook is set and if:
            #   was not alerted before and is alerted now
            #   (entered anomalous state)
            if not was_alerted and self.alert:
                report = {'anomaly_score': anomaly_score,
                          'likelihood': likelihood,
                          'model_input': {'time': model_input['time'].isoformat(),
                                          'value': model_input['raw_value']}}
                self._send_post(report)

        # Return anomalous state
        return {"likelihood": likelihood,
                "anomalous": anomalous,
                "anomalyScore": anomaly_score,
                "predicted": predicted}

    def delete(self):
        """ Remove this monitor from redis """
        self.db.delete("results:%s" % self.stream.id)
        self.db.delete('name:%s' % self.stream.id)
        self.db.delete('value_label:%s' % self.stream.id)
        self.db.delete('value_unit:%s' % self.stream.id)

    def _send_post(self, report):
        """ Send HTTP POST notification. """
        if "hooks.slack.com" not in self.webhook:
            payload = {'sent_at': datetime.utcnow().isoformat(),
                       'report': report,
                       'monitor': self.stream.name,
                       'source': type(self.stream).__name__,
                       'metric': '%s (%s)' % (self.stream.value_label,
                                              self.stream.value_unit),
                       'chart': 'http://%s?id=%s' % (self.domain,
                                                     self.stream.id)}
        else:
            payload = {'username': '******',
                       'icon_url': 'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
                       'text': 'Anomalous state in *%s* from _%s_:' %
                               (self.stream.name, type(self.stream).__name__),
                       'attachments': [{'color': 'warning',
                                        'fields': [{'title': 'Chart',
                                                    'value': 'http://%s?id=%s' % (self.domain, self.stream.id),
                                                    'short': False},
                                                   {'title': 'Metric',
                                                    'value': self.stream.value_label,
                                                    'short': True},
                                                   {'title': 'Value',
                                                    'value': str(report['model_input']['value']) + ' ' + self.stream.value_unit,
                                                    'short': True}]}]}

        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(self.webhook,
                                     data=json.dumps(payload),
                                     headers=headers)
        except Exception:
            self.logger.warn('Failed to post anomaly.', exc_info=True)
            return

        self.logger.info('Anomaly posted with status code %d: %s',
                         response.status_code, response.text)
        return
parser = argparse.ArgumentParser(description='Add to existing name')
parser.add_argument(
    '--algo',
    help='add to existing name, especially if I am testing some new feature.')
args = parser.parse_args()
algo = args.algo


def get_all_files_path(root):
    files = [
        val for sublist in
        [[os.path.join(i[0], j) for j in i[2]] for i in os.walk(root)]
        for val in sublist
    ]
    return files


files = get_all_files_path('results/' + algo)

for f in files:
    if '_score' not in f:
        print(f)
        df = pd.read_csv(f)
        a = []
        al = AnomalyLikelihood()
        for i in range(len(df)):
            a.append(
                al.anomalyProbability(df.value.values[i],
                                      df.anomaly_score.values[i],
                                      df.timestamp.values[i]))
        # NOTE: this overwrites the raw anomaly_score column with the
        # computed likelihoods before writing the file back.
        df['anomaly_score'] = a
        df.to_csv(f, index=False)
class Anomaly(object):
  """basic class that computes anomaly

     Anomaly is used to detect strange patterns/behaviors (outliers)
     by a trained CLA model.
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = [MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED]

  def __init__(self, slidingWindowSize=None, anomalyMode=MODE_PURE,
               shiftPredicted=False):
    """
    @param slidingWindowSize (optional) -- enables moving average on final
        anomaly score; how many elements are summed up, sliding window size;
        int >= 0
    @param anomalyMode (optional) -- (string) how to compute anomaly;
        possible values are:
          -- "pure" -- the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          -- "likelihood" -- uses the anomaly_likelihood code; models
              probability of receiving this value and anomalyScore;
              used in Grok
          -- "weighted" -- "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param shiftPredicted (optional) -- boolean [default=False];
        normally active vs predicted are compared; if shiftPredicted=True:
        predicted(T-1) vs active(T) are compared (eg from TP, CLAModel)
    """
    # using cumulative anomaly, sliding window
    if slidingWindowSize > 0:
      self._windowSize = slidingWindowSize
      # sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0  # index pointer to actual position
    elif slidingWindowSize is not None:
      raise Exception("Anomaly: if you define slidingWindowSize, "
                      "it has to be an integer > 0; "
                      "slidingWindowSize=" + str(slidingWindowSize))

    # mode
    self._mode = anomalyMode
    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
    if self._mode not in Anomaly._supportedModes:
      raise ValueError('Invalid anomaly mode; only supported modes are: '
                       '"Anomaly.MODE_PURE", "Anomaly.MODE_LIKELIHOOD", '
                       '"Anomaly.MODE_WEIGHTED"; you used: ' + self._mode)

    if shiftPredicted:
      self._prevPredictedColumns = numpy.array([])

  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None,
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    @param value: (optional) input value, that is what activeColumns
        represent (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    if hasattr(self, "_prevPredictedColumns"):  # shiftPredicted==True
      prevPredictedColumns = self._prevPredictedColumns
      self._prevPredictedColumns = predictedColumns  # to be used in step T+1
    else:
      prevPredictedColumns = predictedColumns

    # 1. here is the 'classic' anomaly score
    anomalyScore = computeRawAnomalyScore(activeColumns, prevPredictedColumns)

    # compute final anomaly based on selected mode
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # last, do moving-average if windowSize is set
    if hasattr(self, "_windowSize"):
      score = self._movingAverage(score)

    return score

  def _movingAverage(self, newElement=None):
    """moving average

    @param newElement (optional) add a new element before computing the avg
    @return moving average of self._windowSize last elements
    """
    if newElement is not None:
      self._buf[self._i] = newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted

    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' +
              "Max value:" + str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders']['value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
                # inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    elif options.oswpsDir != "":
        if options.use_rtm:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(window=10, interval=10,
                                                  min_=options.min,
                                                  max_=options.max,
                                                  boost=rtm_sensitivity,
                                                  leak_detection=0,
                                                  critical_region="right_tail",
                                                  debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])

            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]

                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)
                # inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and emit
  anomaly likelihood results to stdout
  """

  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec

    self._aggSpec = aggSpec

    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"

    inputRecordSchema = (
        fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                                fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                                fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
        aggregationInfo=dict(
            fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                    if aggSpec is not None else []),
            seconds=aggSpec["windowSize"] if aggSpec is not None else 0
        ),
        inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
        fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)


  @staticmethod
  def _createModel(modelSpec):
    """Instantiate and configure an OPF model

    :param dict modelSpec: Model specification per model_opt_schema.json

    :returns: OPF Model instance
    """
    model = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
    model.enableLearning()
    model.enableInference(modelSpec["inferenceArgs"])

    return model


  @staticmethod
  def _createCsvReader(fileObj):
    # We'll be operating on csvs with arbitrarily long fields
    csv.field_size_limit(2**27)

    # Make sure readline() works on windows too
    os.linesep = "\n"

    return csv.reader(fileObj, dialect="excel")


  @classmethod
  def _emitOutputMessage(cls, dataRow, anomalyProbability):
    """Emit output message to stdout

    :param list dataRow: the two-tuple data row on which anomalyProbability
      was computed, whose first element is datetime timestamp and second
      element is the float scalar value
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([dataRow[0].isoformat(), dataRow[1],
                                    anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, fields):
    """ Compute anomaly log likelihood score

    :param tuple fields: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(fields)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
        value=fields[1],
        anomalyScore=rawAnomalyScore,
        timestamp=fields[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit
    output messages containing anomaly scores
    """
    numRowsToSkip = self._inputSpec["rowOffset"]
    datetimeFormat = self._inputSpec["datetimeFormat"]
    inputRowTimestampIndex = self._inputSpec["timestampIndex"]
    inputRowValueIndex = self._inputSpec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      if numRowsToSkip > 0:
        numRowsToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, numRowsToSkip)
        continue

      # Extract timestamp and value
      # NOTE: the order must match the `inputFields` that we passed to the
      # Aggregator constructor
      fields = [
          date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                        datetimeFormat),
          float(inputRow[inputRowValueIndex])
      ]

      # Aggregate
      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      if aggRow is not None:
        self._emitOutputMessage(
            dataRow=aggRow,
            anomalyProbability=self._computeAnomalyProbability(aggRow))

    # Reap remaining data from aggregator
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    if aggRow is not None:
      self._emitOutputMessage(
          dataRow=aggRow,
          anomalyProbability=self._computeAnomalyProbability(aggRow))
# FIFO
events = reversed(events)

if PREDICT:
    import PREDICTmodel_params as model_params
else:
    import model_params as model_params

if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)

if VISUALIZE:
    Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()

if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()  # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

# Feed data into the model:
print "Start Data-Feed...\n"
def runAnomaly(options):
  """
  Create and run a CLA Model on the given dataset (based on the hotgym
  anomaly client in NuPIC).
  """
  # Load the model params JSON
  with open("model_params.json") as fp:
    modelParams = json.load(fp)

  # Update the resolution value for the encoder
  sensorParams = modelParams['modelParams']['sensorParams']
  numBuckets = modelParams['modelParams']['sensorParams']['encoders']['value'].pop('numBuckets')
  resolution = options.resolution
  if resolution is None:
    resolution = max(0.001, (options.max - options.min) / numBuckets)
  print "Using resolution value: {0}".format(resolution)
  sensorParams['encoders']['value']['resolution'] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': 'value'})
  with open(options.inputFile) as fin:
    # Open file and setup headers
    # Here we write the log likelihood value as the 'anomaly score'
    # The actual CLA outputs are labeled 'raw anomaly score'
    reader = csv.reader(fin)
    csvWriter = csv.writer(open(options.outputFile, "wb"))
    csvWriter.writerow(["timestamp", "value", "_raw_score",
                        "likelihood_score", "log_likelihood_score"])
    headers = reader.next()

    # The anomaly likelihood object
    anomalyLikelihood = AnomalyLikelihood()

    # Iterate through each record in the CSV file
    print "Starting processing at", datetime.datetime.now()
    for i, record in enumerate(reader, start=1):

      # Convert input data to a dict so we can pass it into the model
      inputData = dict(zip(headers, record))
      inputData["value"] = float(inputData["value"])
      inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
      # inputData["dttm"] = datetime.datetime.now()

      # Send it to the CLA and get back the raw anomaly score
      result = model.run(inputData)
      anomalyScore = result.inferences['anomalyScore']

      # Compute the Anomaly Likelihood
      likelihood = anomalyLikelihood.anomalyProbability(
          inputData["value"], anomalyScore, inputData["dttm"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      if likelihood > 0.9999:
        print "Anomaly detected:", inputData['dttm'], inputData['value'], \
            likelihood

      # Write results to the output CSV file
      csvWriter.writerow([inputData["dttm"], inputData["value"],
                          anomalyScore, likelihood, logLikelihood])

      # Progress report
      if (i % 1000) == 0:
        print i, "records processed"

  print "Completed processing", i, "records at", datetime.datetime.now()
  print "Anomaly scores for", options.inputFile,
  print "have been written to", options.outputFile
def foreach_batch_function(df, epoch_id):
    # Transform and write batchDF
    global nb  # running record counter; assumed initialized at module level
    row = df.collect()
    print "Size of Batch"
    print(len(row))
    if len(row) != 0:
        # NOTE: AnomalyLikelihood is stateful. The original code created a
        # new instance for every record, which resets its history each time;
        # it is hoisted out of the loop here and would ideally live at module
        # level so state persists across batches.
        anomalyLikelihood = AnomalyLikelihood()
        for x in range(len(row)):
            nb = nb + 1
            record = {}
            level = row[x]['level']
            timestamp = row[x]['@timestamp']
            # timestamp = timestamp.encode("utf-8")
            level = level.encode("utf-8")
            if level == 'INFO' or level == 'info':
                level = 'info'
            elif level == 'ERROR' or level == 'error':
                level = 'error'
            else:
                level = 'warning'
            record = {"timestamp": timestamp, "level": level}
            print(record)
            result = model.run(record)
            anom = result.inferences['anomalyScore']
            record_anomalies.append(anom)
            mean_anomalies = np.mean(record_anomalies)
            std_anomalies = np.std(record_anomalies)
            if std_anomalies == 0:
                std_anomalies = 0.00001
            var_anomalies = np.var(record_anomalies)
            mean_anomalies_short_window = np.mean(
                record_anomalies[-int(history):])
            likelihood = 1 - (
                norm.cdf(anom, mean_anomalies_short_window - mean_anomalies,
                         std_anomalies) -
                norm.cdf(0, mean_anomalies_short_window - mean_anomalies,
                         std_anomalies))
            likelihood_test = 1 - (
                anom - (mean_anomalies_short_window - mean_anomalies)) / \
                std_anomalies
            likelihood_test_test = 1 - qfunction(
                (mean_anomalies_short_window - mean_anomalies) / std_anomalies)
            print "Likelihood"
            print(likelihood_test_test)
            anomalyProbability = anomalyLikelihood.anomalyProbability(
                record['level'], anom, record['timestamp'])
            # FuncAnimation passes extra arguments through `fargs`; the
            # original `x=`/`y=` keyword arguments are not part of its API.
            ani = animation.FuncAnimation(fig, animate, interval=1000,
                                          fargs=(nb, likelihood_test_test))
            plt.show()
            if likelihood_test_test >= 0.85:
                print "Anomaly detected!"
                print "Probability of being abnormal", likelihood_test_test
            # ibefore = i
            # if ibefore - iafter == 1:
            #     region = region + 1
            #     if region == 20:
            #         print i - 20
            #         print 'Anomaly detected!'
            #         print 'Probability of being abnormal', likelihood_test_test
            #         print 'Probability of being abnormal (nupic)', anomalyProbability
            #         region_anomaly = region_anomaly + 1
            # else:
            #     region = 0
            # iafter = ibefore
    pass
# FIFO
events = reversed(events)

if PREDICT:
    import PREDICTmodel_params as model_params
else:
    import model_params as model_params

if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE:
    Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()
if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()  # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()
r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

# Feed data into the model:
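# Side sketch (not from the original source): the bounded deque above is a
# FIFO sliding window; once maxlen is reached, each append evicts the
# oldest score automatically.
from collections import deque
import numpy

window = deque(numpy.ones(3), maxlen=3)  # seeded with 1.0 like AnomalyScores
window.append(0.2)                       # evicts one of the initial 1.0s
print list(window)                       # [1.0, 1.0, 0.2]
print sum(window) / float(len(window))   # windowed mean of recent scores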
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set [0,1] anomaly score
       will be discretized to 1/0 (1 if >= binaryAnomalyThreshold). The
       transformation is applied after moving average is computed.
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which
  models probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was
  used to generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
    else:
      self._likelihood = None

    if self._mode not in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0 or
          binaryAnomalyThreshold <= 0.0):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
        (eg "cat" for category encoder) (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                         "requires 'inputValue' as parameter to compute() "
                         "method.")
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> high anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
if self._movingAverage is not None: score = self._movingAverage.next(score) # apply binary discretization if required if self._binaryThreshold is not None: if score >= self._binaryThreshold: score = 1.0 else: score = 0.0 return score def __str__(self): windowSize = 0 if self._movingAverage is not None: windowSize = self._movingAverage.windowSize return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize) def __eq__(self, other): return (isinstance(other, Anomaly) and other._mode == self._mode and other._binaryThreshold == self._binaryThreshold and other._movingAverage == self._movingAverage and other._likelihood == self._likelihood) def __setstate__(self, state): """deserialization""" self.__dict__.update(state) if not hasattr(self, '_mode'): self._mode = Anomaly.MODE_PURE if not hasattr(self, '_movingAverage'): self._movingAverage = None if not hasattr(self, '_binaryThreshold'): self._binaryThreshold = None
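# Hedged usage sketch (not from the original source): driving the Anomaly
# class above in likelihood mode with a 10-step moving average. The column
# indices and metric value below are made up.
import datetime
import numpy

anomaly = Anomaly(slidingWindowSize=10, mode=Anomaly.MODE_LIKELIHOOD)
activeColumns = numpy.array([2, 5, 7])      # currently active columns
predictedColumns = numpy.array([2, 5, 9])   # columns predicted last step
score = anomaly.compute(activeColumns, predictedColumns,
                        inputValue=42.0,
                        timestamp=datetime.datetime.now())
print "anomaly score:", score               # float in 0..1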
        minLatitude = lat
    if float(earthquake.longitude) > maxLongitude:
        maxLongitude = lng
    if float(earthquake.longitude) < minLongitude:
        minLongitude = lng

inp.seek(0)
next(inp)

# Create Model
model = ModelFactory.create(model_params.MODEL_PARAMS)
model.enableInference({"predictedField": "event"})
anomalyLikelihood = AnomalyLikelihood()
scores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
likelihoodScores = deque(maxlen=100)
r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

for n, earthquake in enumerate(reversed(earthquakes)):
    x = int(10000 * abs(float(earthquake.longitude) - minLongitude))
    y = int(10000 * abs(float(earthquake.latitude) - minLatitude))
    try:
        event = (numpy.array([x, y]), int(10 * float(earthquake.mag)))
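# Toy illustration (not from the original source) of the coordinate
# encoding above: offsets from the minimum corner are scaled to
# non-negative integers; all values here are made up.
import numpy

minLongitude, minLatitude = -123.0, 37.0
lng, lat, mag = -122.5, 37.8, 4.5
x = int(10000 * abs(lng - minLongitude))
y = int(10000 * abs(lat - minLatitude))
event = (numpy.array([x, y]), int(10 * mag))
print event  # scaled (x, y) grid position plus magnitude in tenths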
@classmethod
def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)
    return anomalyLikelihoodRegion
def runAnomaly(options): """ Create and run a CLA Model on the given dataset (based on the hotgym anomaly client in NuPIC). """ # Load the model params JSON with open("model_params.json") as fp: modelParams = json.load(fp) # Update the resolution value for the encoder sensorParams = modelParams['modelParams']['sensorParams'] numBuckets = modelParams['modelParams']['sensorParams']['encoders'][ 'value'].pop('numBuckets') resolution = options.resolution if resolution is None: resolution = max(0.001, (options.max - options.min) / numBuckets) print "Using resolution value: {0}".format(resolution) sensorParams['encoders']['value']['resolution'] = resolution model = ModelFactory.create(modelParams) model.enableInference({'predictedField': 'value'}) with open(options.inputFile) as fin: # Open file and setup headers # Here we write the log likelihood value as the 'anomaly score' # The actual CLA outputs are labeled 'raw anomaly score' reader = csv.reader(fin) csvWriter = csv.writer(open(options.outputFile, "wb")) csvWriter.writerow([ "timestamp", "value", "_raw_score", "likelihood_score", "log_likelihood_score" ]) headers = reader.next() # The anomaly likelihood object anomalyLikelihood = AnomalyLikelihood() # Iterate through each record in the CSV file print "Starting processing at", datetime.datetime.now() for i, record in enumerate(reader, start=1): # Convert input data to a dict so we can pass it into the model inputData = dict(zip(headers, record)) inputData["value"] = float(inputData["value"]) inputData["dttm"] = dateutil.parser.parse(inputData["dttm"]) #inputData["dttm"] = datetime.datetime.now() # Send it to the CLA and get back the raw anomaly score result = model.run(inputData) anomalyScore = result.inferences['anomalyScore'] # Compute the Anomaly Likelihood likelihood = anomalyLikelihood.anomalyProbability( inputData["value"], anomalyScore, inputData["dttm"]) logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood) if likelihood > 0.9999: print "Anomaly detected:", inputData['dttm'], inputData[ 'value'], likelihood # Write results to the output CSV file csvWriter.writerow([ inputData["dttm"], inputData["value"], anomalyScore, likelihood, logLikelihood ]) # Progress report if (i % 1000) == 0: print i, "records processed" print "Completed processing", i, "records at", datetime.datetime.now() print "Anomaly scores for", options.inputFile, print "have been written to", options.outputFile
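# Hedged driver sketch (not from the original source) for runAnomaly()
# above. The option names mirror the attributes the function reads
# (inputFile, outputFile, min, max, resolution); the file names and
# defaults here are placeholders.
import argparse

parser = argparse.ArgumentParser(
    description="Run a CLA anomaly model over a CSV file")
parser.add_argument("--inputFile", default="data.csv")
parser.add_argument("--outputFile", default="anomaly_scores.csv")
parser.add_argument("--min", type=float, default=0.0)
parser.add_argument("--max", type=float, default=100.0)
parser.add_argument("--resolution", type=float, default=None,
                    help="encoder resolution; derived from min/max if omitted")

if __name__ == "__main__":
    runAnomaly(parser.parse_args())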
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
        score that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
        (eg "cat" for category encoder) (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                         "requires 'inputValue' as parameter to compute() "
                         "method.")
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> high anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
        score that was used to generate the likelihood
  """

  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how anomalous the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    self._useMovingAverage = slidingWindowSize > 0
    self._buf = None
    self._i = None

    # Using cumulative anomaly, sliding window
    if self._useMovingAverage:
      self._windowSize = slidingWindowSize
      # Sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0  # index pointer to actual position
    elif slidingWindowSize is not None:
      raise TypeError(
          "Anomaly: if you define slidingWindowSize, it has to be an "
          "integer > 0; slidingWindowSize=%r" % slidingWindowSize)

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def computeAnomalyScore(self, activeColumns, predictedColumns,
                          value=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not
    predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of column indices predicted in this step
        (used for anomaly in step T+1)
    @param value: (optional) metric value of current input
        (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
        (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      # TODO add tests for likelihood modes
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # Last, do moving-average if windowSize was specified.
    if self._useMovingAverage:
      score = self._movingAverage(score)

    return score


  def _movingAverage(self, newElement=None):
    """moving average

    @param newElement (optional) add a new element before computing the avg
    @return moving average of self._windowSize last elements
    """
    if newElement is not None:
      self._buf[self._i] = newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # mean over the window
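# Side sketch (not from the original source): the modulo-wrapped buffer in
# _movingAverage() above always holds the last windowSize scores.
import numpy

buf = numpy.array([0] * 3, dtype=numpy.float)
i = 0
for score in [0.9, 0.6, 0.3, 0.0]:
    buf[i] = score
    i = (i + 1) % 3            # wrap the write pointer
    print buf.sum() / 3.0      # 0.3, 0.5, 0.6, then 0.3 (0.9 overwritten)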
result = model.run(record)
#print "prediction: ", result.inferences["multiStepBestPredictions"][1]
anom = result.inferences['anomalyScore']
#print "anomaly score: ", anom
record_anomalies.append(anom)

# Gaussian statistics over the full history and a short (5%) trailing window
mean_anomalies = np.mean(record_anomalies)
std_anomalies = np.std(record_anomalies)
var_anomalies = np.var(record_anomalies)
mean_anomalies_short_window = np.mean(record_anomalies[-int(0.05 * i):])

# Tail probability of the current score under the shifted Gaussian
likelihood = 1 - (
    norm.cdf(anom, mean_anomalies_short_window - mean_anomalies,
             std_anomalies) -
    norm.cdf(0, mean_anomalies_short_window - mean_anomalies,
             std_anomalies))
#print "likelihood score: ", likelihood

# NOTE: a fresh AnomalyLikelihood is created for every record, so its
# internal history never accumulates
anomalyLikelihood = AnomalyLikelihood()
anomalyProbability = anomalyLikelihood.anomalyProbability(
    record['level'], anom, record['timestamp'])
#print "anomalyProbability: ", anomalyProbability

if likelihood >= 1:
    cpt = cpt + 1
    print i
    print "Anomaly detected!"
    print "Total nb of anomalies", cpt

"""
data = getData()
for _ in xrange(5):
    print data.next()
"""

with open('export_dataframe_df2.csv') as inputFile:
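# Standalone sketch (not from the original source) of the tail-probability
# heuristic used above; the toy scores below are made up.
import numpy as np
from scipy.stats import norm

record_anomalies = [0.1, 0.15, 0.1, 0.2, 0.9]
anom = record_anomalies[-1]
mean_all = np.mean(record_anomalies)
std_all = np.std(record_anomalies)
if std_all == 0:
    std_all = 0.00001  # avoid a degenerate Gaussian
mean_short = np.mean(record_anomalies[-2:])

shift = mean_short - mean_all
likelihood = 1 - (norm.cdf(anom, shift, std_all) - norm.cdf(0, shift, std_all))
print "likelihood:", likelihood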
import TESTmodel_params as model_params

if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE:
    Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
# Predict not only event but also scalar! TODO
# model.enableInference({"predictedField": "scalar"})
# model.enableInference({"predictedField": "timestamp"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()
LikelihoodScores = deque()
if REDIS:
    r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                    port=int(os.environ.get("REDIS_PORT", 6379)),
                    db=int(os.environ.get("REDIS_DB", 0)))

print "Start Data-Feed...\n"
for n, event in enumerate(events):
    # Cluster or not? // Convert all coords. to non-negative ints
    # (better handled by the model):
    if CLUSTERING:
if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE:
    Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
# Predict not only event but also scalar! TODO
# model.enableInference({"predictedField": "scalar"})
# model.enableInference({"predictedField": "timestamp"})
print "Model created!\n"

# Get the Model-Classes:
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()
anomalyLikelihood = AnomalyLikelihood()
if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()
LikelihoodScores = deque()
if REDIS:
    r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                    port=int(os.environ.get("REDIS_PORT", 6379)),
                    db=int(os.environ.get("REDIS_DB", 0)))

print "Start Data-Feed...\n"
for n, event in enumerate(events):
    # Cluster or not? // Convert all coords. to non-negative ints
    # (better handled by the model):
    if CLUSTERING:
class AnomalyLikelihoodRegion(PyRegion):
  """Region for computing the anomaly likelihoods."""

  @classmethod
  def getSpec(cls):
    return {
        "description": ("Region that computes anomaly likelihoods for "
                        "temporal memory."),
        "singleNodeOnly": True,
        "inputs": {
            "rawAnomalyScore": {
                "description": ("The anomaly score whose likelihood is "
                                "to be computed"),
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
            "metricValue": {
                "description": "The input metric value",
                "dataType": "Real32",
                "count": 1,
                "required": True,
                "isDefaultInput": False
            },
        },
        "outputs": {
            "anomalyLikelihood": {
                "description": "The resultant anomaly likelihood",
                "dataType": "Real32",
                "count": 1,
                "isDefaultOutput": True,
            },
        },
        "parameters": {
            "learningPeriod": {
                "description": ("The number of iterations required for the "
                                "algorithm to learn the basic patterns in "
                                "the dataset and for the anomaly score to "
                                "'settle down'."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 288,
                "accessMode": "ReadWrite"
            },
            "estimationSamples": {
                "description": ("The number of reasonable anomaly scores "
                                "required for the initial estimate of the "
                                "Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
            "historicWindowSize": {
                "description": ("Size of sliding window of historical data "
                                "points to maintain for periodic "
                                "reestimation of the Gaussian."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 8640,
                "accessMode": "ReadWrite"
            },
            "reestimationPeriod": {
                "description": ("How often we re-estimate the Gaussian "
                                "distribution."),
                "dataType": "UInt32",
                "count": 1,
                "constraints": "",
                "defaultValue": 100,
                "accessMode": "ReadWrite"
            },
        },
        "commands": {},
    }

  def __init__(self,
               learningPeriod=288,
               estimationSamples=100,
               historicWindowSize=8640,
               reestimationPeriod=100):
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        historicWindowSize=historicWindowSize,
        reestimationPeriod=reestimationPeriod)

  def __eq__(self, other):
    return self.anomalyLikelihood == other.anomalyLikelihood

  def __ne__(self, other):
    return not self == other

  @classmethod
  def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)
    return anomalyLikelihoodRegion

  def write(self, proto):
    self.anomalyLikelihood.write(proto)

  def initialize(self):
    pass

  def compute(self, inputs, outputs):
    anomalyScore = inputs["rawAnomalyScore"][0]
    value = inputs["metricValue"][0]
    anomalyProbability = self.anomalyLikelihood.anomalyProbability(
        value, anomalyScore)
    outputs["anomalyLikelihood"][0] = anomalyProbability
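# Hedged sketch (not from the original source) of exercising the region
# directly, outside a full Network; input/output names follow getSpec().
import numpy

region = AnomalyLikelihoodRegion(learningPeriod=288, estimationSamples=100)
inputs = {"rawAnomalyScore": numpy.array([0.8], dtype=numpy.float32),
          "metricValue": numpy.array([42.0], dtype=numpy.float32)}
outputs = {"anomalyLikelihood": numpy.zeros(1, dtype=numpy.float32)}
region.compute(inputs, outputs)
print "anomaly likelihood:", outputs["anomalyLikelihood"][0]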
@classmethod
def getSchema(cls):
    return AnomalyLikelihood.getSchema()