import numpy as np

from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood


def compute_scores(y_test, y_pred, normalize=False):
    # Squared prediction errors.
    errors = np.array((y_test - y_pred) ** 2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(
            y_test[i], errors[i], timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    #   - HIGH:   log_likelihood >= 0.5
    #   - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    # medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    # anomalies['medium'][medium_idx] = 1
    return errors, log_likelihoods, anomalies
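# --- Usage sketch (illustrative, not from the original source). Assumes
# y_test / y_pred are equal-length numpy arrays of scalars; the 'high'
# threshold mirrors the comments inside compute_scores above.
if __name__ == "__main__":
    rng = np.random.RandomState(42)
    y_test = rng.normal(size=500)
    y_pred = y_test + rng.normal(scale=0.1, size=500)  # near-perfect predictions
    errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred,
                                                        normalize=True)
    print "high anomalies:", int(anomalies['high'].sum())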
import datetime
import json

from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood
# NOTE: on older NuPIC releases this import path is
# nupic.frameworks.opf.modelfactory instead of ...model_factory.
from nupic.frameworks.opf.model_factory import ModelFactory


class buildmodel:

    def __init__(self):
        # self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(
        #     metricData=[0], tmImplementation="cpp")
        with open("model_params.json") as fp:
            self.model_params = json.load(fp)
        print self.model_params
        self.newmodel = ModelFactory.create(self.model_params)
        self.newmodel.enableLearning()
        self.newmodel.enableInference({"predictedField": "value"})
        self.DATE_FORMAT = "%d/%m/%Y %H:%M"
        self.anomalylikelihood = AnomalyLikelihood()

    def processdata(self, data):
        timestamp = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
        ce = float(data[1])
        result = self.newmodel.run({"dttm": timestamp, "value": ce})
        # print result
        anomalyScore = result.inferences["anomalyScore"]
        anomaly = self.anomalylikelihood.anomalyProbability(
            ce, anomalyScore, timestamp)
        logLikelihood = self.anomalylikelihood.computeLogLikelihood(anomaly)
        logLikelihood = logLikelihood * 100
        print logLikelihood
        # Earlier thresholds based on the raw likelihood:
        # if anomaly > 0.999:
        #     print "Detected high level anomaly at " + str(timestamp)
        # elif anomaly > 0.958:
        #     print "Detected medium level anomaly at " + str(timestamp)
        if logLikelihood > 20:
            print "Detected high level anomaly at " + str(timestamp)
        elif logLikelihood > 15:
            print "Detected medium level anomaly at " + str(timestamp)
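# --- Usage sketch (illustrative; assumes a model_params.json tuned for a
# scalar "value" field, and rows shaped like ["21/09/2015 13:30", "42.0"]
# to match DATE_FORMAT above):
# model = buildmodel()
# for row in [["21/09/2015 13:30", "42.0"], ["21/09/2015 13:35", "43.1"]]:
#     model.processdata(row)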
def runAvogadroAnomaly(metric, options):
    """
    Create a new HTM Model, fetch the data from the local DB, process it in
    NuPIC, and save the results to a new CSV output file.

    :param metric: AvogadroAgent metric class
    :param options: CLI Options
    """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultFile = open(
        os.path.join(options.prefix, metric.name + "-result.csv"), "wb")
    csvWriter = csv.writer(resultFile)
    csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                        "anomaly_likelihood", "color"])

    headers = ("timestamp", metric.name)

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
        try:
            value = float(value)
        except (ValueError, TypeError):
            continue

        if not math.isnan(value):
            modelInput = dict(zip(headers, (ts, value)))
            modelInput[metric.name] = float(value)
            modelInput["timestamp"] = datetime.datetime.fromtimestamp(
                float(modelInput["timestamp"]))
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput[metric.name], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4:  # the original `and logLikelihood <= .5`
                color = "yellow"      # check was redundant here
            else:
                color = "green"

            csvWriter.writerow([modelInput["timestamp"], float(value),
                                anomalyScore, logLikelihood, color])
        else:
            # NOTE: the file is only flushed when a NaN value is encountered.
            resultFile.flush()
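# --- The color bands above, factored into a pure helper (illustrative; the
# 0.4/0.5 cutoffs come from runAvogadroAnomaly, the helper name is new):
def likelihoodColor(logLikelihood):
    """Map an anomaly log likelihood to a traffic-light color."""
    if logLikelihood > 0.5:
        return "red"
    elif logLikelihood > 0.4:
        return "yellow"
    return "green"

assert likelihoodColor(0.6) == "red"
assert likelihoodColor(0.45) == "yellow"
assert likelihoodColor(0.1) == "green"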
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted

    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value: " + str(ps_min_value) + ', ' +
              "Max value: " + str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
                # inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    elif options.oswpsDir != "":
        if options.use_rtm == True:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(
                window=10, interval=10, min_=options.min, max_=options.max,
                boost=rtm_sensitivity, leak_detection=0,
                critical_region="right_tail", debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])
            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]
                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)
                # inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
class _ModelRunner(object):
    """ Use OPF Model to process metric data samples from stdin and emit
    anomaly likelihood results to stdout
    """

    def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
        """
        :param inputFileObj: A file-like object that contains input metric data
        :param dict inputSpec: Input data specification per input_opt_schema.json
        :param dict aggSpec: Optional aggregation specification per
          agg_opt_schema.json or None if no aggregation is requested
        :param dict modelSpec: Model specification per model_opt_schema.json
        """
        self._inputSpec = inputSpec
        self._aggSpec = aggSpec
        self._modelSpec = modelSpec

        if "modelId" in modelSpec:
            self._modelId = modelSpec["modelId"]
        else:
            self._modelId = "Unknown"

        inputRecordSchema = (
            fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                                    fieldmeta.FieldMetaType.datetime,
                                    fieldmeta.FieldMetaSpecial.timestamp),
            fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                                    fieldmeta.FieldMetaType.float,
                                    fieldmeta.FieldMetaSpecial.none),
        )

        self._aggregator = aggregator.Aggregator(
            aggregationInfo=dict(
                fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                        if aggSpec is not None else []),
                seconds=aggSpec["windowSize"] if aggSpec is not None else 0
            ),
            inputFields=inputRecordSchema)

        self._modelRecordEncoder = record_stream.ModelRecordEncoder(
            fields=inputRecordSchema)

        self._model = self._createModel(modelSpec=modelSpec)

        self._anomalyLikelihood = AnomalyLikelihood()

        self._csvReader = self._createCsvReader(inputFileObj)

    @staticmethod
    def _createModel(modelSpec):
        """Instantiate and configure an OPF model

        :param dict modelSpec: Model specification per model_opt_schema.json
        :returns: OPF Model instance
        """
        model = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
        model.enableLearning()
        model.enableInference(modelSpec["inferenceArgs"])
        return model

    @staticmethod
    def _createCsvReader(fileObj):
        # We'll be operating on csvs with arbitrarily long fields
        csv.field_size_limit(2**27)

        # Make sure readline() works on windows too
        os.linesep = "\n"

        return csv.reader(fileObj, dialect="excel")

    @classmethod
    def _emitOutputMessage(cls, dataRow, anomalyProbability):
        """Emit output message to stdout

        :param list dataRow: the two-tuple data row on which
          anomalyProbability was computed, whose first element is datetime
          timestamp and second element is the float scalar value
        :param float anomalyProbability: computed anomaly probability value
        """
        message = "%s\n" % (json.dumps([dataRow[0].isoformat(), dataRow[1],
                                        anomalyProbability]),)

        sys.stdout.write(message)
        sys.stdout.flush()

    def _computeAnomalyProbability(self, fields):
        """ Compute anomaly log likelihood score

        :param tuple fields: Two-tuple input metric data row
          (<datetime-timestamp>, <float-scalar>)
        :returns: Log-scaled anomaly probability
        :rtype: float
        """
        # Generate raw anomaly score
        inputRecord = self._modelRecordEncoder.encode(fields)
        rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

        # Generate anomaly likelihood score
        anomalyProbability = self._anomalyLikelihood.anomalyProbability(
            value=fields[1],
            anomalyScore=rawAnomalyScore,
            timestamp=fields[0])

        return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)

    def run(self):
        """ Run the model: ingest and process the input metric data and emit
        output messages containing anomaly scores
        """
        numRowsToSkip = self._inputSpec["rowOffset"]
        datetimeFormat = self._inputSpec["datetimeFormat"]
        inputRowTimestampIndex = self._inputSpec["timestampIndex"]
        inputRowValueIndex = self._inputSpec["valueIndex"]

        g_log.info("Processing model=%s", self._modelId)

        for inputRow in self._csvReader:
            g_log.debug("Got inputRow=%r", inputRow)

            if numRowsToSkip > 0:
                numRowsToSkip -= 1
                g_log.debug("Skipping header row %s; %s rows left to skip",
                            inputRow, numRowsToSkip)
                continue

            # Extract timestamp and value
            # NOTE: the order must match the `inputFields` that we passed to
            # the Aggregator constructor
            fields = [
                date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                              datetimeFormat),
                float(inputRow[inputRowValueIndex])
            ]

            # Aggregate
            aggRow, _ = self._aggregator.next(fields, None)
            g_log.debug("Aggregator returned %s for %s", aggRow, fields)
            if aggRow is not None:
                self._emitOutputMessage(
                    dataRow=aggRow,
                    anomalyProbability=self._computeAnomalyProbability(aggRow))

        # Reap remaining data from aggregator
        aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
        g_log.debug("Aggregator reaped %s in final call", aggRow)
        if aggRow is not None:
            self._emitOutputMessage(
                dataRow=aggRow,
                anomalyProbability=self._computeAnomalyProbability(aggRow))
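# --- Shape of the spec dicts consumed above (illustrative values; the keys
# are the ones this class actually reads, per the *_opt_schema.json files):
# inputSpec = {"rowOffset": 1,                   # header rows to skip
#              "timestampIndex": 0, "valueIndex": 1,
#              "datetimeFormat": "%Y-%m-%d %H:%M:%S"}
# aggSpec = {"func": "mean", "windowSize": 300}  # or None for no aggregation
# modelSpec = {"modelId": "m1",
#              "timestampFieldName": "c0", "valueFieldName": "c1",
#              "modelConfig": ...,               # per model_opt_schema.json
#              "inferenceArgs": ...}
# _ModelRunner(open("data.csv"), inputSpec, aggSpec, modelSpec).run()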
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    with open(options.inputFile) as fin:
        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow(["timestamp", "value", "_raw_score",
                            "likelihood_score", "log_likelihood_score"])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            # inputData["dttm"] = datetime.datetime.now()

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], \
                    inputData['value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([inputData["dttm"], inputData["value"],
                                anomalyScore, likelihood, logLikelihood])

            # Progress report
            if (i % 1000) == 0:
                print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
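# --- Minimal shape of the model_params.json section patched by runAnomaly
# (illustrative; only the keys the function touches are shown):
# {
#   "modelParams": {
#     "sensorParams": {
#       "encoders": {
#         "value": {"numBuckets": 130, ...}  # popped, replaced by "resolution"
#       }
#     }
#   }
# }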
class _ModelRunner(object):
    """ Use OPF Model to process metric data samples from stdin and emit
    anomaly likelihood results to stdout
    """

    # Input column meta info compatible with parameters generated by
    # getScalarMetricWithTimeOfDayAnomalyParams
    _INPUT_RECORD_SCHEMA = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    def __init__(self, modelId, stats):
        """
        :param str modelId: model identifier
        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package.
        """
        self._modelId = modelId

        # NOTE: ModelRecordEncoder is implemented in the pull request
        # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
        self._modelRecordEncoder = record_stream.ModelRecordEncoder(
            fields=self._INPUT_RECORD_SCHEMA)

        self._model = self._createModel(stats=stats)

        self._anomalyLikelihood = AnomalyLikelihood()

    @classmethod
    def _createModel(cls, stats):
        """Instantiate and configure an OPF model

        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package.
        :returns: OPF Model instance
        """
        # Generate swarm params
        swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
            metricData=[0],
            minVal=stats["min"],
            maxVal=stats["max"],
            minResolution=stats.get("minResolution"))

        model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
        model.enableLearning()
        model.enableInference(swarmParams["inferenceArgs"])

        return model

    @classmethod
    def _readInputMessages(cls):
        """Create a generator that waits for and yields input messages from
        stdin

        yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is
        the `datetime.datetime` timestamp of the metric data sample and
        <scalar-value> is the floating point value of the metric data sample.
        """
        while True:
            message = sys.stdin.readline()

            if message:
                timestamp, scalarValue = json.loads(message)
                yield (datetime.utcfromtimestamp(timestamp), scalarValue)
            else:
                # Front End closed the pipe (or died)
                break

    @classmethod
    def _emitOutputMessage(cls, rowIndex, anomalyProbability):
        """Emit output message to stdout

        :param int rowIndex: 0-based index of corresponding input sample
        :param float anomalyProbability: computed anomaly probability value
        """
        message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

        sys.stdout.write(message)
        sys.stdout.flush()

    def _computeAnomalyProbability(self, inputRow):
        """ Compute anomaly log likelihood score

        :param tuple inputRow: Two-tuple input metric data row
          (<datetime-timestamp>, <float-scalar>)
        :returns: Log-scaled anomaly probability
        :rtype: float
        """
        # Generate raw anomaly score
        inputRecord = self._modelRecordEncoder.encode(inputRow)
        rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

        # Generate anomaly likelihood score
        anomalyProbability = self._anomalyLikelihood.anomalyProbability(
            value=inputRow[1],
            anomalyScore=rawAnomalyScore,
            timestamp=inputRow[0])

        return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)

    def run(self):
        """ Run the model: ingest and process the input metric data and emit
        output messages containing anomaly scores
        """
        g_log.info("Processing model=%s", self._modelId)

        for rowIndex, inputRow in enumerate(self._readInputMessages()):
            anomalyProbability = self._computeAnomalyProbability(inputRow)

            self._emitOutputMessage(rowIndex=rowIndex,
                                    anomalyProbability=anomalyProbability)
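# --- Wire-format sketch (illustrative). _readInputMessages expects one JSON
# array per stdin line, [unix_timestamp_seconds, scalar_value], and run()
# emits one JSON array per sample on stdout, [rowIndex, logLikelihood]:
# stdin:  [1438649711, 35.7]
# stdout: [0, <anomaly log likelihood>]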
# (fragment: inside the per-event processing loop, PREDICT branch)
anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
pred_result = shifter.shift(result)
if result.inferences["multiStepBestPredictions"][1]:
    prediction = result.inferences["multiStepBestPredictions"][1]
    print prediction
else:
    prediction = 'None'

if not PREDICT or prediction == 'None':
    # Anomaly-Stats:
    anomalyScore = result.inferences["anomalyScore"]
    AnomalyScores.append(anomalyScore)
    # By default 0.5 for the first 600 iterations!
    # TODO: Still not quite sure if that's alright...
    likelihood = anomalyLikelihood.anomalyProbability(
        event[0] + numpy.array([event[1]]), anomalyScore,
        modelInput["timestamp"])
    logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
    LikelihoodScores.append(
        [modelInput["timestamp"], modelInput["event"], likelihood])
    prediction = 'None'

# NOTE: change mag to scalar - more general! - Typecasting for DB
data = {"eventType": str(event.type),
        "lat": float(event.latitude),
        "lng": float(event.longitude),
        "depth": float(event.depth),
        "scalar": float(event.mag),
        "timestamp": str(event.time),
        "AnomalyScore": float(anomalyScore),
        "Anomaly_mean": (float(numpy.mean(AnomalyScores)), WINDOWSIZE),
        "AnomalyLikelihood": float(likelihood),
        "logLikelihood": float(logLikelihood),
        # (fragment truncated here)
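# --- Note on the "0.5 for the first 600 iterations" comment above:
# AnomalyLikelihood emits a flat 0.5 probability until its estimation window
# has seen enough samples. The warm-up length is configurable through the
# constructor (as a later snippet in this collection does), e.g.:
# anomalyLikelihood = AnomalyLikelihood(learningPeriod=300)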
"%Y-%m-%dT%H:%M:%S.%fZ") # input_event = (timestamp, input_event) modelInput = {} modelInput["event"] = input_event modelInput["timestamp"] = (timestamp) result = model.run(modelInput) model.save(MODELSTATE) # print result if not PREDICT: # Anomaly-Stats: anomalyScore = result.inferences["anomalyScore"] # By default 0.5 for the first 600 iterations! likelihood = anomalyLikelihood.anomalyProbability( modelInput["event"], anomalyScore, modelInput["timestamp"]) logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood) AnomalyScores.append(anomalyScore) LikelihoodScores.append( [modelInput["timestamp"], modelInput["event"], likelihood]) prediction = 'None' if PREDICT: # Handle Anomaly: anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None' pred_result = shifter.shift(result) if result.inferences["multiStepBestPredictions"][1]: prediction = result.inferences["multiStepBestPredictions"][1] print prediction else: prediction = 'None'
    # (fragment: inside the main SP/TM processing loop)
    ####################################################
    sdr_output = np.zeros(N_COLUMNS)
    sp.compute(encoder_output, True, sdr_output)
    active_columns = np.nonzero(sdr_output)[0]

    ####################################################
    tm.compute(active_columns, learn=True)

    ####################################################
    anom_score[i] = anomaly_score.compute(tm.getActiveCells(),
                                          tm.getPredictiveCells())
    anom_logscore[i] = anomaly_likelihood.computeLogLikelihood(anom_score[i])

    if i % 100 == 0:
        print(i)

anom_score_futuro = np.zeros((5000 + 1,))     # TODO: remove this later
anom_logscore_futuro = np.zeros((5000 + 1,))  # TODO: remove this later

# This loop runs over the test slice of the file, to check whether the
# anomalies can be found; remove this loop afterwards.
for i, linha in enumerate(sign[7220000:7225000, :]):
    scalar_encoder.encodeIntoArray(linha[1], bits_scalar)
    time_encoder.encodeIntoArray(linha[0], bits_time)
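# --- What the raw anomaly score measures (a minimal sketch of the standard
# HTM formula, not the NuPIC implementation): the fraction of currently
# active columns that the temporal memory did not predict.
def raw_anomaly(active_columns, predicted_columns):
    """Return |active - predicted| / |active|, in [0, 1]."""
    active = set(active_columns)
    if not active:
        return 0.0
    return float(len(active - set(predicted_columns))) / len(active)

assert raw_anomaly([1, 2, 3, 4], [1, 2, 3, 4]) == 0.0  # fully predicted
assert raw_anomaly([1, 2, 3, 4], [5, 6]) == 1.0        # fully surprising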
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Runs the HTM model and generates the anomaly scores.

    Arguments:
    :model: the model created with create_model().
    :a: the beginning of the analyzed signal.
    :b: the end of the analyzed signal.
    :save: if True then the anomalies output will be saved as .txt.
    :aggregate: if True the signal is aggregated via aggregate_() first.
    :string: the string to differentiate the names of the saved .txt files.
    """
    ######################## open the signs ################################
    if aggregate == True:
        signal, time_vect = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))
    else:
        signal = open_signs()
        signal = signal[a:b, 1]
    #-----------------------------------------------------------------------

    #################### declare the anomalies lists #######################
    anom_scores = []
    anom_likelihood = []
    anom_loglikelihood = []
    #-----------------------------------------------------------------------

    #################### declare the predicted lists #######################
    # Predictions are always made one step ahead, so the first predicted
    # value corresponds to index 1 -- there is no prediction for index 0.
    # Likewise, after seeing the last sample "A" the model predicts "A+1",
    # which has no matching value in the signal array.
    predictions_1 = []
    predictions_5 = []
    predictions_1.append(0)
    for i in range(5):
        predictions_5.append(0)
    #-----------------------------------------------------------------------

    ################ declare the anomaly likelihood object #################
    likelihood = AnomalyLikelihood(learningPeriod=300)
    #-----------------------------------------------------------------------

    # Iterate over each value in the signal array; the counter is used for
    # debugging purposes.
    for counter, value in enumerate(signal):
        ########## declare the dict which will be passed to the model ######
        # The model only accepts data in a specific dict format.
        inputRecords = {}
        inputRecords['c1'] = float(value)
        #-------------------------------------------------------------------

        ########## run the HTM model over the inputRecords dict ############
        result = model.run(inputRecords)
        #-------------------------------------------------------------------

        ########## compute the anomaly likelihood and loglikelihood ########
        current_likelihood = likelihood.anomalyProbability(
            value, result.inferences["anomalyScore"], timestamp=None)
        current_loglikelihood = likelihood.computeLogLikelihood(
            current_likelihood)
        #-------------------------------------------------------------------

        ########################## predictions #############################
        # Obtain the predicted values from the inferences dict and append
        # them to the prediction lists.
        bestPredictions = result.inferences["multiStepBestPredictions"]
        predictions_1.append(bestPredictions[1])
        predictions_5.append(bestPredictions[5])
        #-------------------------------------------------------------------

        ######### add the anomaly values to the respective lists ###########
        anom_scores.append(result.inferences["anomalyScore"])
        anom_likelihood.append(current_likelihood)
        anom_loglikelihood.append(current_loglikelihood)
        #-------------------------------------------------------------------

        ####### print the input and prediction, for debugging purposes #####
        if counter % 1 == 0:
            # print("Actual input [%d]: %f" % (counter, value))
            print('prediction of [{0}]: (input) {1:8} (1-step) {2:8} '
                  '(5-step) {3:8}'.format(counter, value,
                                          predictions_1[counter],
                                          predictions_5[counter]))
            # print("Multi Step Predictions: %s"
            #       % (result.inferences["multiStepPredictions"]))
        #-------------------------------------------------------------------

    ############ save the anomaly and prediction arrays ####################
    # The "string" suffix differentiates the training and online-learning
    # outputs.
    if save == True:
        np.savetxt("anom_score_" + string + ".txt", anom_scores,
                   delimiter=',')
        np.savetxt("anom_likelihood_" + string + ".txt", anom_likelihood,
                   delimiter=',')
        np.savetxt("anom_logscore_" + string + ".txt", anom_loglikelihood,
                   delimiter=',')
        np.savetxt("anom_prediction_1" + string + ".txt", predictions_1,
                   delimiter=',')
        np.savetxt("anom_prediction_5" + string + ".txt", predictions_5,
                   delimiter=',')
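# --- Usage sketch (illustrative; create_model(), open_signs() and
# aggregate_() belong to the surrounding project and are assumed here):
# model = create_model()
# run_model(model, a=0, b=10000, save=True, aggregate=False, string='train')
# # writes anom_score_train.txt, anom_likelihood_train.txt, ...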