def __init__(self, use_saved_model, checkpoint_path, likelihood_path):
    self.use_saved_model = use_saved_model
    if use_saved_model:
        self.model = ModelFactory.loadFromCheckpoint(checkpoint_path)
        # NOTE: each enableInference call replaces the previous setting, so
        # only 'memory' is actually in effect as the predicted field here.
        self.model.enableInference({'predictedField': 'cpu'})
        self.model.enableInference({'predictedField': 'memory'})
        with open(likelihood_path, "rb") as f:
            self.anomalyLikelihood = \
                anomaly_likelihood.AnomalyLikelihood().readFromFile(f)
    else:
        self.model = ModelFactory.create(model_params.MODEL_PARAMS)
        self.model.enableInference({'predictedField': 'cpu'})
        self.model.enableInference({'predictedField': 'memory'})
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
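# Hedged counterpart sketch (not part of the snippet above): __init__ only
# restores state, so a save step like this presumably exists elsewhere. It
# assumes the OPF Model.save(checkpointDir) API and the Serializable
# writeToFile() that pairs with the readFromFile() call used above.
def save_state(self, checkpoint_path, likelihood_path):
    self.model.save(checkpoint_path)  # writes an OPF checkpoint directory
    with open(likelihood_path, "wb") as f:
        # persists the likelihood estimator's learned distribution
        self.anomalyLikelihood.writeToFile(f)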
def run(self):
    locations_collection.find_one_and_update(
        {'site_no': self.intersection},
        {'$set': {'running': True}})
    anomaly_likelihood_helper = anomaly_likelihood.AnomalyLikelihood(
        200, 200, reestimationPeriod=10)
    model = create_single_sensor_model(self.sensor, self.intersection)
    while not self.done:
        try:
            val = self.queue_in.get(True, 1)
        except Empty:
            continue
        result = model.run(val)
        prediction = result.inferences["multiStepBestPredictions"][1]
        if val[self.sensor] is None:
            anomaly_score = None
            likelihood = None
        else:
            anomaly_score = result.inferences["anomalyScore"]
            likelihood = anomaly_likelihood_helper.anomalyProbability(
                val[self.sensor], anomaly_score, val['timestamp'])
        self.queue_out.put(
            (self.sensor, prediction, anomaly_score, likelihood))
def initialize(self):
    calcRange = abs(self.inputMax - self.inputMin)
    calcPad = calcRange * .2
    self.inputMin = self.inputMin - calcPad
    self.inputMax = self.inputMax + calcPad
    # Load the model params JSON
    paramsPath = os.path.join(
        os.path.split(__file__)[0], "modelParams", "model_params.json")
    with open(paramsPath) as fp:
        modelParams = json.load(fp)
    self.sensorParams = \
        modelParams["modelParams"]["sensorParams"]["encoders"]["value"]
    # RDSE - resolution calculation
    resolution = max(0.001,
                     (self.inputMax - self.inputMin) /
                     self.sensorParams.pop("numBuckets"))
    self.sensorParams["resolution"] = resolution
    self.model = ModelFactory.create(modelParams)
    self.model.enableInference({"predictedField": "value"})
    # Initialize the anomaly likelihood object
    numentaLearningPeriod = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=numentaLearningPeriod,
        estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
        reestimationPeriod=100)
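# Worked example of the resolution rule above (illustrative numbers, not from
# the source): with inputMin=0 and inputMax=100, the 20% pad widens the range
# to [-20, 120]; if the params file sets numBuckets=130, then
#     resolution = max(0.001, (120 - (-20)) / 130.0) ~= 1.077
# so input values closer together than ~1.08 fall into the same RDSE bucket.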
def initialize(self):
    # Get config params, setting the RDSE resolution
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    modelParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=self.inputMin - rangePadding,
        maxVal=self.inputMax + rangePadding,
        minResolution=0.001,
        tmImplementation="cpp")["modelConfig"]
    self._setupEncoderParams(
        modelParams["modelParams"]["sensorParams"]["encoders"])
    self.model = ModelFactory.create(modelParams)
    self.model.enableInference({"predictedField": "value"})
    if self.useLikelihood:
        # Initialize the anomaly likelihood object
        numentaLearningPeriod = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100)
def initialize(self):
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, w=41,
                                                       seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)
    inputWidth = self.valueEncoder.getWidth()
    self.sp = SpatialPooler(**{
        "globalInhibition": True,
        "columnDimensions": [2048],
        "inputDimensions": [inputWidth],
        "potentialRadius": inputWidth,
        "numActiveColumnsPerInhArea": 40,
        "seed": 1956,
        "potentialPct": 0.8,
        "boostStrength": 0.0,
        "synPermActiveInc": 0.003,
        "synPermConnected": 0.2,
        "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)
    self.etm = ExtendedTemporalMemory(**{
        "activationThreshold": 13,
        "cellsPerColumn": 1,
        "columnDimensions": (2048,),
        "basalInputDimensions": (self.timestampEncoder.getWidth(),),
        "initialPermanence": 0.21,
        "maxSegmentsPerCell": 128,
        "maxSynapsesPerSegment": 32,
        "minThreshold": 10,
        "maxNewSynapseCount": 20,
        "permanenceDecrement": 0.1,
        "permanenceIncrement": 0.1,
        "seed": 1960,
        "checkInputs": False,
    })
    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100)
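# Hedged sketch (not from the original source): one plausible per-record step
# for the encoders and spatial pooler initialized above. The
# ExtendedTemporalMemory update and the raw anomaly score computation are
# omitted; all names mirror initialize() above.
def encodeAndPool(self, timestamp, value):
    # Encode the scalar and the timestamp into the preallocated SDR buffers.
    self.valueEncoder.encodeIntoArray(value, self.encodedValue)
    self.timestampEncoder.encodeIntoArray(timestamp, self.encodedTimestamp)
    # Run the value SDR through the spatial pooler with learning enabled.
    self.sp.compute(self.encodedValue, True, self.spOutput)
    # Indices of the winning columns, ready to feed into the ETM.
    return self.spOutput.nonzero()[0]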
def testReestimationPeriodArg(self):
    estimateAnomalyLikelihoodsWrap = mock.Mock(
        wraps=an.estimateAnomalyLikelihoods, autospec=True)
    estimateAnomalyLikelihoodsPatch = mock.patch(
        "nupic.algorithms.anomaly_likelihood.estimateAnomalyLikelihoods",
        side_effect=estimateAnomalyLikelihoodsWrap, autospec=True)
    with estimateAnomalyLikelihoodsPatch:
        l = an.AnomalyLikelihood(claLearningPeriod=2,
                                 estimationSamples=2,
                                 historicWindowSize=3,
                                 reestimationPeriod=2)
        # burn-in
        l.anomalyProbability(10, 0.1, timestamp=1)
        l.anomalyProbability(10, 0.1, timestamp=2)
        l.anomalyProbability(10, 0.1, timestamp=3)
        l.anomalyProbability(10, 0.1, timestamp=4)
        self.assertEqual(estimateAnomalyLikelihoodsWrap.call_count, 0)

        l.anomalyProbability(10, 0.1, timestamp=5)
        self.assertEqual(estimateAnomalyLikelihoodsWrap.call_count, 1)
        l.anomalyProbability(10, 0.1, timestamp=6)
        self.assertEqual(estimateAnomalyLikelihoodsWrap.call_count, 1)
        l.anomalyProbability(10, 0.1, timestamp=7)
        self.assertEqual(estimateAnomalyLikelihoodsWrap.call_count, 2)
        l.anomalyProbability(10, 0.1, timestamp=8)
        self.assertEqual(estimateAnomalyLikelihoodsWrap.call_count, 2)
def testAnomalyProbabilityResultsDuringProbationaryPeriod(self):
    originalUpdateAnomalyLikelihoods = an.updateAnomalyLikelihoods

    def updateAnomalyLikelihoodsWrap(anomalyScores, params, verbosity=0):
        likelihoods, avgRecordList, params = originalUpdateAnomalyLikelihoods(
            anomalyScores=anomalyScores, params=params, verbosity=verbosity)
        self.assertEqual(len(likelihoods), 1)
        return [0.1], avgRecordList, params

    updateAnomalyLikelihoodsPatch = mock.patch(
        "nupic.algorithms.anomaly_likelihood.updateAnomalyLikelihoods",
        side_effect=updateAnomalyLikelihoodsWrap, autospec=True)
    with updateAnomalyLikelihoodsPatch:
        l = an.AnomalyLikelihood(claLearningPeriod=2,
                                 estimationSamples=2,
                                 historicWindowSize=3)
        # 0.5 result is expected during burn-in
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=1), 0.5)
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=2), 0.5)
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=3), 0.5)
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=4), 0.5)
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=5), 0.9)
        self.assertEqual(l.anomalyProbability(10, 0.1, timestamp=6), 0.9)
def run(df, basedir, column):
    df.to_csv("C:\\Datos\\data.csv")
    model = swarmModel(basedir, column)
    model.enableInference({"predictedField": "VAR"})
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    counter = 0
    results = []
    cerror = 0
    for index, row in df.iterrows():
        counter += 1
        if counter % 1000 == 0:
            print "Read %i lines..." % counter
        modelInput = dict(zip(df.columns, row))
        result = model.run(modelInput)
        anomalyScore = result.inferences["anomalyScore"]
        likelihood = anomalyLikelihood.anomalyProbability(
            modelInput["VAR"], anomalyScore, modelInput["time"])
        logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
        bestPredictions = result.inferences['multiStepBestPredictions']
        allPredictions = result.inferences['multiStepPredictions']
        oneStep = bestPredictions[1]
        # Confidence values are keyed by prediction value in
        # multiStepPredictions.
        oneStepConfidence = allPredictions[1][oneStep]
        # Relative Percent Difference (here normalized by the larger of the
        # two values rather than their mean)
        error = 0
        if oneStep != 0 or modelInput["VAR"] != 0:
            error = abs(modelInput["VAR"] - oneStep) / abs(
                max(modelInput["VAR"], oneStep)) * 100
        cerror += error
        results.append([
            modelInput["time"], modelInput["VAR"], oneStep, anomalyScore,
            likelihood, logLikelihood, error
        ])
    results_filename = basedir + "//" + column + "_results.txt"
    with open(results_filename, 'w') as outfile:
        for result in results:
            for value in result:
                outfile.write(str(value) + " ")
            outfile.write("\n")
    avg_error = cerror / counter
    summary_filename = basedir + "//" + "summary.txt"
    with open(summary_filename, 'w+') as summary_file:
        # avg_error is a float, so convert it before concatenating
        summary_file.write(column + " " + str(avg_error))
    model.save(basedir)
    return results
def main(inputPath):
    model = createPredictionModel()
    ts = time.time()
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    cpus = 1
    # scaling parameters
    squared_error_sum = 0
    prev_prediction = 0
    cur_period = 1
    buffered_prediction = 0
    for i in range(0, 10000):
        with open('./out/realtime_prediction.csv', mode='a') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',')
            # cpu_usage_data = generateSemiRandomCPUUsage(ts, i)
            cpu_usage_data = callPrometheus(ts, i)
            print "\n\n--------- cpu_usage_data", cpu_usage_data
            output = runDatapointThroughModel(model, cpu_usage_data, shifter,
                                              anomalyLikelihood)
            squared_error = 0
            mse_value = 0
            if output['prediction']:
                squared_error = (prev_prediction - float(cpu_usage_data[1])) ** 2
                prev_prediction = float(output['prediction'])
                squared_error_sum += abs(squared_error)
                mse_value = squared_error_sum / float(i + 1)
            if cur_period % SCALE_PERIOD == 0:
                # reset period counter
                cur_period = 1
                # calculate buffered value
                buffered_prediction = calculate_buffer(
                    cpu_usage_data[1], output['prediction'], mse_value)
            else:
                cur_period += 1
            # print "current_cpu_share", current_cpu_share
            print "buffered_prediction: ", buffered_prediction
            container_name = 'test-container-cpu-ram-stress-scale'
            if round(buffered_prediction) > 30:
                # print "current_cpu_share", current_cpu_share
                if cpus >= 0.1:
                    print "cpus reducable"
                    cpus -= 0.02
                    scale(cpus, container_name)
                    print "scale down"
            if round(buffered_prediction) < 3:
                if cpus <= 2:
                    print "cpus increasable"
                    cpus += 0.02
                    scale(cpus, container_name)
                    print "scale up"
            csv_writer.writerow([cpu_usage_data[0], output['prediction'],
                                 round(cpu_usage_data[1]), mse_value,
                                 buffered_prediction])
            # print "\ncpu_usage_data[0]: ", cpu_usage_data[0]
            # print "prediction: ", output['prediction']
            # print "round(cpu...): ", round(cpu_usage_data[1])
            # print "mse_value", mse_value
            # print "buffered_prediction", buffered_prediction
            time.sleep(8)
def testEquals(self):
    l = an.AnomalyLikelihood(claLearningPeriod=2, estimationSamples=2)
    l2 = an.AnomalyLikelihood(claLearningPeriod=2, estimationSamples=2)
    self.assertEqual(l, l2)

    l2.anomalyProbability(5, 0.1, timestamp=1)  # burn in
    l2.anomalyProbability(5, 0.1, timestamp=2)
    l2.anomalyProbability(5, 0.1, timestamp=3)
    l2.anomalyProbability(5, 0.1, timestamp=4)
    # use 5 > 2 + 2 probationary-period samples to create the distribution
    # estimate
    l2.anomalyProbability(1, 0.3, timestamp=5)
    self.assertNotEqual(l, l2)

    l.anomalyProbability(5, 0.1, timestamp=1)  # burn in
    l.anomalyProbability(5, 0.1, timestamp=2)
    l.anomalyProbability(5, 0.1, timestamp=3)
    l.anomalyProbability(5, 0.1, timestamp=4)
    l.anomalyProbability(1, 0.3, timestamp=5)
    self.assertEqual(l, l2, "equal? \n%s\n vs. \n%s" % (l, l2))
def testSerialization(self):
    """serialization using pickle"""
    l = an.AnomalyLikelihood(claLearningPeriod=2, estimationSamples=2)
    l.anomalyProbability("hi", 0.1, timestamp=1)  # burn in
    l.anomalyProbability("hi", 0.1, timestamp=2)
    l.anomalyProbability("hello", 0.3, timestamp=3)

    stored = pickle.dumps(l)
    restored = pickle.loads(stored)
    self.assertEqual(l, restored)
def testWindowSizeImpactOnEstimateAnomalyLikelihoodsArgs(self):
    # Verify that AnomalyLikelihood's historicWindowSize plays nice with args
    # passed to estimateAnomalyLikelihoods
    originalEstimateAnomalyLikelihoods = an.estimateAnomalyLikelihoods
    estimationArgs = []

    def estimateAnomalyLikelihoodsWrap(anomalyScores,
                                       averagingWindow=10,
                                       skipRecords=0,
                                       verbosity=0):
        estimationArgs.append((tuple(anomalyScores), skipRecords))
        return originalEstimateAnomalyLikelihoods(
            anomalyScores,
            averagingWindow=averagingWindow,
            skipRecords=skipRecords,
            verbosity=verbosity)

    estimateAnomalyLikelihoodsPatch = mock.patch(
        "nupic.algorithms.anomaly_likelihood.estimateAnomalyLikelihoods",
        side_effect=estimateAnomalyLikelihoodsWrap, autospec=True)
    with estimateAnomalyLikelihoodsPatch as estimateAnomalyLikelihoodsMock:
        l = an.AnomalyLikelihood(claLearningPeriod=2,
                                 estimationSamples=2,
                                 historicWindowSize=3)
        l.anomalyProbability(10, 0.1, timestamp=1)
        self.assertEqual(estimateAnomalyLikelihoodsMock.call_count, 0)
        l.anomalyProbability(20, 0.2, timestamp=2)
        self.assertEqual(estimateAnomalyLikelihoodsMock.call_count, 0)
        l.anomalyProbability(30, 0.3, timestamp=3)
        self.assertEqual(estimateAnomalyLikelihoodsMock.call_count, 0)
        l.anomalyProbability(40, 0.4, timestamp=4)
        self.assertEqual(estimateAnomalyLikelihoodsMock.call_count, 0)

        # Estimation should kick in after claLearningPeriod + estimationSamples
        # samples have been ingested
        l.anomalyProbability(50, 0.5, timestamp=5)
        self.assertEqual(estimateAnomalyLikelihoodsMock.call_count, 1)
        # NOTE: we cannot use mock's assert_called_with, because the sliding
        # window container changes in-place after estimateAnomalyLikelihoods
        # is called
        scores, numSkip = estimationArgs.pop()
        self.assertEqual(scores, ((2, 20, 0.2), (3, 30, 0.3), (4, 40, 0.4)))
        self.assertEqual(numSkip, 1)
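# A hedged reading of why numSkip == 1 above: at the first estimation, 4
# records have been ingested but the window (historicWindowSize=3) only holds
# the last 3, so the records still inside the learning period number
#     learningPeriod - (numIngested - windowSize) = 2 - (4 - 3) = 1
# and that one record is skipped when fitting the distribution.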
def __init__(self, *args, **kwargs):
    super(NuPICFileOutput, self).__init__(*args, **kwargs)
    self.outputFile = None
    self.outputWriter = None
    self.lineCount = 0
    outputFilePath = os.path.join(self.path, "%s.csv" % self.name)
    print "Preparing to output %s data to %s" % (self.name, outputFilePath)
    self.outputFile = open(outputFilePath, "w")
    self.outputWriter = csv.writer(self.outputFile)
    self._headerWritten = False
    self.anomalyLikelihoodHelper = anomaly_likelihood.AnomalyLikelihood()
def reset(request):
    guid = request.matchdict['guid']
    has_model = guid in models
    if has_model:
        print "resetting model", guid
        models[guid]['model'].resetSequenceStates()
        models[guid]['seen'] = 0
        models[guid]['last'] = None
        models[guid]['alh'] = anomaly_likelihood.AnomalyLikelihood()
    else:
        request.response.status = 404
        return no_model_error()
    return {'success': has_model, 'guid': guid}
def runModel(startProcessingAt, stopProcessingAt, aggregation, modelParamsPath):
    # Create the influxhtm client and get a sensor.
    client = InfluxHtmClient("smartthings_htm_bridge")
    fridge = client.getSensor(measurement="power", component="Mini+Fridge")
    # Make sure there is no existing HTM data for this sensor.
    fridge.deleteHtmModels()
    # And create a new storage space for the model I'm creating.
    modelStore = fridge.createHtmModel("mtaylor_local_mini_fridge")
    # Create a real HTM model object through the NuPIC OPF.
    htmModel = createModel(modelParamsPath)
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()

    # This is the function that will process each data point through the real
    # HTM model we created above.
    def htmProcessor(point):
        # Time strings are evil! We have to make sure it is formatted properly
        # for NuPIC.
        timeString = point[0]
        if "." in timeString:
            timeString = timeString.split(".").pop(0)
        else:
            timeString = timeString.split("Z").pop(0)
        timestamp = datetime.strptime(timeString, DATE_FORMAT)
        # This is the value.
        value = point[1]
        # Here's where the magic happens ;)
        result = htmModel.run({"timestamp": timestamp, "value": value})
        # Shifting results because we are plotting.
        result = shifter.shift(result)
        # Prepare a result object for writing into InfluxDB.
        inferences = result.inferences
        anomalyScore = inferences["anomalyScore"]
        # This bails out when the point has no data.
        if value is None or anomalyScore is None:
            return None
        likelihood = anomalyLikelihood.anomalyProbability(
            value, anomalyScore, timestamp)
        return {
            "inferences": result.inferences,
            "anomalyLikelihood": likelihood
        }

    modelStore.processData(htmProcessor,
                           since=startProcessingAt,
                           until=stopProcessingAt,
                           aggregation=aggregation)
def runAnomaly(model, training_file, anomaly_threshold=0.9):
    # lines
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        historicWindowSize=144)  # , learningPeriod=1152
    for index, record in training_file.iterrows():
        modelInput = {"c0": str(record[0]), "c1": record[1]}
        value = modelInput["c1"]
        modelInput["c0"] = datetime.datetime.strptime(
            modelInput["c0"], "%Y-%m-%d %H:%M:%S")
        # - timedelta(hours=2)  # subtract 2h (a Spark time-zone error) to
        # return to the original time
        timestamp = modelInput["c0"]
        result = model.run(modelInput)
        anomalyScore = result.inferences['anomalyScore']
        anomalyLikelyhood2 = anomalyLikelihood.anomalyProbability(
            value, anomalyScore, timestamp)
        print("anScoreANTERIOR: ", anomalyScore,
              ", anLikelihood2ANTERIOR: ", anomalyLikelyhood2)

    # version without the for loop:
    # modelInput = {"c0": "", "c1": 0}
    # timestamp = training_file._values[-1][0]
    # value = training_file._values[-1][1]
    # modelInput["c0"] = datetime.datetime.strptime(str(timestamp), "%Y-%m-%d %H:%M:%S")
    # modelInput["c1"] = value
    # result = model.run(modelInput)
    # anomalyScore = result.inferences['anomalyScore']
    # anomalyLikelyhood3 = anomalyLikelihood.anomalyProbability(value, anomalyScore, modelInput["c0"])
    # print("anScoreSINFOR: ", anomalyScore, ", anLikelihood2SINFOR: ", anomalyLikelyhood3)

    # RESULTS: they vary, but not very significantly, I'd say. What we can do
    # is reduce the amount of training data saved in the files for the
    # prototype tests. We can use a sliding window and keep deleting data so
    # the CSV does not grow huge; besides, we can assume that "normal" is
    # whatever happened most recently, and if there had been an anomaly it
    # would have been reported in earlier cases.
    # (Only if we want to, since nothing will be asked of us for the tests.)
    #
    # ('anScoreANTERIOR: ', 0.0, ', anLikelihood2ANTERIOR: ', 0.54721270106335917)
    # ('anScoreSINFOR: ', 0.0, ', anLikelihood2SINFOR: ', 0.56844323632835281)
    #
    # ('anScoreANTERIOR: ', 0.25, ', anLikelihood2ANTERIOR: ', 0.9821310895610571)
    # ('anScoreSINFOR: ', 0.075000003, ', anLikelihood2SINFOR: ', 0.98621086501000965)
    #
    # ('anScoreANTERIOR: ', 0.0, ', anLikelihood2ANTERIOR: ', 0.91637213745567192)
    # ('anScoreSINFOR: ', 0.0, ', anLikelihood2SINFOR: ', 0.91637213745567192)
    #
    # ('anScoreANTERIOR: ', 0.0, ', anLikelihood2ANTERIOR: ', 0.91637213745567192)
    # ('anScoreSINFOR: ', 0.0, ', anLikelihood2SINFOR: ', 0.91637213745567192)
    #
    # ('anScoreANTERIOR: ', 0.0, ', anLikelihood2ANTERIOR: ', 0.9999107159364139)
    # ('anScoreSINFOR: ', 0.0, ', anLikelihood2SINFOR: ', 0.9999107159364139)

    if anomalyLikelyhood2 > anomaly_threshold:
        anomaly = 1
    else:
        anomaly = 0
    return anomaly, timestamp
def run(self):
    anomaly_likelihood_helper = anomaly_likelihood.AnomalyLikelihood(50, 50)
    model = create_single_sensor_model(self.sensor, self.intersection)
    while not self.done:
        try:
            val = self.queue_in.get(True, 1)
        except Empty:
            continue
        result = model.run(val)
        prediction = result.inferences["multiStepBestPredictions"][1]
        anomaly_score = result.inferences["anomalyScore"]
        likelihood = anomaly_likelihood_helper.anomalyProbability(
            val[self.sensor], anomaly_score, val['timestamp'])
        self.queue_out.put((self.sensor, prediction, anomaly_score,
                            likelihood))
def testEquals(self):
    l = an.AnomalyLikelihood(claLearningPeriod=2, estimationSamples=2)
    l2 = an.AnomalyLikelihood(claLearningPeriod=2, estimationSamples=2)
    self.assertEqual(l, l2)

    # Use 5 iterations to force the distribution to be created (4 probationary
    # samples + 1)
    l2.anomalyProbability(5, 0.1, timestamp=1)  # burn in
    l2.anomalyProbability(5, 0.1, timestamp=2)
    l2.anomalyProbability(5, 0.1, timestamp=3)
    l2.anomalyProbability(5, 0.1, timestamp=4)
    self.assertIsNone(l2._distribution)
    l2.anomalyProbability(1, 0.3, timestamp=5)
    self.assertIsNotNone(l2._distribution)
    self.assertNotEqual(l, l2)

    l.anomalyProbability(5, 0.1, timestamp=1)  # burn in
    l.anomalyProbability(5, 0.1, timestamp=2)
    l.anomalyProbability(5, 0.1, timestamp=3)
    l.anomalyProbability(5, 0.1, timestamp=4)
    self.assertIsNone(l._distribution)
    l.anomalyProbability(1, 0.3, timestamp=5)
    self.assertIsNotNone(l._distribution)
    self.assertEqual(l, l2, "equal? \n%s\n vs. \n%s" % (l, l2))
def runDataThroughModel(model, dataFrame):
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    out = []
    for index, row in dataFrame.iterrows():
        timestamp = datetime.strptime(row["timestamp"], DATE_FORMAT)
        value = int(row["value"])
        result = model.run({"timestamp": timestamp, "value": value})
        if index % 100 == 0:
            print "Read %i lines..." % index
        result = shifter.shift(result)
        resultOut = convertToWritableOutput(result, anomalyLikelihood)
        out.append(resultOut)
    return pd.DataFrame(out)
def runHotgym(numRecords):
    model = ModelFactory.create(MODEL_PARAMS)
    model.enableInference({"predictedField": "sine"})
    with open(_INPUT_FILE_PATH) as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        # Skip the two extra NuPIC header rows (field types and special flags).
        reader.next()
        reader.next()
        results = []
        anomalyScore = []
        anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
        anomalyProbability = []
        count = 0
        rawData = []
        predicted = []  # collects 1-step predictions
        for record in islice(reader, numRecords):
            print count
            count += 1
            modelInput = dict(zip(headers, record))
            # print modelInput
            # sys.exit()
            modelInput["sine"] = float(modelInput["sine"])
            # modelInput["timestamp"] = datetime.datetime.strptime(
            #     modelInput["timestamp"], "%m/%d/%y %H:%M")
            result = model.run(modelInput)
            bestPredictions = result.inferences["multiStepBestPredictions"]
            allPredictions = result.inferences["multiStepPredictions"]
            rawAnomalyScore = result.inferences["anomalyScore"]
            anomalyScore.append(rawAnomalyScore)
            anomalyProbability.append(
                anomalyLikelihood.anomalyProbability(record[1],
                                                     rawAnomalyScore))
            oneStep = bestPredictions[1]
            oneStepConfidence = allPredictions[1][oneStep]
            # fiveStep = bestPredictions[5]
            # fiveStepConfidence = allPredictions[5][fiveStep]
            # result = (oneStep, oneStepConfidence * 100, fiveStep, fiveStepConfidence * 100)
            # print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
            predicted.append(oneStep)
            result = (oneStep, oneStepConfidence * 100)
            # print "1-step: {:16} ({:4.4}%)".format(*result)
            results.append(result)
            rawData.append(record[1])
    return results, anomalyScore, anomalyProbability, rawData
def testHistoricWindowSize(self):
    l = an.AnomalyLikelihood(claLearningPeriod=2,
                             estimationSamples=2,
                             historicWindowSize=3)
    l.anomalyProbability(5, 0.1, timestamp=1)  # burn in
    self.assertEqual(len(l._historicalScores), 1)
    l.anomalyProbability(5, 0.1, timestamp=2)
    self.assertEqual(len(l._historicalScores), 2)
    l.anomalyProbability(5, 0.1, timestamp=3)
    self.assertEqual(len(l._historicalScores), 3)
    l.anomalyProbability(5, 0.1, timestamp=4)
    self.assertEqual(len(l._historicalScores), 3)
def runAnomaly():
    params = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],  # just dummy data unless you want to send in some real data here
        minVal=38,
        maxVal=55,
        minResolution=0.001,  # you may need to tune this  # 0.001
        tmImplementation="cpp")  # cpp
    model = createModel(params["modelConfig"])
    # model.enableInference({'predictedField': 'c1'})
    with open(_INPUT_DATA_FILE) as fin:
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(_OUTPUT_PATH, "a"))
        # csvWriter.writerow(["timestamp", "value", "anomaly_score",
        #                     "anomaly_likelihood", "label"])
        headers = reader.next()
        reader.next()
        reader.next()
        anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            historicWindowSize=1152)  # , learningPeriod=1152
        for i, record in enumerate(reader, start=1):
            modelInput = dict(zip(headers, record))
            modelInput["c1"] = float(modelInput["c1"])
            value = modelInput["c1"]
            modelInput["c0"] = datetime.datetime.strptime(
                modelInput["c0"], "%Y-%m-%d %H:%M:%S")
            timestamp = modelInput["c0"]
            result = model.run(modelInput)
            anomalyScore = result.inferences['anomalyScore']
            anomalyLikelyhood2 = anomalyLikelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            # `lines` is presumably a module-level record count: only the
            # record at that index is scored and written out.
            if i == lines:
                if anomalyLikelyhood2 > _ANOMALY_THRESHOLD:
                    _LOGGER.info("Anomaly detected at [%s]. Anomaly score: %f.",
                                 result.rawInput["c0"], anomalyScore)
                    anomaly = 1
                else:
                    anomaly = 0
                csvWriter.writerow([timestamp, value, anomalyScore,
                                    anomalyLikelyhood2, anomaly])
                return anomaly
            # else:
            #     csvWriter.writerow([timestamp, value, anomalyScore,
            #                         anomalyLikelyhood2, modelInput["label"]])
    print("Anomaly scores have been written to " + _OUTPUT_PATH)
def main(inputPath):  # data/nyc_taxi.csv
    model = createPredictionModel()
    ts = time.time()  # current time
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    for i in range(0, 10000):
        with open('./out/realtime_prediction.csv', mode='a') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',')
            cpu_usage_data = generateSemiRandomCPUUsage(ts, i)
            output = runDatapointThroughModel(model, cpu_usage_data, shifter,
                                              anomalyLikelihood)
            print(output)
            csv_writer.writerow([
                cpu_usage_data[0], output['prediction'],
                round(cpu_usage_data[1])
            ])
            time.sleep(0.10)
def model_create(request):
    guid = str(uuid4())
    predicted_field = None
    try:
        params = request.json_body
    except ValueError:
        params = None
    if params:
        if 'guid' in params:
            guid = params['guid']
            if guid in models.keys():
                request.response.status = 409
                return {'error': 'The guid "' + guid + '" is not unique.'}
        if 'modelParams' not in params:
            request.response.status = 400
            return {'error': 'POST body must include JSON with a modelParams value.'}
        if 'predictedField' in params:
            predicted_field = params['predictedField']
        params = params['modelParams']
        msg = 'Used provided model parameters'
    else:
        params = importlib.import_module(
            'model_params.model_params').MODEL_PARAMS['modelConfig']
        msg = ('Using default parameters, timestamp is field c0 and input '
               'and predictedField is c1')
        predicted_field = 'c1'
    model = ModelFactory.create(params)
    if predicted_field is not None:
        print "Enabled predicted field: {0}".format(predicted_field)
        model.enableInference({'predictedField': predicted_field})
    else:
        print "No predicted field enabled."
    models[guid] = {
        'model': model,
        'pfield': predicted_field,
        'params': params,
        'seen': 0,
        'last': None,
        'alh': anomaly_likelihood.AnomalyLikelihood(),
        'tfield': find_temporal_field(params)
    }
    print "Made model", guid
    return {'guid': guid,
            'params': params,
            'predicted_field': predicted_field,
            'info': msg,
            'tfield': models[guid]['tfield']}
def main(options):
    # Create Prediction Model
    model = createPredictionModel()
    shifter = InferenceShifter()
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
    # Append data to csv file (mode 'a'; mode 'w' would truncate the file on
    # every iteration)
    csvFile = getCsvFile(options.mode, options.container, options.part)
    for i in range(0, 10000):
        with open(csvFile, mode='a') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',')
            # get prometheus data or generated data
            usage_data = next(generateRandomUsageData())
            print "USAGE DATA: ", usage_data
            output = runDatapointThroughModel(model, usage_data, shifter,
                                              anomalyLikelihood)
            print(output)
            csv_writer.writerow([usage_data[0], output['prediction'],
                                 round(usage_data[1])])
            time.sleep(0.10)
def initialize(self):
    # Get config params, setting the RDSE resolution
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    self.modelParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=self.inputMin - rangePadding,
        maxVal=self.inputMax + rangePadding,
        minResolution=0.001,
        tmImplementation="tm_cpp")["modelConfig"]
    self._setupEncoderParams(
        self.modelParams["modelParams"]["sensorParams"]["encoders"])
    # Initialize the anomaly likelihood object
    numentaLearningPeriod = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        learningPeriod=numentaLearningPeriod,
        estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
        reestimationPeriod=100)
def initialize(self,
               lower_data_limit=-1e9,
               upper_data_limit=1e9,
               probation_number=750,
               spatial_tolerance=0.05):
    """
    Any data outside the range [lower_data_limit, upper_data_limit] will be
    regarded as an anomaly directly.

    The algorithm treats the first probation_number inputs as a reference for
    calculating likelihood. It is expected that no anomalies occur within the
    first probation_number samples; the longer this period, the better.
    """
    self.probationary_period = probation_number
    self.input_min = lower_data_limit
    self.input_max = upper_data_limit

    # Fraction outside of the range of values seen so far that will be
    # considered a spatial anomaly regardless of the anomaly likelihood
    # calculation. This accounts for the human labelling bias for spatial
    # values larger than what has been seen so far.
    self.spatial_tolerance = spatial_tolerance

    # Get config params, setting the RDSE resolution
    range_padding = abs(self.input_max - self.input_min) * 0.2
    model_params = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=self.input_min - range_padding,
        maxVal=self.input_max + range_padding,
        minResolution=0.001,
        tmImplementation="cpp")["modelConfig"]
    self._setupEncoderParams(
        model_params["modelParams"]["sensorParams"]["encoders"])

    self.model = ModelFactory.create(model_params)
    self.model.enableInference({"predictedField": "value"})

    if self.useLikelihood:
        # Initialize the anomaly likelihood object
        numenta_learning_period = int(
            math.floor(self.probationary_period / 2.0))
        self.anomaly_likelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numenta_learning_period,
            estimationSamples=self.probationary_period - numenta_learning_period,
            reestimationPeriod=100)
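# Hedged sketch (an assumption, not shown in the snippet above): how the
# stored spatial_tolerance is typically applied per record in NAB-style
# detectors. self.min_val / self.max_val are hypothetical running extremes
# tracked alongside the model.
def check_spatial_anomaly(self, value):
    spatial_anomaly = False
    if (self.min_val is not None and self.max_val is not None
            and self.min_val != self.max_val):
        tolerance = (self.max_val - self.min_val) * self.spatial_tolerance
        if value > self.max_val + tolerance or value < self.min_val - tolerance:
            spatial_anomaly = True
    # Update the running range of values seen so far.
    if self.max_val is None or value > self.max_val:
        self.max_val = value
    if self.min_val is None or value < self.min_val:
        self.min_val = value
    return spatial_anomaly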
def __init__(self,
             minValue,
             maxValue,
             probationaryPeriod=50,
             normValue=7,
             memoryWindow=2):
    super(DasrsLikelihood, self).__init__(minValue, maxValue,
                                          probationaryPeriod, normValue,
                                          memoryWindow)
    numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 1.0))
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        learningPeriod=numentaLearningPeriod,
        estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
        reestimationPeriod=100)
    self.minVal = None
    self.maxVal = None
def initialize(self, inputMin, inputMax):
    # Get config params, setting the RDSE resolution
    self.inputMin = inputMin
    self.inputMax = inputMax
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    modelParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=self.inputMin - rangePadding,
        maxVal=self.inputMax + rangePadding,
        minResolution=0.001,
        tmImplementation="cpp")["modelConfig"]
    self._setupEncoderParams(
        modelParams["modelParams"]["sensorParams"]["encoders"])
    self.model = ModelFactory.create(modelParams)
    self.model.enableInference({"predictedField": "value"})
    # Initialize the anomaly likelihood object
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
def HTM(data, model):
    shifter = InferenceShifter()
    counter = 0
    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=None,
        learningPeriod=100,
        estimationSamples=10,
        historicWindowSize=800,
        reestimationPeriod=1)
    actuals = np.zeros(len(data))
    predictions = np.zeros(len(data))
    anomalyScores = np.zeros(len(data))
    anomalyLikelihoods = np.zeros(len(data))
    # For every timestep in the time series.
    for t in range(len(data)):
        counter += 1
        if counter % 100 == 0:
            pass  # print "Read %i lines..." % counter
        consumption = float(data[t])
        result = model.run({"kw_energy_consumption": consumption})
        result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]
        actuals[t] = data[t]
        predictions[t] = prediction
        anomalyScores[t] = anomalyScore
        likelihood = anomalyLikelihood.anomalyProbability(
            consumption, anomalyScore)
        anomalyLikelihoods[t] = likelihood
    return predictions, anomalyScores, anomalyLikelihoods
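# Minimal self-contained sketch (a composite of the snippets above, with
# made-up data): the pattern shared by nearly every example here. Note that
# newer nupic releases use learningPeriod where older ones used
# claLearningPeriod, which is why both spellings appear above;
# computeLogLikelihood is used as in the run(df, basedir, column) example.
import datetime

from nupic.algorithms import anomaly_likelihood

helper = anomaly_likelihood.AnomalyLikelihood()
start = datetime.datetime(2015, 1, 1)
for i in range(10):
    timestamp = start + datetime.timedelta(minutes=5 * i)
    value = float(i % 5)
    rawScore = 0.1 if i < 8 else 0.9  # stands in for result.inferences["anomalyScore"]
    likelihood = helper.anomalyProbability(value, rawScore, timestamp)
    logLikelihood = helper.computeLogLikelihood(likelihood)
    print timestamp, value, likelihood, logLikelihood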