def runDataset(dataset):
    """Run anomaly detection over one of the bundled NAB datasets.

    :param dataset: 0 selects the machine-temperature data,
                    1 selects the Twitter GOOG volume data.
    :raises ValueError: for any other dataset id (the original code fell
        through and later crashed with a NameError because the else
        branch assigned ``model_params`` while the code below reads
        ``model_par``).
    """
    # set model parameters, csv path, and output csv/plot
    if dataset == 0:
        model_par = machine_model_params
        csv_path = "./data/machine_temperature_system_failure.csv"
        outputCSVFile = nupic_output.NuPICFileOutput(
            ["Machine_Temp_Sys_Failure_OUTPUT_CSV"])
        outputPlotFile = nupic_output.NuPICPlotOutput(
            ["Machine_Temp_Sys_Failure_OUTPUT_PLOT"])
    elif dataset == 1:
        model_par = twitter_model_params
        csv_path = "./data/Twitter_volume_GOOG.csv"
        outputCSVFile = nupic_output.NuPICFileOutput(
            ["Twitter_Volume_Google_OUTPUT_CSV"])
        outputPlotFile = nupic_output.NuPICPlotOutput(
            ["Twitter_Volume_Google_OUTPUT_PLOT"])
    else:
        # Fail fast instead of continuing with undefined names.
        raise ValueError("No such dataset id: %r (expected 0 or 1)" % (dataset,))
    # create model
    model = createModel(model_par)
    # run model
    runModel(model, csv_path, outputCSVFile, outputPlotFile)
def runIoThroughNupic(inputData, model, gymName, plot):
    """Feed every row of the gym CSV through `model` and record the
    one-step best prediction for each record.

    :param inputData: path to the input CSV (three header rows).
    :param model: OPF model to run each record through.
    :param gymName: base name handed to the nupic_output writer.
    :param plot: truthy selects the live-plot writer, falsy the CSV writer.
    """
    dataFile = open(inputData, "rb")
    reader = csv.reader(dataFile)
    # The NuPIC data format carries three header rows; skip them all.
    for _ in range(3):
        reader.next()
    shifter = InferenceShifter()
    output = (nupic_output.NuPICPlotOutput([gymName]) if plot
              else nupic_output.NuPICFileOutput([gymName]))
    rowCount = 0
    for row in reader:
        rowCount += 1
        if rowCount % 100 == 0:
            print("Read %i lines..." % rowCount)
        timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        consumption = float(row[1])
        modelInput = {
            "timestamp": timestamp,
            "kw_energy_consumption": consumption
        }
        result = model.run(modelInput)
        # Only the live plot needs predictions re-aligned with actuals.
        if plot:
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        output.write([timestamp], [consumption], [prediction])
    dataFile.close()
    output.close()
def runIoThroughNupic(inputPath, model, modelName, plot=False):
    """Run every CSV record through `model`, mapping columns to fields by
    the header row, and write predictions to a plot or a file.

    :param inputPath: path to the CSV; row 1 holds the field names and the
        next two rows are the remaining NuPIC header rows.
    :param model: OPF model whose input fields match the CSV headers.
    :param modelName: name handed to the nupic_output writer.
    :param plot: when True, write aligned (seconds, actual, predicted)
        triples to a live plot; otherwise write raw rows/results to a file.
    """
    with open(inputPath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # skip header rows; the first one carries the field names
        headers = csvReader.next()
        csvReader.next()
        csvReader.next()
        shifter = InferenceShifter()
        if plot:
            output = nupic_output.NuPICPlotOutput(modelName)
        else:
            output = nupic_output.NuPICFileOutput(modelName, path="data")
        counter = 0
        for row in csvReader:
            assert len(row) == len(headers)
            counter += 1
            if counter % 100 == 0:
                print("Read %i lines..." % counter)
            # first column is a float, the rest are ints
            row = [float(row[0])] + [int(val) for val in row[1:]]
            input_row = dict(zip(headers, row))
            result = model.run(input_row)
            if plot:
                seconds = input_row["seconds"]
                actual = input_row[PREDICTED_BUCKET]
                # BUG FIX: the shifted result was previously discarded
                # (`shifter.shift(result)` with no assignment), so the
                # plotted prediction was never aligned with its actual.
                result = shifter.shift(result)
                predicted = result.inferences["multiStepBestPredictions"][1]
                output.write([seconds], [actual], [predicted])
            else:
                output.write(input_row, result)
        output.close()
def run_io_through_nupic(input_data, model, file_name, plot, print_results):
    """Run each (amount, date, tte) record through `model`, writing the
    one-step prediction and periodically reporting the running RMSE.

    :param input_data: path to the input CSV (three header rows).
    :param model: OPF model to run.
    :param file_name: base name handed to the nupic_output writer.
    :param plot: truthy selects the live-plot writer (with GUI refresh
        every 20 rows); falsy selects the CSV writer.
    :param print_results: when truthy, echo each record's actual and
        predicted value to stdout.
    """
    # `with` guarantees the input file is closed even if the model raises
    # (the original open()/close() pair leaked on error).
    with open(input_data, "rb") as input_file:
        csv_reader = csv.reader(input_file)
        # skip header rows
        csv_reader.next()
        csv_reader.next()
        csv_reader.next()
        shifter = InferenceShifter()
        if plot:
            output = nupic_output.NuPICPlotOutput([file_name])
        else:
            output = nupic_output.NuPICFileOutput([file_name])
        metrics_manager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                         model.getInferenceType())
        counter = 0
        for row in csv_reader:
            counter += 1
            timestamp = datetime.datetime.strptime(row[1], DATE_FORMAT)
            consumption = int(row[2])
            amount = float(row[0])
            result = model.run({
                "amount": amount,
                "date": timestamp,
                "tte": consumption
            })
            result.metrics = metrics_manager.update(result)
            # progress report every 100 rows plus every 384 rows
            if counter % 100 == 0 or counter % 384 == 0:
                print("Read %i lines..." % counter)
                print("After %i records, rmse=%f"
                      % (counter,
                         result.metrics["multiStepBestPredictions:multiStep:"
                                        "errorMetric='rmse':steps=1:window=1000:"
                                        "field=tte"]))
            # only the live plot needs predictions shifted into alignment
            if plot:
                result = shifter.shift(result)
            prediction = result.inferences["multiStepBestPredictions"][1]
            output.write([timestamp], [consumption], [prediction])
            if print_results:
                print("date:", timestamp.strftime("%y-%m-%d"),
                      "actual:", consumption, "predicted:", prediction)
            if plot and counter % 20 == 0:
                output.refresh_gui()
    output.close()
def runDataThroughNupic(MODELS, anomaly_helper, inputData, systemName):
    """Stream a multi-column CSV through one model per column, write one
    anomaly-likelihood row per timestamp, and pickle the likelihood helper.

    :param MODELS: list of OPF models, one per data column, ordered like
        the module-level MODEL_NAMES.
    :param anomaly_helper: anomaly-likelihood helper to attach to the
        output object (and to pickle when the run completes).
    :param inputData: path to the input CSV (three header rows; column 0
        is the timestamp, column i+1 feeds MODELS[i]).
    :param systemName: name handed to the nupic_output writer.
    """
    ANOMALY_OBJ = nupic_output.NuPICFileOutput(systemName)
    ANOMALY_OBJ.anomalyLikelihoodHelper = anomaly_helper
    ANOMALY_LIKELIHOOD = [0.0 for i in range(len(MODEL_NAMES))]
    # BUG FIX: use one InferenceShifter per model.  A single shared
    # shifter received results from all models interleaved, so shifted
    # predictions were misaligned across models.
    shifters = [InferenceShifter() for _ in range(len(MODELS))]
    counter = 0
    with open(inputData, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # skip header rows
        csvReader.next()
        csvReader.next()
        csvReader.next()
        for row in csvReader:
            counter += 1
            timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
            for model_index in range(len(MODELS)):
                data = float(row[model_index + 1])
                inference_type = MODEL_NAMES[model_index]
                result = MODELS[model_index].run({
                    "timestamp": timestamp,
                    inference_type: data
                })
                if counter % 20 == 0:
                    print(str(counter) + ":" + inference_type)
                result = shifters[model_index].shift(result)
                prediction = result.inferences["multiStepBestPredictions"][1]
                anomalyScore = result.inferences["anomalyScore"]
                anomalyLikelihood = ANOMALY_OBJ.get_anomaly_likelihood(
                    timestamp, data, prediction, anomalyScore)
                ANOMALY_LIKELIHOOD[model_index] = anomalyLikelihood
            # one row per timestamp holding every model's likelihood
            ANOMALY_OBJ.write(timestamp, ANOMALY_LIKELIHOOD)
    ANOMALY_OBJ.close()
    print("Saving Anomaly Object")
    # Build the output directory path once and use it consistently
    # (the original mixed the computed path with the literal 'objects').
    path = os.path.join(os.getcwd(), "objects")
    if not os.path.isdir(path):
        os.mkdir(path)
    with open(os.path.join(path, 'anomaly_object.pkl'), 'wb') as o:
        pickle.dump(ANOMALY_OBJ.anomalyLikelihoodHelper, o)
def initalizeModels():
    """Build one OPF model per name in MODEL_NAMES and return them along
    with the anomaly-likelihood helper of a fresh file-output object.

    NOTE: the (misspelled) function name is kept so existing callers
    continue to work.

    :returns: (list of models, anomaly-likelihood helper)
    """
    models = []
    for name in MODEL_NAMES:
        model = ModelFactory.create(getModelParamsFromName(name))
        # each model predicts the field it is named after
        model.enableInference({"predictedField": name})
        models.append(model)
    fileOutput = nupic_output.NuPICFileOutput(SYSTEM_NAME)
    return models, fileOutput.anomalyLikelihoodHelper
def runIoThroughNupic(inputData, model, name, plot):
    # Stream the swarm-format CSV through `model`, writing either a file
    # or a live plot of the fields listed in `plot`.
    #
    # inputData -- path to the CSV (three header rows); columns appear in
    #              the order of SWARM_DESCRIPTION["includedFields"].
    # model     -- OPF model whose input fields match the swarm description.
    # name      -- base name handed to the nupic_output writer.
    # plot      -- list of field names to plot.  An EMPTY list selects file
    #              output, and is then mutated in place to hold every
    #              included field name.  NOTE(review): because of that
    #              mutation the later `if plot:` test is always true, so
    #              the shifter also runs in file mode -- confirm intended.
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    shifter = InferenceShifter()
    if len(plot) == 0:
        # File mode: emit every field declared in the swarm description.
        for field in SWARM_DESCRIPTION["includedFields"]:
            plot.append(field["fieldName"])
        output = nupic_output.NuPICFileOutput(name, plot)
    else:
        output = nupic_output.NuPICPlotOutput(name, plot)
    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())
    counter = 0
    for row in csvReader:
        counter += 1
        # Build the model input dict in the column order declared by the
        # swarm description, converting each cell by its declared type.
        data = {}
        fldCounter = 0
        for field in SWARM_DESCRIPTION["includedFields"]:
            data[field["fieldName"]] = translate_data(field["fieldType"],
                                                      row[fldCounter])
            fldCounter += 1
        result = model.run(data)
        result.metrics = metricsManager.update(result)
        # periodic progress + running altMAPE report (verbose mode only)
        if options.verbose is not None and counter % 100 == 0:
            print "Read %i lines..." % counter
            print ("After %i records, 1-step altMAPE=%f"
                   % (counter,
                      result.metrics["multiStepBestPredictions:multiStep:"
                                     "errorMetric='altMAPE':steps=1:window=1000:"
                                     "field="+PREDICTED_FIELD]))
        if plot:
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        # write the plotted fields' actual values plus the prediction
        vals = []
        for field in plot:
            vals.append(data[field])
        output.write(vals, prediction)
    inputFile.close()
    output.close()
def runIoThroughNupic(inputData, model, gymName, plot):
    """Run the gym energy-consumption CSV through `model`, reporting the
    running 1-step altMAPE every 100 rows and recording each prediction.

    :param inputData: path to the input CSV (three header rows).
    :param model: OPF model to run.
    :param gymName: base name handed to the nupic_output writer.
    :param plot: truthy selects the live-plot writer (with a GUI refresh
        every 20 rows); falsy selects the CSV writer.
    """
    dataFile = open(inputData, "rb")
    reader = csv.reader(dataFile)
    # three NuPIC header rows precede the data
    for _ in range(3):
        reader.next()
    shifter = InferenceShifter()
    if plot:
        output = nupic_output.NuPICPlotOutput([gymName])
    else:
        output = nupic_output.NuPICFileOutput([gymName])
    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())
    rowNum = 0
    for row in reader:
        rowNum += 1
        timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        consumption = float(row[1])
        result = model.run({
            "timestamp": timestamp,
            "kw_energy_consumption": consumption
        })
        result.metrics = metricsManager.update(result)
        if rowNum % 100 == 0:
            print("Read %i lines..." % rowNum)
            print("After %i records, 1-step altMAPE=%f"
                  % (rowNum,
                     result.metrics["multiStepBestPredictions:multiStep:"
                                    "errorMetric='altMAPE':steps=1:window=1000:"
                                    "field=kw_energy_consumption"]))
        # only the live plot needs predictions aligned with actuals
        if plot:
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        output.write([timestamp], [consumption], [prediction])
        if plot and rowNum % 20 == 0:
            output.refreshGUI()
    dataFile.close()
    output.close()
def runIoThroughNupic(inputData, model, textName, word_list):
    """Train `model` on the word-number sequence in `inputData`, resetting
    sequence state wherever the reset column is set, then save the model.

    :param inputData: path to the input CSV (three header rows; column 0
        is the reset flag, column 1 the word number).
    :param model: OPF model; learning is enabled for the run and finished
        afterwards, then the model is saved to MODEL_DIR.
    :param textName: base name handed to the nupic_output writer.
    :param word_list: unused here; kept for interface compatibility.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    output = nupic_output.NuPICFileOutput([textName])
    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())
    model.enableLearning()
    counter = 0
    for row in csvReader:
        counter += 1
        # NOTE(review): bool() of a CSV cell is True for ANY non-empty
        # string, including "0" -- assumes the reset column is blank when
        # no reset is wanted; confirm against the data generator.
        reset_flag = bool(row[0])
        word_num = int(row[1])
        if reset_flag:
            print('resetting model')
            model.resetSequenceStates()
        result = model.run({"word_num": word_num})
        result.metrics = metricsManager.update(result)
        if counter % 100 == 0:
            print("Read %i lines..." % counter)
            # BUG FIX: this previously used a comma instead of the `%`
            # operator and printed a raw tuple rather than the message.
            print("After %i records, 1-step altMAPE=%f"
                  % (counter,
                     result.metrics["multiStepBestPredictions:multiStep:"
                                    "errorMetric='altMAPE':steps=1:window=1000:"
                                    "field=word_num"]))
    model.finishLearning()
    model.save(MODEL_DIR)
    inputFile.close()
    output.close()
def runIoThroughNupic(inputData, model, dataName, plot):
    """Run the single-column `y` series through `model`, recording the
    one-step best prediction and reporting running NRMSE every 100 rows.

    :param inputData: path to the input CSV (three header rows; column 0
        holds the y value).
    :param model: OPF model with a single `y` input field.
    :param dataName: base name handed to the nupic_output writer.
    :param plot: truthy selects the live-plot writer, falsy the CSV writer.
    """
    inputFile = open(inputData, "rU")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    shifter = InferenceShifter()
    if plot:
        output = nupic_output.NuPICPlotOutput([dataName])
    else:
        output = nupic_output.NuPICFileOutput([dataName])
    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())
    counter = 0
    for row in csvReader:
        counter += 1
        y = float(row[0])
        result = model.run({"y": y})
        result.metrics = metricsManager.update(result)
        if counter % 100 == 0:
            print("Read %i lines..." % counter)
            # BUG FIX: this previously used a comma instead of the `%`
            # operator and printed a raw tuple rather than the message.
            print("After %i records, 1-step nrmse=%f"
                  % (counter,
                     result.metrics["multiStepBestPredictions:multiStep:"
                                    "errorMetric='nrmse':steps=1:window=1000000:"
                                    "field=y"]))
        if plot:
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        # x-coordinate is a constant 0 placeholder (series has no timestamp)
        output.write([0], [y], [prediction])
    inputFile.close()
    output.close()
def runIoThroughNupic(inputData, model, modelName, plot):
    """Stream (subject, verb, object) triples through `model` and record
    the one-step best prediction for each triple.

    :param inputData: path to the input CSV (three header rows; columns
        are subject, verb, object).
    :param model: OPF model with subject/verb/object input fields.
    :param modelName: base name handed to the nupic_output writer.
    :param plot: truthy selects the live-plot writer, falsy the CSV writer.
    """
    triplesFile = open(inputData, "rb")
    reader = csv.reader(triplesFile)
    # skip the three header rows
    for _ in range(3):
        reader.next()
    shifter = InferenceShifter()
    output = (nupic_output.NuPICPlotOutput([modelName]) if plot
              else nupic_output.NuPICFileOutput([modelName]))
    rowCount = 0
    for row in reader:
        rowCount += 1
        if rowCount % 100 == 0:
            print("Read %i lines..." % rowCount)
        subject = row[0]
        verb = row[1]
        obj = row[2]
        result = model.run({
            "subject": subject,
            "verb": verb,
            "object": obj
        })
        if plot:
            result = shifter.shift(result)
        # debug trace of the full multi-step prediction dict
        print("result: " + repr(result.inferences["multiStepBestPredictions"]))
        prediction = result.inferences["multiStepBestPredictions"][1]
        output.write([subject], [verb], [obj], [prediction])
    triplesFile.close()
    output.close()
# --- per-run tracking state for the prediction experiment ---
time_step = []
actual_data = []
patternNZ_track = []
# one row per prediction step ahead; columns are appended per record
predict_data = np.zeros((_options.stepsAhead, 0))
predict_data_ML = []
negLL_track = []
activeCellNum = []
predCellNum = []
predSegmentNum = []
predictedActiveColumnsNum = []
trueBucketIndex = []
# Reach into OPF model internals for the SP implementation object
# (`_sfdr`); presumably the spatial pooler -- confirm against the
# installed NuPIC version's region internals.
sp = model._getSPRegion().getSelf()._sfdr
spActiveCellsCount = np.zeros(sp.getColumnDimensions())
output = nupic_output.NuPICFileOutput([dataSet])
for i in xrange(len(df)):
    inputRecord = getInputRecord(df, predictedField, i)
    # temporal-memory implementation inside the TP region (`_tfdr`)
    tp = model._getTPRegion()
    tm = tp.getSelf()._tfdr
    # predictive cells BEFORE running this record, mapped to their
    # column indices via integer division by cellsPerColumn
    prePredictiveCells = tm.getPredictiveCells()
    prePredictiveColumn = np.array(
        list(prePredictiveCells)) / tm.cellsPerColumn
    result = model.run(inputRecord)
    # record the true bucket index the classifier saw for this record
    trueBucketIndex.append(
        model._getClassifierInputRecord(inputRecord).bucketIndex)
    predSegmentNum.append(len(tm.activeSegments))
# --- per-run tracking state (noise-injection variant) ---
predictDataNN = []
negLLTrack = []
activeCellNum = []
predCellNum = []
predictedActiveColumnsNum = []
trueBucketIndex = []
# Reach into OPF model internals for the SP implementation object
# (`_sfdr`); presumably the spatial pooler -- confirm against the
# installed NuPIC version's region internals.
sp = model._getSPRegion().getSelf()._sfdr
spActiveCellsCount = np.zeros(sp.getColumnDimensions())
# tag the output name with the noise level so runs don't overwrite
if noise > 0:
    datasetName = dataSet + "noise_{:.2f}".format(noise)
else:
    datasetName = dataSet
output = nupic_output.NuPICFileOutput([datasetName])
for i in xrange(len(df)):
    inputRecord = getInputRecord(df, predictedField, i, noise)
    # temporal-memory implementation inside the TP region (`_tfdr`)
    tp = model._getTPRegion()
    tm = tp.getSelf()._tfdr
    # predictive cells BEFORE running this record, mapped to their
    # column indices via integer division by cellsPerColumn
    prePredictiveCells = tm.predictiveCells
    prePredictiveColumn = np.array(list(prePredictiveCells)) / tm.cellsPerColumn
    # run model on the input record
    result = model.run(inputRecord)
    # record the true bucket index the classifier saw for this record
    trueBucketIndex.append(
        model._getClassifierInputRecord(inputRecord).bucketIndex)