def runDataset(dataset):
    """
    Run anomaly detection over the dataset selected by `dataset`.

    :param dataset: integer selector -- 0 for the machine-temperature
        system-failure dataset, 1 for the Twitter GOOG volume dataset.
    :raises ValueError: if `dataset` is not a known selector (the original
        code fell through with an undefined model variable and crashed
        later with a NameError).
    """
    if dataset == 0:
        model_par = machine_model_params
        csv_path = "./data/machine_temperature_system_failure.csv"
        # WINDOW / ANOMALY_THRESHOLD are module-level knobs read by the
        # nupic_output helpers when rendering results.
        nupic_output.WINDOW = 22694
        nupic_output.ANOMALY_THRESHOLD = 0.97
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_PLOT")
    elif dataset == 1:
        model_par = twitter_model_params
        csv_path = "./data/Twitter_volume_GOOG.csv"
        nupic_output.WINDOW = 15841
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_PLOT")
    else:
        # Fail fast instead of falling through to createModel() with an
        # undefined `model_par` (the original's NameError path).
        raise ValueError("Unknown dataset selector: %r" % (dataset,))

    # create the model and run it over the chosen CSV
    model = createModel(model_par)
    runModel(model, csv_path, outputCSVFile, outputPlotFile)
def runIoThroughNupic(inputData, model, gymName, plot, load): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param gymName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(gymName) else: output = nupic_anomaly_output.NuPICFileOutput(gymName) counter = 0 # using dummy time to debug timestamp = datetime.datetime.strptime(csvReader.next()[0], DATE_FORMAT) print("DEBUG_PRINT: initiali time", timestamp) for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = timestamp + datetime.timedelta(microseconds=10000) #timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) consumption = float(row[2]) rawValue = float(row[1]) result = model.run({ "timestamp": timestamp, "wavelet_value": consumption }) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, consumption, prediction, anomalyScore, rawValue) if not load: print("saving model for MODEL_DIR") model.save(MODEL_DIR) inputFile.close() output.close() return model
def runModel(model): inputFilePath = "disease_person1.csv" inputFile = open(inputFilePath, "rb") csvReader = csv.reader(inputFile) #skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() output = nupic_output.NuPICPlotOutput("ECG") counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) #timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-6] value = int(row[1]) result = model.run({"timestamp": timestamp, "value": value}) result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, value, prediction, anomalyScore) inputFile.close() output.close()
def runHarddriveAnomaly(plot):
    """
    Run anomaly detection over the hard-drive SMART test data, logging any
    record whose anomaly score exceeds _ANOMALY_THRESHOLD.

    :param plot: if True use matplotlib output, otherwise file output
    """
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput('Harddrive Learning')
    else:
        output = nupic_anomaly_output.NuPICFileOutput('Harddrive Learning')

    _LOGGER.info('start with anomaly detection...')
    model = getModel('good')
    model.enableInference({'predictedField': 'class'})

    _LOGGER.info('read data file...')
    data = pd.read_csv('harddrive-smart-data-test.csv')
    dictionary = data.transpose().to_dict().values()

    # Open the results CSV once, up front. The original re-opened the file
    # inside the loop with the invalid mode string "wa", re-wrote the header
    # on every row, and leaked a file handle per row.
    resultFile = open(_OUTPUT_PATH, "w")
    try:
        csvWriter = csv.writer(resultFile)
        csvWriter.writerow(["class", "anomaly_score"])
        for row in dictionary:
            result = model.run(row)
            if plot:
                # align predictions with their rows for plotting
                result = shifter.shift(result)
            prediction = result.inferences["multiStepBestPredictions"][1]
            anomalyScore = result.inferences['anomalyScore']
            output.write('', result.rawInput["class"], prediction,
                         anomalyScore)
            csvWriter.writerow([row["class"], anomalyScore])
            if anomalyScore > _ANOMALY_THRESHOLD:
                _LOGGER.info("Anomaly detected at [%s]. Anomaly score: %f.",
                             result.rawInput["class"], anomalyScore)
    finally:
        resultFile.close()
def runIoThroughNupic(inputData, model, metric, sensor, patientId, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param csvName: CSV name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile.read().splitlines()) # skip header rows csvReader.next() csvReader.next() csvReader.next() csvName = "%s_%s_%s" % (metric, sensor, patientId) print "running model with model_params '%s'" % csvName shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(csvName) else: if not os.path.exists(MODEL_RESULTS_DIR): os.makedirs(MODEL_RESULTS_DIR) output = nupic_anomaly_output.NuPICFileOutput("%s/%s" % (MODEL_RESULTS_DIR, csvName)) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter metric_value = float(row[0]) result = model.run({ "metric_value": metric_value }) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][0] anomalyScore = result.inferences["anomalyScore"] output.write(counter, metric_value, prediction, anomalyScore) output.close() inputFile.close()
def runIoThroughNupic(inputData, model, app, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param gymName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(app) else: output = nupic_anomaly_output.NuPICFileOutput(app) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter #timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) #consumption = float(row[1]) packets = int(row[8]) bytes = int(row[9]) duration = float(row[10]) result = model.run({ "packets": packets, "bytes": bytes, "duration": duration }) #print result #prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.plot(counter, anomalyScore) #output.write(duration,packets,0, anomalyScore) inputFile.close() output.close()
def runIoThroughNupic(inputData, model, gymName, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param gymName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ # t0 = time() # # model = load(open('model.pkl', 'rb')) # model = ModelFactory.loadFromCheckpoint('/home/magarwal/logoDetective/core/anomalyDetection/nupic/nupic/examples/opf/clients/hotgym/anomaly/one_gym/model_save/model.pkl') # print 'time taken in loading model = ',time()-t0 inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(gymName) else: output = nupic_anomaly_output.NuPICFileOutput(gymName) counter = 0 for row in csvReader: counter += 1 if (counter % 1000 == 0): print "Read %i lines..." % counter timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) feat = float(row[1]) result = model.run({"time_start": timestamp, FEAT_NAME: feat}) if plot: result = shifter.shift(result) # print 'result = ',result prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, feat, prediction, anomalyScore) inputFile.close() output.close()
def runModel(model, inputFilePath): inputFile = open(inputFilePath, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() output = nupic_output.NuPICPlotOutput("Vaccination") shifter = InferenceShifter() counter = 0 actualCount = 0 predictCount = 0 miss = 0 hit = 0 for row in csvReader: counter += 1 if(counter % 10 == 0): print "Read %i lines..." % counter vaccine_date = datetime.datetime.strptime(row[2], DATE_FORMAT) vaccine_name = str(row[1]) result = model.run({ "vaccine_date": vaccine_date, "vaccine_name": vaccine_name }) result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] #output.write([vaccine_date], [vaccine_name], [prediction]) print len(vaccine_name) output.write(vaccine_date, len(vaccine_name), len(prediction), anomalyScore) if prediction == "Yellow Fever": predictCount += 1 if vaccine_name == "Yellow Fever": actualCount += 1 if vaccine_name == prediction: hit += 1 else: miss += 1 print counter, "community member_id: ", row[0], "Actual: ", vaccine_name, "Predicted: ", prediction, "------", anomalyScore print"\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount print "\n hits: ", hit,"\n misses: ", miss
def runIoThroughNupic(inputData, model, InputName, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param InputName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(InputName) else: output = nupic_anomaly_output.NuPICFileOutput(InputName) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) PredFldNm = float(row[1]) result = model.run({ "c0": timestamp, "c1": PredFldNm }) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, PredFldNm, prediction, anomalyScore) inputFile.close() output.close()
def runIoThroughNupic(inputData, model, modelName, plot): inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows headers = csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(modelName) else: output = nupic_anomaly_output.NuPICFileOutput(modelName) metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(), model.getInferenceType()) counter = 0 for row in csvReader: counter += 1 timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) consumption = float(row[1]) result = model.run({"Time": timestamp, PREDICTED_FIELD: consumption}) result.metrics = metricsManager.update(result) if counter % 100 == 0: print "Read %i lines..." % counter print( "After %i records, 1-step altMAPE=%f", counter, result.metrics["multiStepBestPredictions:multiStep:" "errorMetric='altMAPE':steps=1:window=1000:" "field=%s" % PREDICTED_FIELD]) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, consumption, prediction, anomalyScore) inputFile.close() output.close()
def run_io_through_nupic(input_data, model, metric_name, plot): input_file = open(input_data, "rb") csv_reader = csv.reader(input_file) # skip header rows csv_reader.next() csv_reader.next() csv_reader.next() shifter = InferenceShifter if plot: output = nupic_anomaly_output.NuPICPlotOutput(metric_name) else: output = nupic_anomaly_output.NuPICFileOutput(metric_name) counter = 0 for row in csv_reader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = datetime.strptime(row[1], DATE_FORMAT) value = float(row[0]) result = model.run({ "timestamp": timestamp, "value": value }) if plot: result = shifter.shift(result) print "Line %d" % counter prediction = result.inferences["multiStepBestPredictions"][1] anomaly_score = result.inferences["anomalyScore"] output.write(timestamp, value, prediction, anomaly_score) input_file.close() output.close()
def process_upstream_model(model, sensors):
    """
    Run the traffic model over readings.csv, plotting predictions and
    anomaly scores for the downstream sensor volume.

    Only the two 'id' values in `sensors` are used here (for the plot
    title); the upstream query-aggregation logic that previously lived in
    this function was dead (commented-out) code and has been removed.

    :param model: OPF Model object
    :param sensors: dict like
        {'upstream': {'id': ..., 'query': ...},
         'downstream': {'id': ..., 'sensors': [...]}}
    """
    import nupic_anomaly_output

    output = nupic_anomaly_output.NuPICPlotOutput(
        "Traffic Volume from " + sensors['upstream']['id'] +
        " to " + sensors['downstream']['id'])

    with open('readings.csv', 'r') as infile:
        import csv
        readings = csv.DictReader(infile)
        # skip two leading data rows, mirroring the original behavior
        readings.next()
        readings.next()
        for reading in readings:
            timestamp = datetime.strptime(reading['timestamp'],
                                          '%Y-%m-%d %H:%M')
            downstream_total = float(reading['downstream'])
            fields = {
                "timestamp": timestamp,
                'downstream': downstream_total,
            }
            result = model.run(fields)
            anomaly_score = result.inferences["anomalyScore"]
            prediction = result.inferences["multiStepBestPredictions"][1]
            output.write(timestamp, downstream_total, prediction,
                         anomaly_score)