def startAnalysis(self):

    # Use filename from attackEdit instead of inputEdit if possible
    if len(self.attackList) > 0:
        infile = str(self.attackEdit.text())
    else:
        infile = str(self.inputEdit.text())
    outfile = str(self.resultsEdit.text())

    granularity = 1
    trainingWin = 24
    forecastingInterval = 1

    print ("\nStarting analysis on %s with settings %d %d %d..."
           % (infile, granularity, trainingWin, forecastingInterval))

    # Get the list of features (the first column is time)
    infile = open(infile, 'rb')
    reader = csv.reader(infile)
    columns = reader.next()[1:]
    print "The following features were found:", columns

    # Algorithm settings
    algo = Algo(granularity, trainingWin, forecastingInterval, len(columns) - 1)
    algo.setEMAParameter(alpha=self.emaSpin.value())
    algo.setSeverityParameters(w=self.severitySpinW.value(),
                               L=self.severitySpinL.value())

    # Output lists (the first entry of each is the CSV header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']

    detected = set()
    ground_truth = set()

    print "Beginning analysis..."
    loadingWin = LoadingWindow()
    self.mainWidget.setEnabled(False)
    count = 0
    for line in reader:

        # Read new data from the file
        cur_time = float(line[0])
        new_data = np.asarray(line[1:], dtype=float)

        target, prediction = algo.run(new_data)  # Magic!

        if prediction is not None:
            y_time.append(cur_time)
            y_target.append(target)
            y_predict.append(float(prediction))

            if algo.checkSeverity(target, float(prediction)):
                detected.add(cur_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        # Record whether this timestamp falls inside a known attack window
        cur_datetime = dt.datetime.fromtimestamp(cur_time)
        for attack in self.attackList:
            if attack.start <= cur_datetime < attack.end:
                ground_truth.add(cur_time)
                break

        # Keep the GUI responsive during long runs
        if (count % 60) == 0:
            QtGui.QApplication.processEvents()
        count += 1

    # Close the input file and save results
    infile.close()
    writeResults(outfile, (y_time, y_target, y_predict, anomalies))
    f1_scores(detected, ground_truth)
    print_stats(y_target[1:], y_predict[1:])  # Skip the header entries

    print "Ending analysis. See %s for results." % outfile
    self.mainWidget.setEnabled(True)
    loadingWin.close()
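
# A minimal sketch of the precision/recall/F1 computation that f1_scores()
# presumably performs on the detected and ground-truth timestamp sets. The
# name and body here are assumptions for illustration -- the actual helper is
# defined elsewhere in the project -- but the metric itself is standard.
def _f1_sketch(detected, ground_truth):
    """Precision, recall, and F1 from two sets of anomaly timestamps."""
    true_pos = len(detected & ground_truth)
    precision = true_pos / float(len(detected)) if detected else 0.0
    recall = true_pos / float(len(ground_truth)) if ground_truth else 0.0
    if precision + recall == 0:
        return precision, recall, 0.0
    return precision, recall, 2 * precision * recall / (precision + recall)
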
def main(argv):

    # Retrieve settings from the JSON settings file
    with open(SMART_DRIVER) as driver:
        jsonDataFile = json.load(driver)

    granularity = int(jsonDataFile['granularity'])
    training_window = int(jsonDataFile['windowSize'])
    forecasting_interval = int(jsonDataFile['forecastingInterval'])

    # Output file for the results (NOTE: 'outputFile' is an assumed settings
    # key -- adjust it to match the driver JSON)
    outfile = str(jsonDataFile.get('outputFile', 'results.csv'))

    print ("\nStarting analysis on database with settings %d %d %d..."
           % (granularity, training_window, forecasting_interval))
    granularity_in_seconds = granularity * 60

    # Initialize the database
    database = Database(DB_CONFIG)

    # Get the list of feature numbers
    id_list = getListIDs(jsonDataFile["idSelection"])
    id_list = list(set(id_list))  # Remove duplicates
    id_list.sort()

    # Determine the range of times to pull data from. If the user specified a
    # timeframe, use it; otherwise, find the largest timeframe for which every
    # feature has data.
    if int(jsonDataFile["specifyTime"]):
        start_time = dt.datetime.strptime(jsonDataFile["beginTime"], DATE_FORMAT)
        end_time = dt.datetime.strptime(jsonDataFile["endTime"], DATE_FORMAT)
    else:
        start_time, end_time = getStartEndTimes(id_list)
    print "Start, end: ", start_time, end_time

    # Get the list of column headers for the features
    columns = []
    for id in id_list:
        columns.append(jsonDataFile['data'][id - 1]['columnName'])
    columns.append(jsonDataFile['totalConsum'])

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval,
                len(columns) - 1)

    # Output lists (the first entry of each is the CSV header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']
    count = 0

    # EWMA smoothing: alpha is adjustable on the scale (0, 1]. The smaller
    # alpha is, the more averaging takes place; 1.0 means no averaging.
    algo.setEMAParameter(alpha=1.0)

    # Recommended severity parameters from the paper:
    #   w=0.53, L=3.714  (most sensitive)
    #   w=0.84, L=3.719  (medium sensitivity)
    #   w=1.00, L=3.719  (least sensitive)
    algo.setSeverityParameters(w=1, L=3.719)  # Custom sensitivity

    detected = set()
    ground_truth = set()  # No labeled attacks in the database run; stays empty

    #==================== ANALYSIS ====================#
    print "Beginning analysis..."
    while start_time < end_time:

        # FOR SMART* ONLY: some of the data seems bad on May 31st (too many
        # NULLs), so skip ahead to June 1st.
        if dt.datetime(2012, 5, 30) < start_time < dt.datetime(2012, 6, 1):
            start_time = dt.datetime(2012, 6, 1)

        if count % 240 == 0:
            print "Trying time: %s" % start_time
        count += 1

        # Execute the query for one granularity-sized window
        stop_time = start_time + dt.timedelta(0, granularity_in_seconds)
        new_data = database.get_avg_data(start_time, stop_time, columns)
        # Clamp negatives to zero; NaNs also become zero, since max() keeps
        # its first argument when the comparison against NaN is false
        new_data = np.asarray([max(0, data) for data in new_data])

        target, prediction = algo.run(new_data)  # Magic!

        if prediction is not None:
            y_time.append(start_time)
            y_target.append(target)
            y_predict.append(float(prediction))

            if algo.checkSeverity(target, float(prediction)):
                detected.add(start_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        start_time = stop_time  # Increment and loop

    #==================== RESULTS ====================#
    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    f1_scores(detected, ground_truth)
    print_stats(y_target[1:], y_predict[1:])  # Skip the header entries

    print "Ending analysis. See %s for results." % outfile
    return results
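
# The (w, L) severity parameters above match the usual EWMA control-chart
# formulation, where w is the smoothing weight and L the control-limit
# multiplier. Below is a minimal sketch of that style of test, assuming a
# known prediction-error standard deviation sigma; Algo.checkSeverity itself
# is defined elsewhere and may differ in detail.
import math

class EwmaSeveritySketch(object):

    def __init__(self, w, L, sigma):
        self.w = w          # Smoothing weight in (0, 1]
        self.L = L          # Control-limit multiplier
        self.sigma = sigma  # Standard deviation of the prediction error
        self.z = 0.0        # Running EWMA of the prediction error

    def check(self, target, prediction):
        """Return True if the smoothed error exceeds the control limit."""
        error = target - prediction
        self.z = self.w * error + (1 - self.w) * self.z
        # Asymptotic EWMA control limit: L * sigma * sqrt(w / (2 - w))
        limit = self.L * self.sigma * math.sqrt(self.w / (2 - self.w))
        return abs(self.z) > limit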