def main(argv):
    """Run the anomaly-detection analysis on data pulled from the database.

    Settings are read from the JSON file at SMART_DRIVER: the granularity
    (in minutes), the training window size, the forecasting interval, the
    feature selection and, optionally, an explicit time range.  Averaged
    data for each granularity-sized time step is fetched from the database
    and fed to the algorithm; every step that yields a prediction is
    recorded and checked for anomalies.

    Args:
        argv: Command-line arguments.  Not read by this function.

    Returns:
        The tuple (y_time, y_target, y_predict, anomalies).  Each element
        is a list whose first item is a column-header string, followed by
        one entry per time step for which a prediction was produced.
    """
    # NOTE: print is always called with one pre-formatted string so the
    # output is byte-identical under Python 2 and the code parses on 3.

    # Retrieve settings from JSON settings file
    with open(SMART_DRIVER) as driver:
        settings = json.load(driver)

    granularity = int(settings['granularity'])
    training_window = int(settings['windowSize'])
    forecasting_interval = int(settings['forecastingInterval'])

    print("\nStarting analysis on database with settings %d %d %d..."
          % (granularity, training_window, forecasting_interval))

    granularity_in_seconds = granularity * 60

    # Initialize database
    database = Database(DB_CONFIG)

    # Sorted list of feature numbers with duplicates removed
    id_list = sorted(set(getListIDs(settings["idSelection"])))

    # Determine the range of times to pull data from
    if int(settings["specifyTime"]):
        # The user specified a timeframe, use that
        start_time = dt.datetime.strptime(settings["beginTime"], DATE_FORMAT)
        end_time = dt.datetime.strptime(settings["endTime"], DATE_FORMAT)
    else:
        # Otherwise, find the largest timeframe for which each feature
        # has data
        start_time, end_time = getStartEndTimes(id_list)

    print("Start, end:  %s %s" % (start_time, end_time))

    # Column headers for the selected features (feature numbers are
    # 1-based indices into the 'data' list), with the total-consumption
    # column appended last
    columns = [settings['data'][feature_id - 1]['columnName']
               for feature_id in id_list]
    columns.append(settings['totalConsum'])

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval,
                len(columns) - 1)

    # Output lists (first element is the column header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']

    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller value of alpha, the more averaging takes place
    # A value of 1.0 means no averaging happens
    algo.setEMAParameter(alpha=1.0)

    # Recommended severity parameters from the paper:
    #   w=0.53, L=3.714  -> most sensitive
    #   w=0.84, L=3.719  -> medium sensitive
    #   w=1.00, L=3.719  -> least sensitive
    algo.setSeverityParameters(w=1, L=3.719)  # Custom sensitivity

    detected = set()
    # Never populated in this function, so f1_scores() below always
    # receives an empty ground-truth set.
    ground_truth = set()

    # FOR SMART* ONLY
    # Some of the data seems bad on the 31st - too many NULLS
    bad_data_begin = dt.datetime(2012, 5, 30)
    bad_data_end = dt.datetime(2012, 6, 1)

    #==================== ANALYSIS ====================#
    print("Beginning analysis...")
    count = 0
    while start_time < end_time:

        # Jump over the bad SMART* interval
        if bad_data_begin < start_time < bad_data_end:
            start_time = bad_data_end

        if count % 240 == 0:
            print("trying time: %s " % start_time)
        count += 1

        # Execute the query
        stop_time = start_time + dt.timedelta(0, granularity_in_seconds)
        new_data = database.get_avg_data(start_time, stop_time, columns)
        # Remove 'nan' and negative values by clamping to zero
        new_data = np.asarray([max(0, data) for data in new_data])

        target, prediction = algo.run(new_data)

        # 'is not None' rather than '!= None': the latter is an
        # element-wise comparison when prediction is a NumPy array.
        if prediction is not None:
            predicted = float(prediction)
            y_time.append(start_time)
            y_target.append(target)
            y_predict.append(predicted)

            if algo.checkSeverity(target, predicted):
                detected.add(start_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        start_time = stop_time  # Increment and loop

    #==================== RESULTS ====================#
    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    # NOTE(review): 'outfile' is not assigned anywhere in this function;
    # presumably it is a module-level name -- confirm.
    writeResults(outfile, results)
    f1_scores(detected, ground_truth)
    print_stats(y_target[1:], y_predict[1:])  # Remove header
    print("Ending analysis. See %s for results." % outfile)

    return results
def startAnalysis(self):
    """Run the analysis on the selected CSV file and save the results.

    The input filename is taken from ``self.attackEdit`` when
    ``self.attackList`` is non-empty, otherwise from ``self.inputEdit``;
    the output filename comes from ``self.resultsEdit``.  The first CSV
    column is read as a numeric timestamp and the remaining columns are
    passed to the algorithm as floats.  Timestamps that fall inside any
    attack interval in ``self.attackList`` form the ground-truth set that
    is passed to ``f1_scores``.

    While the file is processed the main widget is disabled and a
    LoadingWindow is shown; both are restored even when the analysis
    raises, so a failure no longer leaves the GUI disabled.
    """
    # NOTE: print is always called with one pre-formatted string so the
    # output is byte-identical under Python 2 and the code parses on 3.

    # Use filename from attackEdit instead of inputEdit if possible
    if len(self.attackList) > 0:
        infile = str(self.attackEdit.text())
    else:
        infile = str(self.inputEdit.text())

    outfile = str(self.resultsEdit.text())

    granularity = 1
    trainingWin = 24
    forecastingInterval = 1

    print("\nStarting analysis on %s with settings %d %d %d..."
          % (infile, granularity, trainingWin, forecastingInterval))

    # Output lists (first element is the column header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']
    detected = set()
    ground_truth = set()

    loadingWin = None
    try:
        # 'rb' is the mode the csv module expects under Python 2
        with open(infile, 'rb') as csv_file:
            reader = csv.reader(csv_file)

            # Get list of features (first column is time)
            columns = next(reader)[1:]
            print("The following features were found: %s" % (columns,))

            # Algorithm settings
            algo = Algo(granularity, trainingWin, forecastingInterval,
                        len(columns) - 1)
            algo.setEMAParameter(alpha=self.emaSpin.value())
            algo.setSeverityParameters(w=self.severitySpinW.value(),
                                       L=self.severitySpinL.value())

            print("Beginning analysis...")
            loadingWin = LoadingWindow()
            self.mainWidget.setEnabled(False)

            for count, line in enumerate(reader):

                # Read new data from file
                cur_time = float(line[0])
                # Builtin float: np.float was only an alias for it and
                # has been removed from recent NumPy releases.
                new_data = np.asarray(line[1:], float)
                target, prediction = algo.run(new_data)

                # 'is not None' rather than '!= None': the latter is an
                # element-wise comparison when prediction is an array.
                if prediction is not None:
                    predicted = float(prediction)
                    y_time.append(cur_time)
                    y_target.append(target)
                    y_predict.append(predicted)

                    if algo.checkSeverity(target, predicted):
                        detected.add(cur_time)
                        anomalies.append(1)
                    else:
                        anomalies.append(0)

                    # NOTE(review): ground truth is recorded only for
                    # time steps that produced a prediction; confirm
                    # this matches the intended nesting.
                    cur_datetime = dt.datetime.fromtimestamp(cur_time)
                    for attack in self.attackList:
                        if attack.start <= cur_datetime < attack.end:
                            ground_truth.add(cur_time)
                            break

                # Let Qt process pending events every 60 rows
                if count % 60 == 0:
                    QtGui.QApplication.processEvents()

        # The input file is closed at this point; save results
        writeResults(outfile, (y_time, y_target, y_predict, anomalies))
        f1_scores(detected, ground_truth)
        print_stats(y_target[1:], y_predict[1:])  # Remove header
        print("Ending analysis. See %s for results." % outfile)
    finally:
        # Always hand control back to the user, even after a failure
        self.mainWidget.setEnabled(True)
        if loadingWin is not None:
            loadingWin.close()
def main(argv):
    """Run the anomaly-detection analysis on a CSV file.

    The CSV is expected to hold timestamps in its first column, the
    target (power) values in its last column and feature values in
    between.  Constant-valued feature columns are dropped, the remaining
    features are narrowed to a hard-coded pair, scaled, and fed row by
    row (with the target appended) to the algorithm.  Results are written
    next to the input file as ``<infile without .csv>.results.csv``.

    Args:
        argv: Command-line arguments:
            [program, infile, granularity, training_window,
             forecasting_interval].

    Returns:
        The tuple (y_time, y_target, y_predict, anomalies).  Each element
        is a list whose first item is a column-header string, followed by
        one entry per row for which a prediction was produced.

    Raises:
        RuntimeError: If an argument is missing or a numeric argument is
            not an integer.
    """
    # NOTE: print is always called with one pre-formatted string so the
    # output is byte-identical under Python 2 and the code parses on 3.

    # Time the analysis
    tic = time.time()

    # Get commandline arguments
    try:
        infile = argv[1]
        granularity = int(argv[2])
        training_window = int(argv[3])
        forecasting_interval = int(argv[4])
    except (IndexError, ValueError):
        raise RuntimeError("usage: python " + argv[0] + " <infile>"
                           + " <granularity>" + " <training_window>"
                           + " <forecasting_interval>")

    print("\nStarting analysis on %s with settings %d %d %d..."
          % (infile, granularity, training_window, forecasting_interval))

    # Generate outfile name based on infile.  str.rstrip('.csv') strips
    # any trailing run of the characters '.', 'c', 's', 'v' (turning
    # 'stats.csv' into 'stat'), so remove the suffix explicitly instead.
    suffix = '.csv'
    if infile.endswith(suffix):
        base_name = infile[:-len(suffix)]
    else:
        base_name = infile
    outfile = base_name + '.results.csv'

    # Separate feature timestamps, feature data, and power data.
    # Positional .iloc replaces DataFrame.ix, which newer pandas removed.
    dataframe = pd.read_csv(infile)
    timestamps = dataframe.iloc[:, 0].values
    targets = dataframe.iloc[:, -1].values
    features_df = dataframe.iloc[:, 1:-1]

    # Filter features with little or no variance
    print("\nRemoving features due to low variance:")
    for column in features_df.columns:
        values = features_df[column].values
        if values.max() == values.min():
            features_df = features_df.drop(column, axis=1)
            print(column)

    # Hard-coded feature selection; raises KeyError when either column
    # is absent from the input (or was dropped above).
    features_df = features_df[['Audio Sensor', 'Temperature_10 (C)']]

    print("\nThe following features will be used: ")
    for column_name in features_df.columns:
        print(column_name)

    features = scaleFeatures(features_df.values)
    num_features = features.shape[1]

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval,
                num_features)

    # Output lists (first element is the column header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']

    print("Algorithm settings:")

    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller value of alpha, the more averaging takes place
    # A value of 1.0 means no averaging happens
    algo.setEMAParameter(alpha=1.0)

    # Recommended severity parameters from the paper:
    #   w=0.53, L=3.714  -> most sensitive
    #   w=0.84, L=3.719  -> medium sensitive
    #   w=1.00, L=3.719  -> least sensitive
    algo.setSeverityParameters(w=1, L=2)  # Custom sensitivity

    # More settings
    algo.setMaxPrediction(500)

    # Used for F1 calculation
    detected = set()
    ground_truth = set()

    #==================== ANALYSIS ====================#
    print("\nBeginning analysis...")
    for count, row in enumerate(features):

        # Update the user with the current timestamp
        cur_time = timestamps[count]
        if count % 360 == 0:
            cur_dt = dt.datetime.fromtimestamp(cur_time)
            print("Trying time %s" % cur_dt.strftime(DATE_FORMAT))

        # The algorithm receives the features followed by the target
        row = np.append(row, targets[count])
        target, prediction = algo.run(row)

        # If there is a prediction, check for anomalies.
        # 'is not None' rather than '!= None': the latter is an
        # element-wise comparison when prediction is a NumPy array.
        if prediction is not None:
            predicted = float(prediction)
            y_time.append(cur_time)
            y_target.append(target)
            y_predict.append(predicted)

            if algo.checkSeverity(target, predicted):
                detected.add(cur_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        # NOTE: If you plan on injecting attacks, you can add the
        # timestamps here to create a "ground truth" set, which can then
        # be used to calculate the accuracy, precision and F1 scores,
        # e.g. for the windows 1338696000-1338699600,
        # 1338955200-1338958800 and 1339128000-1339131600:
        #     if 1338696000 <= cur_time <= 1338699600:
        #         ground_truth.add(cur_time)

    #==================== RESULTS ====================#
    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    # Call f1_scores(detected, ground_truth) here to show F1 scores
    print_stats(y_target[1:], y_predict[1:])  # Remove header
    print("Run time:  %s" % (time.time() - tic))
    print("Ending analysis. See %s for results." % outfile)

    return results