def main(argv):
    # Time the analysis
    tic = time.time()

    # Get command-line arguments
    try:
        infile = argv[1]
        granularity = int(argv[2])
        training_window = int(argv[3])
        forecasting_interval = int(argv[4])
    except Exception:
        raise RuntimeError("usage: python " + argv[0] + " <infile>" +
                           " <granularity>" + " <training_window>" +
                           " <forecasting_interval>")

    print ("\nStarting analysis on %s with settings %d %d %d..."
           % (infile, granularity, training_window, forecasting_interval))

    # Generate outfile name based on infile
    # (slice the suffix off rather than rstrip('.csv'), which would strip
    # any trailing '.', 'c', 's', or 'v' characters, not the extension)
    outfile = infile[:-len('.csv')] + '.results.csv'

    # Separate feature timestamps, feature data, and power data
    # (.iloc replaces the deprecated .ix for positional indexing)
    dataframe = pd.read_csv(infile)
    timestamps = dataframe.iloc[:, 0].values
    targets = dataframe.iloc[:, -1].values
    features_df = dataframe.iloc[:, 1:-1]

    # Filter out features with little or no variance
    print "\nRemoving features due to low variance:"
    for column in features_df.columns:
        values = features_df[column].values
        if values.max() == values.min():
            features_df = features_df.drop(column, axis=1)
            print column

    #dataframe = dataframe.drop('Audio Sensor', 1)
    #dataframe = dataframe.drop('Luminescence_11 (Lux)', 1)
    features_df = features_df[['Audio Sensor', 'Temperature_10 (C)']]

    print "\nThe following features will be used:"
    for column_name in features_df.columns:
        print column_name

    features = features_df.values
    features = scaleFeatures(features)
    num_features = features.shape[1]

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval,
                num_features)

    # Output lists (the first element of each is its CSV column header)
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']

    print "Algorithm settings:"

    # EWMA additions:
    # alpha is adjustable on a scale of (0, 1]. The smaller the value of
    # alpha, the more averaging takes place; a value of 1.0 means no
    # averaging happens. Conceptually: ema = alpha*x + (1 - alpha)*ema.
    #alpha = float(raw_input('Enter value of alpha: '))
    algo.setEMAParameter(alpha=1.0)
    #algo.setEMAParameter(alpha=0.75)
    #algo.setEMAParameter(alpha=0.5)
    #algo.setEMAParameter(alpha=0.25)

    # Recommended severity parameters from the paper:
    #algo.setSeverityParameters(w=0.53, L=3.714)  # Most sensitive
    #algo.setSeverityParameters(w=0.84, L=3.719)  # Medium sensitivity
    #algo.setSeverityParameters(w=1.00, L=3.719)  # Least sensitive
    algo.setSeverityParameters(w=1, L=2)          # Custom sensitivity

    # More settings
    algo.setMaxPrediction(500)

    # Used for F1 calculation
    detected = set()
    ground_truth = set()

    #==================== ANALYSIS ====================#
    print "\nBeginning analysis..."
    count = 0
    for row in features:

        # Update the user with the current timestamp
        cur_time = timestamps[count]
        if (count % 360) == 0:
            cur_dt = dt.datetime.fromtimestamp(cur_time)
            print "Trying time %s" % cur_dt.strftime(DATE_FORMAT)

        # Append the target so the algorithm sees features plus target
        row = np.append(row, targets[count])
        target, prediction = algo.run(row)

        # If there is a prediction, check for anomalies
        if prediction is not None:
            y_time.append(cur_time)
            y_target.append(target)
            y_predict.append(float(prediction))

            if algo.checkSeverity(target, float(prediction)):
                detected.add(cur_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        # NOTE: If you plan on injecting attacks, you can add the
        # timestamps here to create a "ground truth" set, which can then
        # be used to calculate the accuracy, precision, and F1 scores.
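        # For example, the commented block below marks three one-hour
        # attack windows (Unix timestamps in early June 2012) as ground
        # truth; the specific windows are illustrative and should be
        # replaced with the timestamps of your own injected attacks.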
        #if cur_time >= 1338696000 and cur_time <= 1338699600:
        #    ground_truth.add(cur_time)
        #elif cur_time >= 1338955200 and cur_time <= 1338958800:
        #    ground_truth.add(cur_time)
        #elif cur_time >= 1339128000 and cur_time <= 1339131600:
        #    ground_truth.add(cur_time)

        # Loop back to the beginning
        count += 1

    #==================== RESULTS ====================#
    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)

    #f1_scores(detected, ground_truth)  # Uncomment to show F1 scores
    print_stats(y_target[1:], y_predict[1:])  # [1:] skips the header entry

    print "Run time: ", time.time() - tic
    print "Ending analysis. See %s for results." % outfile

    return results
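
#================ F1 SCORING SKETCH ================#
# f1_scores() is defined elsewhere in this project; the helper below is
# only a minimal sketch of how precision, recall, and F1 could be
# computed from the `detected` and `ground_truth` timestamp sets built
# in main(). The name and exact output format are assumptions, not the
# project's implementation.
def f1_scores_sketch(detected, ground_truth):
    true_pos = len(detected & ground_truth)    # flagged and a real attack
    false_pos = len(detected - ground_truth)   # flagged, but no attack
    false_neg = len(ground_truth - detected)   # attacks that were missed

    # Guard against division by zero when a set is empty
    precision = float(true_pos) / max(true_pos + false_pos, 1)
    recall = float(true_pos) / max(true_pos + false_neg, 1)
    if precision + recall == 0:
        f1 = 0.0
    else:
        f1 = 2 * precision * recall / (precision + recall)

    print "Precision: %.3f, Recall: %.3f, F1: %.3f" % (precision, recall, f1)
    return precision, recall, f1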