コード例 #1
0
ファイル: algoCSV.py プロジェクト: Merit-Research/BLR-Tools
def main(argv):
    """Run the prediction/anomaly analysis on a CSV file and save the results.

    The CSV is loaded with pandas.  Its first column is used as the
    timestamps, its last column as the target values, and the columns in
    between as candidate features.  Columns whose minimum equals their
    maximum are dropped, after which a fixed pair of feature columns is
    selected, scaled with ``scaleFeatures`` and fed row by row (with the
    target appended) to an ``Algo`` instance.  Every row for which
    ``algo.run`` yields a prediction is recorded together with a 0/1
    anomaly flag from ``algo.checkSeverity``.  The recorded columns are
    written to ``<infile minus '.csv'>.results.csv`` via ``writeResults``.

    Args:
        argv: Command-line style list
            ``[program, infile, granularity, training_window,
            forecasting_interval]``.  The last three entries must be
            convertible with ``int()``.

    Returns:
        The tuple ``(y_time, y_target, y_predict, anomalies)``.  Each
        element is a list whose first item is a header string
        ('Timestamp', 'Target', 'Prediction', 'Anomaly') followed by one
        entry per row that produced a prediction.

    Raises:
        RuntimeError: with a usage message if an argument is missing or
            is not an integer.
    """
    # Time the analysis:
    tic = time.time()

    # Get commandline arguments
    try:
        infile = argv[1]
        granularity = int(argv[2])
        training_window = int(argv[3])
        forecasting_interval = int(argv[4])
    except (IndexError, TypeError, ValueError):
        # Fall back to the script name so an empty argv still produces the
        # usage error instead of an IndexError raised inside this handler.
        program = argv[0] if argv else "algoCSV.py"
        raise RuntimeError("usage: python "
                           + program
                           + " <infile>"
                           + " <granularity>"
                           + " <training_window>"
                           + " <forecasting_interval>")

    print("\nStarting analysis on %s with settings %d %d %d..."
          % (infile, granularity, training_window, forecasting_interval))

    # Generate outfile name based on infile.  Only an exact '.csv' suffix is
    # removed: str.rstrip('.csv') would strip any trailing run of the
    # characters '.', 'c', 's', 'v' (e.g. 'stats.csv' -> 'stat').
    if infile.endswith('.csv'):
        outfile_base = infile[:-len('.csv')]
    else:
        outfile_base = infile
    outfile = outfile_base + '.results.csv'

    # Separate feature timestamps, feature data, and power data.
    # Purely positional selection: first column, last column, the rest.
    dataframe = pd.read_csv(infile)
    timestamps = dataframe.iloc[:, 0].values
    targets = dataframe.iloc[:, -1].values
    features_df = dataframe.iloc[:, 1:-1]

    # Filter features with little or no variance (min == max).
    print("\nRemoving features due to low variance:")
    for column in features_df.columns:
        values = features_df[column].values
        if values.max() == values.min():
            features_df = features_df.drop(column, axis=1)
            print(column)

    # Hard-coded feature selection.
    # NOTE(review): this presumably raises KeyError if either column is
    # absent from the CSV or was removed by the variance filter above.
    features_df = features_df[['Audio Sensor',
                               'Temperature_10 (C)']]

    print("\nThe following features will be used: ")
    for column_name in features_df.columns:
        print(column_name)

    features = scaleFeatures(features_df.values)
    num_features = features.shape[1]

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval,
                num_features)

    # Output lists; the first entry of each is its column header.
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']

    print("Algorithm settings:")

    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller value of alpha, the more averaging takes place
    # A value of 1.0 means no averaging happens
    algo.setEMAParameter(alpha=1.0)

    # Recommended severity parameters from the paper:
    #   w=0.53, L=3.714  (most sensitive)
    #   w=0.84, L=3.719  (medium sensitive)
    #   w=1.00, L=3.719  (least sensitive)
    algo.setSeverityParameters(w=1, L=2)  # Custom sensitivity

    # More settings
    algo.setMaxPrediction(500)

    # Used for F1 calculation
    detected = set()
    ground_truth = set()

    #==================== ANALYSIS ====================#
    print("\nBeginning analysis...")
    for count, feature_row in enumerate(features):

        # Update the user with the current timestamp every 360 rows
        cur_time = timestamps[count]
        if count % 360 == 0:
            cur_dt = dt.datetime.fromtimestamp(cur_time)
            print("Trying time %s" % cur_dt.strftime(DATE_FORMAT))

        # The algorithm takes the features with the target as last element.
        sample = np.append(feature_row, targets[count])
        target, prediction = algo.run(sample)

        # No prediction for this row: nothing to record.
        if prediction is None:
            continue

        predicted_value = float(prediction)
        y_time.append(cur_time)
        y_target.append(target)
        y_predict.append(predicted_value)

        if algo.checkSeverity(target, predicted_value):
            detected.add(cur_time)
            anomalies.append(1)
        else:
            anomalies.append(0)

        # NOTE: If you plan on injecting attacks, you can add the
        # timestamps here to create a "ground truth" set, which can then
        # be used to calculate the accuracy, precision and F1 scores, e.g.:
        #if cur_time >= 1338696000 and cur_time <= 1338699600:
        #    ground_truth.add(cur_time)

    #==================== RESULTS ====================#

    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    #f1_scores(detected, ground_truth) #Uncomment to show F1 scores
    print_stats(y_target[1:], y_predict[1:])  # Remove header
    print("Run time:  %s" % (time.time() - tic))
    print("Ending analysis. See %s for results." % outfile)

    return results