Code Example #1
#!/usr/bin/python3

import DataTypes
import DataLoader

loader = DataLoader.DataLoader("../../dataset/")
data = DataTypes.Data()
# loader.load_all(data) # load all data, use default log ratio
loader.load_subset(data)  # load subset of data, use default size and log ratio
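
This example, like the ones that follow, relies on project-local DataTypes and DataLoader modules (DataTypes.py and DataLoader.py) whose source is not shown. The stub below is only a sketch of the interface the calls imply, with any detail beyond the visible call sites an assumption, and is not the project's actual implementation:

# Interface sketch for DataTypes.py / DataLoader.py (hypothetical).
class Data:
    def __init__(self):
        self.measures = []       # one entry per loaded sample
        self.index_to_time = {}  # sample index -> UNIX timestamp


class DataLoader:
    def __init__(self, path_to_dataset):
        self.path = path_to_dataset

    def load_all(self, data, log_ratio=None):
        ...  # fill data with every sample found under self.path

    def load_subset(self, data, size=None, log_ratio=None):
        ...  # fill data with only the first `size` samples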
Code Example #2
import DataTypes
import DataLoader


def load_data():
    retval = DataTypes.Data()
    loader = DataLoader.DataLoader("./dataset/")
    # loader.load_subset(retval, 1000)
    loader.load_all(retval)
    return retval
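
A call site for this helper might look like the following (hypothetical usage, relying only on the Data fields used in these examples):

data = load_data()
print("loaded %i samples" % len(data.measures))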
Code Example #3
import datetime

import DataTypes
import DataLoader

# SAX primitives from the saxpy package (assumed import paths)
from saxpy.alphabet import cuts_for_asize
from saxpy.paa import paa
from saxpy.sax import ts_to_string
from saxpy.znorm import znorm

# helpers such as load_configuration, get_alphabet_letters, get_hash_table,
# smoothing, put_in_bucket, analyzed_bucket and getting_score are assumed
# to be defined elsewhere in this module


def main(argv):

    # load configuration
    parameters = load_configuration()

    # load parameters

    # dataset
    path_to_dataset = parameters['path_to_dataset']
    load_size = parameters['load_size']

    # SAX
    alphabet_size = parameters['alphabet_size']
    paa_size = parameters['paa_size']
    window_size = parameters['window_size']
    step = parameters['step']
    substring_size = parameters['substring_size']

    # smoothing
    threshold_freq = parameters['threshold_freq']

    # projections
    prj_size = parameters['prj_size']
    prj_iterations = parameters['prj_iterations']
    anomaly_threshold = parameters['anomaly_threshold']

    # loading data
    loader = DataLoader.DataLoader(path_to_dataset)
    data = DataTypes.Data()

    # loader.load_all(data, 200)
    loader.load_subset(data, load_size, 100)

    # period from which to extract anomalies
    begin_date = datetime.datetime.fromtimestamp(data.index_to_time[0])
    end_date = datetime.datetime.fromtimestamp(
        data.index_to_time[load_size - 1])

    if parameters['power_type'] == -1:
        tank = parameters['tank']
        sensor_type = parameters['sensor_type']
        # print(data.measures[0])
        print("Loading sensor type %i data of tank %i from %s to %s" %
              (sensor_type, tank, begin_date, end_date))
        s_values = [m[0][tank][sensor_type] for m in data.measures]
    else:
        power_type = parameters['power_type']
        print("Loading measures of power %i from %s to %s" %
              (power_type, begin_date, end_date))
        s_values = [m[1][power_type] for m in data.measures]

    len_serie = len(s_values)

    # get the first alphabet_size letters of the alphabet
    alphabet = get_alphabet_letters(alphabet_size)
    # create a hash table indexed by all substrings of length prj_size
    hash_table_substrings = get_hash_table(alphabet, prj_size)

    # list containing the score of each window
    anomalies_score = []

    for index in range(0, len_serie, step):
        begin = index
        end = begin + window_size

        if end < len_serie:
            window_values = s_values[begin:end]
            # z-normalize the values of the current window
            window_znorm = znorm(window_values)
            window_paa = paa(window_znorm, paa_size)
            window_string = ts_to_string(window_paa,
                                         cuts_for_asize(alphabet_size))

            # each character of the string corresponds to k values of the series
            k = window_size // paa_size

            # get the smoothed string
            window_smoothed = smoothing(window_string, threshold_freq)

            # fill the hash table by applying random projection
            hash_table_substrings = put_in_bucket(hash_table_substrings,
                                                  window_smoothed, begin,
                                                  prj_iterations, prj_size,
                                                  substring_size, k)

            # total number of entries across all buckets
            total = sum(len(values)
                        for values in hash_table_substrings.values())

            buckets_with_anomalies, bucket_freq = analyzed_bucket(
                hash_table_substrings, total, anomaly_threshold)
            # number of buckets with anomalies
            n_buckets_anomalies = len(buckets_with_anomalies.keys())

            # get the score for the current window
            avg_window_score = getting_score(hash_table_substrings,
                                             buckets_with_anomalies,
                                             n_buckets_anomalies)
            anomalies_score.append(avg_window_score)

            # reset the table for the next window
            hash_table_substrings = get_hash_table(alphabet, prj_size)

        else:
            break

    print(anomalies_score)
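
Code Example #3 calls several helpers (load_configuration, get_alphabet_letters, get_hash_table, smoothing, put_in_bucket, analyzed_bucket, getting_score) whose definitions are not shown. The two that build the projection table can be reconstructed from their call sites and the comments above; the following is a sketch under those assumptions, not the project's actual code:

import itertools
import string


def get_alphabet_letters(alphabet_size):
    # first alphabet_size lowercase letters, e.g. ['a', 'b', 'c'] for size 3
    return list(string.ascii_lowercase[:alphabet_size])


def get_hash_table(alphabet, prj_size):
    # one empty bucket per possible string of length prj_size over the
    # alphabet, i.e. len(alphabet) ** prj_size keys in total
    return {''.join(p): []
            for p in itertools.product(alphabet, repeat=prj_size)}

Note that the table grows as len(alphabet) ** prj_size, so the projection size has to stay small for the per-window reset at the end of the loop to remain cheap.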