#!/usr/bin/python3
import DataTypes
import DataLoader

loader = DataLoader.DataLoader("../../dataset/")
data = DataTypes.Data()
# loader.load_all(data)  # load all data, use the default log ratio
loader.load_subset(data)  # load a subset of the data, using the default size and log ratio
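# Sketch of the Data layout this script assumes. DataTypes.Data is not shown
# in this listing; the field structure below is inferred from how main()
# indexes it, and is an assumption rather than the authoritative definition.
#
#   data.index_to_time[i]                   # POSIX timestamp of sample i
#   data.measures[i][0][tank][sensor_type]  # tank-sensor reading at sample i
#   data.measures[i][1][power_type]         # power reading at sample i
print("loaded %d samples" % len(data.measures))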
def load_data():
    retval = DataTypes.Data()
    loader = DataLoader.DataLoader("./dataset/")
    # loader.load_subset(retval, 1000)
    loader.load_all(retval)
    return retval
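# Hypothetical smoke test for load_data(); not part of the original module.
# It only touches the two Data fields that main() below relies on.
def _demo_load():
    data = load_data()
    print("Loaded %d measurements covering %d timestamps"
          % (len(data.measures), len(data.index_to_time)))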
import datetime

# SAX primitives; these imports assume the `saxpy` package
# (https://github.com/seninp/saxpy), whose module layout matches these names.
from saxpy.znorm import znorm
from saxpy.paa import paa
from saxpy.sax import ts_to_string
from saxpy.alphabet import cuts_for_asize

# load_configuration, get_alphabet_letters, get_hash_table, smoothing,
# put_in_bucket, analyzed_bucket and getting_score are defined elsewhere
# in this project.

def main(argv):
    # load configuration parameters
    parameters = load_configuration()

    # dataset
    path_to_dataset = parameters['path_to_dataset']
    load_size = parameters['load_size']

    # SAX
    alphabet_size = parameters['alphabet_size']
    paa_size = parameters['paa_size']
    window_size = parameters['window_size']
    step = parameters['step']
    substring_size = parameters['substring_size']

    # smoothing
    threshold_freq = parameters['threshold_freq']

    # projections
    prj_size = parameters['prj_size']
    prj_iterations = parameters['prj_iterations']
    anomaly_threshold = parameters['anomaly_threshold']

    # load the data
    loader = DataLoader.DataLoader(path_to_dataset)
    data = DataTypes.Data()
    # loader.load_all(data, 200)
    loader.load_subset(data, load_size, 100)

    # period from which anomalies are extracted
    begin_date = datetime.datetime.fromtimestamp(data.index_to_time[0])
    end_date = datetime.datetime.fromtimestamp(data.index_to_time[load_size - 1])

    if parameters['power_type'] == -1:
        tank = parameters['tank']
        sensor_type = parameters['sensor_type']
        # print(data.measures[0])
        print("Loading sensor %i of tank %i, data from %s to %s"
              % (sensor_type, tank, begin_date, end_date))
        s_values = [data.measures[i][0][tank][sensor_type]
                    for i in range(len(data.measures))]
    else:
        power_type = parameters['power_type']
        print("Loading measures of power %i from %s to %s"
              % (power_type, begin_date, end_date))
        s_values = [data.measures[i][1][power_type]
                    for i in range(len(data.measures))]

    len_serie = len(s_values)

    # get the first n letters of the alphabet
    alphabet = get_alphabet_letters(alphabet_size)

    # hash table indexed by all substrings of length prj_size
    hash_table_substrings = get_hash_table(alphabet, prj_size)

    # anomaly score of each window
    anomalies_score = []

    for index in range(0, len_serie, step):
        begin = index
        end = begin + window_size
        if end >= len_serie:
            break

        # z-normalize the current window, then discretize it with PAA + SAX
        window_values = s_values[begin:end]
        window_znorm = znorm(window_values)
        window_paa = paa(window_znorm, paa_size)
        window_string = ts_to_string(window_paa, cuts_for_asize(alphabet_size))

        # each character of the string corresponds to k values of the series
        k = window_size // paa_size

        # smooth the string
        window_smoothed = smoothing(window_string, threshold_freq)

        # fill the hash table by applying random projection
        hash_table_substrings = put_in_bucket(hash_table_substrings,
                                              window_smoothed, begin,
                                              prj_iterations, prj_size,
                                              substring_size, k)

        # total number of entries across all buckets
        total = sum(len(values) for values in hash_table_substrings.values())

        buckets_with_anomalies, bucket_freq = analyzed_bucket(
            hash_table_substrings, total, anomaly_threshold)

        # number of buckets containing anomalies
        n_buckets_anomalies = len(buckets_with_anomalies.keys())

        # score of the current window
        avg_window_score = getting_score(hash_table_substrings,
                                         buckets_with_anomalies,
                                         n_buckets_anomalies)
        anomalies_score.append(avg_window_score)

        # reset the table for the next window
        hash_table_substrings = get_hash_table(alphabet, prj_size)

    print(anomalies_score)
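# The project's real get_alphabet_letters and get_hash_table are not shown in
# this listing. A minimal sketch consistent with how main() uses them (a dict
# mapping every length-prj_size string over the alphabet to an empty bucket)
# could look like this; treat it as an assumption, not the actual code:
import itertools
import string

def get_alphabet_letters(alphabet_size):
    # First `alphabet_size` lowercase letters, e.g. 3 -> ['a', 'b', 'c'].
    return list(string.ascii_lowercase[:alphabet_size])

def get_hash_table(alphabet, prj_size):
    # One empty bucket per possible projected substring of length prj_size,
    # e.g. alphabet=['a', 'b'], prj_size=2 -> keys 'aa', 'ab', 'ba', 'bb'.
    return {''.join(p): []
            for p in itertools.product(alphabet, repeat=prj_size)}

# Standard entry point, assuming this script is meant to be run directly:
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])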