def main(args): if "-h" in args or len(args) < 2: print 'Usage: python %s {compression factor} {input file path}' % (args[0]) exit() compression_factor = int(args[1]) input_file_path = args[2] motes = {} # holds last reading for each mote tr = TraceReader(input_file_path) tw = TraceWriter("%s_compressed.%s" % (os.path.basename(input_file_path).split(".")[0], tr.file_type), tr.arff_attributes) try: for timestamp, mote_id, counter, temperature in tr.read(): mote = motes.get(mote_id, None) if mote is not None: if len(mote['temp_buffer']) >= compression_factor: avg_temp = sum(mote['temp_buffer']) / len(mote['temp_buffer']) initial_timestamp = mote['timestamp_buffer'][0] tw.write((initial_timestamp, mote_id, avg_temp)) del mote['temp_buffer'][:] del mote['timestamp_buffer'][:] else: motes[mote_id] = {'temp_buffer': [], 'timestamp_buffer': []} motes[mote_id]['temp_buffer'].append(temperature) motes[mote_id]['timestamp_buffer'].append(timestamp) finally: tw.close()
def setup(self):
    ''' Find emergencies and do some preprocessing. '''
    trace_reader = TraceReader(self.trace_path)
    trace_gen = trace_reader.parse()

    # Fills the observation list and finds the trace_start and trace_end
    data = trace_gen.next()
    self.trace_start = data['timestamp'] + self.trace_start_offset  # actual trace start
    self.observations.append(self.data_facade(data, self.mote_id))
    for data in trace_gen:
        self.observations.append(self.data_facade(data, self.mote_id))
    self.trace_end = data['timestamp']

    # Fills the hidden_vars list
    if self.spirit_mode:
        with open(self.hidden_vars_path) as file_obj:
            self.mote_ids = file_obj.readline()[1:].split()
            self.hidden_vars = [data[1] for data in file_iter(file_obj)]
        with open(self.weights_path) as file_obj:
            self.weights = [data[1:] for data in file_iter(file_obj)]

    # Finds all emergencies
    self.emergencies = [e for e in self.confirmation_trigger(iter(self.observations), \
                                                             offset=self.trace_start - self.trace_start_offset)]
    self.emergencies = trim_emergencies(self.trace_start, self.emergencies, self.trace_end)
    self.emergencies = join_emergencies(self.emergencies, self.delta)
    assert len(self.emergencies) > 0

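# Plausible behaviour of one of the post-processing helpers used above; the real
# implementations live elsewhere in the project, so this is only an assumption for
# reference. Emergencies are handled as (start_timestamp, temperature, end_timestamp)
# tuples, matching how the prediction scripts below unpack them.
def _join_emergencies_sketch(emergencies, delta):
    '''Merge consecutive emergencies whose gap is smaller than `delta` seconds (assumed).'''
    joined = []
    for start, temp, end in emergencies:
        if joined and start - joined[-1][2] <= delta:
            prev_start, prev_temp, _prev_end = joined[-1]
            joined[-1] = (prev_start, max(prev_temp, temp), end)
        else:
            joined.append((start, temp, end))
    return joined
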
def __init__(self, trace_path, lag):
    oracle_trace = TraceReader(trace_path)
    self.trace_gen = oracle_trace.read()
    for _i in xrange(lag - 1):
        self.trace_gen.next()
    self.training_delay = 0
    self.last_value = 0

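# Assumed companion method (not shown in the original source): an oracle forecaster can
# simply return the observation `lag` steps ahead, which the pre-advanced generator above
# makes available. This is a sketch under that assumption; the real apply() may differ.
def apply(self, _value):
    try:
        data = self.trace_gen.next()
        self.last_value = data[-1]  # assumes the reading is the last field of the tuple
    except StopIteration:
        pass  # keep returning the last known value once the trace is exhausted
    return self.last_value
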
def main(args):
    if len(args) < 2:
        print "Usage: python %s {input_filename} [-compress {factor}] [-i motes_to_ignore]" % args[0]
        exit()
    filename = args[1]
    motes_to_ignore = [int(m) for m in args[args.index('-i') + 1:]] if '-i' in args else []
    compression_factor = int(args[args.index('-compress') + 1]) if '-compress' in args else 0

    tracereader = TraceReader(filename, supress_repetitions=False, auto_timestamps=False, \
                              auto_interpolation=False, motes_to_ignore=motes_to_ignore)

    print "Pre-processing..."
    if filename.endswith("agg"):
        t_adapter = TraceAdapter(tracereader, RAW_FILE_FORMAT, TEMP)
        outname = get_output_filename(filename, ".txt")
    else:
        t_adapter = TraceAdapter(tracereader, UNIFORM_FILE_FORMAT)
        outname = get_output_filename(filename)

    tracewriter = TraceWriter(outname, arff_attributes=t_adapter.arff_attributes)

    print "Processing..."
    try:
        if compression_factor == 0:
            for data in t_adapter.parse():
                tracewriter.write(data)
        else:
            #acc = dict.fromkeys(tracereader.arff_attributes, 0.0)
            acc = None
            count = 0
            for data in t_adapter.parse():
                if count % compression_factor == 0:
                    if acc:
                        # Calculate mean
                        for k, v in acc.iteritems():
                            acc[k] /= compression_factor
                        tracewriter.write(acc)
                    acc = data.copy()
                else:
                    for k, v in data.iteritems():
                        acc[k] += v
                count += 1
    finally:
        tracereader.close()
        tracewriter.close()
    print "Done."

def main():
    filename = r"D:\Giulio\My Dropbox\Projeto Sensores\experiments\temperatura\sala_servidores\samples_04_10_12_12h28m36s.arff"
    #filename = r"D:\Giulio\workspace2\SensorMonitor\output\arff\temps_25_05_12_15h09m48s.arff"
    splits = 1
    #detectors = [CUSUMDetector(anomaly_threshold=0.01, L=0.0, alpha=0.6) for _ in xrange(splits)]
    detectors = [TSBitmaps(lag_window=8, lead_window=8, anomaly_threshold=0.355, N=400, n=100, alphabet="abcd")
                 for _ in xrange(splits)]
    splitter = SPIRITSplitter(detectors)

    tr = TraceReader(filename, supress_repetitions=False, auto_timestamps=False, suppress_rapid_changes=False)
    for data in tr.read():
        anomalies = splitter.update(data)
        if anomalies:
            print data['timestamp'] - 600, data['timestamp'] + 600  #, anomalies

def calc_spirit_errors(timeseries_length, errors_to_calculate, lag, results, \
                       weights_filename, trace_filename):
    '''
    @todo: make sure that the weight vector matches the expected order
    '''
    # calculate errors cutting out the training data
    errors = [list() for _i in xrange(len(errors_to_calculate))]
    weights_file = open(weights_filename)  # spirit weights
    trace_file = open(trace_filename)  # real trace with multiple series
    predicted_data = []
    observed_data = []
    try:
        weights_file_iter = file_iter(weights_file)
        trace_file_iter = TraceReader(trace_filename).parse()
        # advance lag, padding the error lists with zeros for the skipped samples
        for _i in xrange(lag):
            trace_file_iter.next()
            for k in xrange(len(errors_to_calculate)):
                errors[k].append(0)
        for i in xrange(timeseries_length - lag):
            w = weights_file_iter.next()
            _y, predicted_y = results[i]
            reconstructed_x = [predicted_y * w[j] for j in xrange(1, len(w))]  # skip timestamp
            x = [v for k, v in trace_file_iter.next().iteritems() if k != "timestamp"]
            predicted_data.append(reconstructed_x[:1])
            observed_data.append(x[:1])
            for k in xrange(len(errors_to_calculate)):
                error_method = errors_to_calculate[k]
                error = _calc_error(x, reconstructed_x, error_method, True)
                errors[k].append(error)
    finally:
        weights_file.close()
        trace_file.close()

    if PLOT:
        pyplot.plot(observed_data, '-')
        pyplot.plot(predicted_data, '-')
        pyplot.show()

    assert len(errors[0]) == timeseries_length, "%d != %d" % (len(errors[0]), timeseries_length)
    return errors

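# Worked illustration of the reconstruction step above (hypothetical numbers): SPIRIT
# predicts a single hidden variable, and each observed series is rebuilt as that prediction
# times its weight; index 0 of the weight row is skipped as the timestamp column.
#   w = [1361370000, 0.70, 0.71]      # timestamp, weight of series 1, weight of series 2
#   predicted_y = 30.0
#   reconstructed_x = [30.0 * 0.70, 30.0 * 0.71] -> [21.0, 21.3]
# The per-sample error is then computed between reconstructed_x and the observed vector x.
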
def main(args):
    if len(args) < 2:
        print "Usage: python %s {filename} [-compress {factor}]" % args[0]
        exit()
    filename = args[1]
    compression_factor = int(args[3]) if len(args) > 3 else 1

    tr = TraceReader(filename)
    tw = TraceWriter('output.arff', tr.arff_attributes)

    sensors = None
    data_keys = None
    data_keys_len = 0
    out_data = OrderedDict()
    count = 1
    for data in tr.parse():
        if not sensors:
            sensors = [Sensor() for _i in xrange(len(data) - 1)]
            data_keys = data.keys()
            data_keys_len = len(data_keys)
            for key in data_keys:
                out_data[key] = 0.0
        for i in xrange(1, data_keys_len):
            key = data_keys[i]
            out_data[key] += sensors[i - 1].emulate(data[key])
        if count % compression_factor == 0:
            out_data['timestamp'] = data['timestamp'] - compression_factor / 2.
            for i in xrange(1, data_keys_len):
                out_data[data_keys[i]] /= compression_factor
            tw.write(out_data)
            for key in data_keys:
                out_data[key] = 0.0
        count += 1
    tw.close()

def main(args):
    filename = '/home/giulio/Dropbox/Projeto Sensores/experiments/temperatura/sala_servidores/samples_20_02_13_15h05m47s.agg'
    motes_to_ignore = []
    tr = TraceReader(filename, motes_to_ignore=motes_to_ignore)

    mote_first_data = {}
    mote_last_data = {}
    line_count = 0
    for data in tr.read():
        mote_id = data[1]
        if mote_id not in mote_first_data:
            mote_first_data[mote_id] = data
        mote_last_data[mote_id] = data
        line_count += 1
    tr.reset()

    # replicate all the first temps to the smallest timestamp
    min_timestamp = min(data[0] for data in mote_first_data.itervalues())
    for data in mote_first_data.itervalues():
        if data[0] != min_timestamp:
            data = list(data)
            data[0] = min_timestamp
        print " ".join(str(d) for d in data)

    # print all data except the last line
    for data in tr.read():
        print " ".join(str(d) for d in data)
        line_count -= 1
        if line_count == 1:
            break

    # replicate all the last temps to the largest timestamp
    max_timestamp = max(data[0] for data in mote_last_data.itervalues())
    for data in mote_last_data.itervalues():
        if data[0] != max_timestamp:
            data = list(data)
            data[0] = max_timestamp
        print " ".join(str(d) for d in data)

def main(args): if "-h" in args or len(args) < 2: print 'Usage: python %s {compression factor} {input file path}' % ( args[0]) exit() compression_factor = int(args[1]) input_file_path = args[2] motes = {} # holds last reading for each mote tr = TraceReader(input_file_path) tw = TraceWriter( "%s_compressed.%s" % (os.path.basename(input_file_path).split(".")[0], tr.file_type), tr.arff_attributes) try: for timestamp, mote_id, counter, temperature in tr.read(): mote = motes.get(mote_id, None) if mote is not None: if len(mote['temp_buffer']) >= compression_factor: avg_temp = sum(mote['temp_buffer']) / len( mote['temp_buffer']) initial_timestamp = mote['timestamp_buffer'][0] tw.write((initial_timestamp, mote_id, avg_temp)) del mote['temp_buffer'][:] del mote['timestamp_buffer'][:] else: motes[mote_id] = {'temp_buffer': [], 'timestamp_buffer': []} motes[mote_id]['temp_buffer'].append(temperature) motes[mote_id]['timestamp_buffer'].append(timestamp) finally: tw.close()
def read_data(f_reading):
    '''This is specific to our problem: n_motes is the number of motes in the file and
    rating is the elapsed time that can be observed. Returns a dictionary of readings
    divided by mote, converted for the learning algorithm, e.g.:
    {'255': [(0, 0), (1, 1.75), (2, 3)], '254': [(3, 2.55), (4, 6), (5, 7)]}'''
    '''
    data = {}
    for i in range(5):
        data[str(i)] = [(i, uniform(10, 15) * i / 100) for i in range(100, int(uniform(200, 300)))]
    return data
    '''
    try:
        print 'Making data'
        elapsed = os.times()[-1]
        from SharedLibs.tracetools import TraceReader, TraceAdapter, UNIFORM_FILE_FORMAT

        data = {}
        tR = TraceReader(f_reading, motes_to_ignore=[236, 246])
        tA = TraceAdapter(tR, UNIFORM_FILE_FORMAT)
        for moment in tA.parse():
            timestamp = moment['timestamp']
            for mote in moment.keys()[1:]:
                if mote in data:
                    data[mote].append((timestamp, moment[mote]))
                else:
                    data[mote] = [(timestamp, moment[mote])]

        # Average out readings that share the same timestamp for each mote
        for mote in data:
            i, j = 0, 0
            while j < len(data[mote]):
                while j < len(data[mote]) and data[mote][i][0] == data[mote][j][0]:
                    j += 1
                if i != j:
                    data[mote][i:j] = [(data[mote][i][0],
                                        sum(map(lambda x: x[1], data[mote][i:j])) / (j - i))]
                i += 1
                j = i

        elapsed = os.times()[-1] - elapsed
        print '\tData done in %d seconds.\n\tMotes %s' % (elapsed, repr(data.keys())[1:-1])
        #save = open(f_reading + 'data', 'w')
        #pickle.dump(data, save)
        #save.close()
        return data
    except IOError:
        print 'Invalid File'
        return {}

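# Illustrative sketch (hypothetical values): the nested while-loops above collapse
# consecutive readings that share a timestamp into their mean, so a per-mote series such as
#   [(0, 20.0), (0, 22.0), (1, 21.0)]  becomes  [(0, 21.0), (1, 21.0)]
# A compact standalone equivalent, assuming the list is ordered by timestamp:
def _merge_same_timestamps(readings):
    merged = []
    for timestamp, value in readings:
        if merged and merged[-1][0] == timestamp:
            prev_timestamp, prev_sum, prev_count = merged[-1]
            merged[-1] = (prev_timestamp, prev_sum + value, prev_count + 1)
        else:
            merged.append((timestamp, value, 1))
    return [(t, s / float(c)) for t, s, c in merged]
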
def main(args):
    if len(args) < 2:
        print "Usage: python %s {trace path} [-s (spirit mode) {weights path} {original trace path}]" % (args[0], )
        exit()
    input_path = args[1]
    tr = TraceReader(input_path, supress_repetitions=False, auto_interpolation=False, suppress_rapid_changes=False, \
                     motes_to_ignore=[])

    emergency_confirmations = 1
    predictor_confirmations = 1
    lower_temp_threshold = 30.0
    higher_temp_threshold = 30.0
    series_id = "mote_244"
    compression_factor = 1
    lag = 10 * 60
    cmp_lag = lag / compression_factor
    delta = 60
    predictor_delta = 0.0
    error_offset = 12 * 60

    # Filter config
    #filter_args = {"alpha": 9 * 1e-1, "beta": 8 * 1e-3, "k": cmp_lag}
    #filter_args = {"lag": cmp_lag, "window_size": 2 * 60}
    filter_args = {"lag": cmp_lag, "dataset_size": 2 * 60, "order": 2}
    #filter_cls = filters.HoltFilter
    #filter_args = {"alpha": 1 * 1e-3}
    #filter_cls = filters.ExpFilter
    #filter_args = {}
    #filter_cls = filters.DummyFilter
    #filter_args = {"learning_rate" : 1 * 1e-3, "lag": cmp_lag, "dataset_size": 1, "num_last_measures": 1}
    #filter_args = {"learning_rate" : 6 * 1e-7, "lag": cmp_lag, "dataset_size": 1, "num_last_measures": 10}
    #filter_cls = filters.LinearPerceptron
    #filter_cls = filters.RollingPerceptron
    #filter_cls = filters.SigmoidPerceptron
    #filter_cls = filters.PerceptronFilterBreno
    #filter_cls = filters.PerceptronFilterPyBrain
    #filter_cls = filters.AdaptableHoltFilter
    filter_cls = filters.AdaptableARFilter
    forecaster = filter_cls(**filter_args)

    print "# INFO"
    print "# Emergency confirmations: %d" % emergency_confirmations
    print "# Predictor confirmations: %d" % predictor_confirmations
    print "# Temperature threshold: %s degrees" % ((lower_temp_threshold, higher_temp_threshold), )
    print "# Series ID: %s" % series_id
    print "# Lag: %d seconds" % lag
    print "# Forecaster: %s" % repr(forecaster)
    print "# Predictor delta: %f" % predictor_delta
    print "# Compression factor: %d" % compression_factor

    if '-s' in args:
        s_index = args.index('-s')
        weights_path = args[s_index + 1]
        hidden_vars_path = input_path
        spirit_preproc, _predictions = spirit_closure(weights_path, hidden_vars_path, None)
    else:
        spirit_preproc = None

    print "Compressing trace..."
    compressed_trace = compress_trace(tr, compression_factor, series_id)

    print "Looking for emergencies..."
    emergencies = [e for e in confirmation_trigger(compressed_trace, lower_temp_threshold, higher_temp_threshold, \
                                                   series_id, emergency_confirmations, preproc_function=spirit_preproc)]

    train_delay = max(forecaster.training_delay, 1)
    trace_offset = cmp_lag + train_delay
    print "Trace offset:", trace_offset

    print "Making predictions..."
    data_gen = iter(compressed_trace)

    # Train
    trace_start = -1
    for _ in xrange(train_delay):
        data = data_gen.next()
        if trace_start < 0:
            trace_start = data['timestamp'] + trace_offset
        forecaster.apply(data[series_id])
    assert trace_start >= 0

    # Advance notice
    observations = []
    predictions = []
    triggered_emergencies = []

    def predictor_preproc_function(data, mote_id):
        temp = data[mote_id]
        observations.append(temp)
        prediction = forecaster.apply(temp)
        predictions.append(prediction)
        return prediction

    if '-s' in args:
        predictor_preproc_function, predictions = spirit_closure(weights_path, hidden_vars_path, forecaster.apply)

    for emergency_start, emergency_temp, emergency_end in \
            confirmation_trigger(compressed_trace, lower_temp_threshold + predictor_delta, \
                                 higher_temp_threshold - predictor_delta, series_id, predictor_confirmations, \
                                 preproc_function=predictor_preproc_function):
        triggered_emergencies.append((emergency_start + lag, emergency_temp, emergency_end + lag))

    if '-s' in args:
        original_trace_path = args[s_index + 2]
        observations = map(lambda d: d[series_id], compress_trace(TraceReader(original_trace_path), \
                                                                  compression_factor, series_id))

    trace_end = trace_start + (len(observations) - cmp_lag)

    emergencies = trim_emergencies(trace_start, emergencies, trace_end)
    triggered_emergencies = trim_emergencies(trace_start, triggered_emergencies, trace_end)

    emergencies = join_emergencies(emergencies, delta)
    triggered_emergencies = join_emergencies(triggered_emergencies, delta)

    print "Found %d emergencies." % len(emergencies)
    assert len(emergencies) > 0

    mat = thermocast_analyze_predictions(triggered_emergencies, emergencies, lag)
    underdetected_time, overdetected_time, detected_time = xor_analyze_predictions(triggered_emergencies, emergencies)
    recall = detected_time / (detected_time + underdetected_time + 1e-100)
    FAR = overdetected_time / (detected_time + overdetected_time + 1e-100)

    del observations[:cmp_lag]  # removes data that is not subject to prediction
    del predictions[-cmp_lag:]  # removes predictions that cannot be followed by data

    total_detection_time = int(sum(e[2] - e[0] for e in emergencies))
    timeseries_length = len(observations)
    errors = calc_errors(timeseries_length, (MSE, ), map(lambda a: ([a[0]], [a[1]]), zip(observations, predictions)), \
                         multivariate=True, offset=error_offset)
    rmse = math.sqrt(sum(errors[0]) / (timeseries_length - error_offset))

    print "Known emergency time : %d seconds" % total_detection_time
    print "Recall : %.4f" % recall
    print "FAR : %.4f" % FAR
    print "MAT : %.4f seconds" % mat
    print "RMSE : %.4f" % rmse

    # calculate average tardiness:
    #avg_tardiness = calc_avg_tardiness(emergencies, triggered_emergencies)
    #print "Avg tardiness: %.1f seconds" % (avg_tardiness,)

    if PLOT:
        print "Plotting..."
        pyplot.grid(True)
        dates = matplotlib.dates.date2num([datetime.datetime.fromtimestamp(trace_start + i * compression_factor) \
                                           for i in xrange(len(observations))])
        p1, = pyplot.plot_date(dates, observations, '-')
        p2, = pyplot.plot_date(dates, predictions, '-')
        pyplot.plot_date(dates, [lower_temp_threshold] * len(dates), '--', color="red")  # temperature threshold
        pyplot.plot_date(dates, [higher_temp_threshold] * len(dates), '--', color="red")  # temperature threshold
        pyplot.plot_date(dates, [lower_temp_threshold + predictor_delta] * len(dates), '--', color="orange")  # temperature threshold
        pyplot.plot_date(dates, [higher_temp_threshold - predictor_delta] * len(dates), '--', color="orange")  # temperature threshold

        def plot_emergencies(emergency_list, color, style, offset=0.5):
            for emergency_time, emergency_temp, emergency_end in emergency_list:
                pyplot.plot_date((datetime.datetime.fromtimestamp(emergency_time), \
                                  datetime.datetime.fromtimestamp(emergency_end)), \
                                 [emergency_temp + offset] * 2, style, color=color, linewidth=3)

        # plot real emergencies
        plot_emergencies(emergencies, "red", '-', 1)
        # plot detected emergencies
        plot_emergencies(triggered_emergencies, "orange", '-', -1)

        pyplot.legend((p2, p1), ("predictions", "observations"))
        pyplot.show()

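# Numeric illustration of the detection metrics computed above (hypothetical values):
# with detected_time = 540 s, underdetected_time = 60 s and overdetected_time = 180 s,
#   recall = 540 / (540 + 60)  = 0.90   (fraction of real emergency time that was caught)
#   FAR    = 180 / (540 + 180) = 0.25   (fraction of alarm time that was spurious)
# The 1e-100 terms in the script only guard against division by zero.
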
def main(args):
    if len(args) < 2:
        print "Usage: python %s {trace path} [-s (spirit mode) {weights path} {original trace path}]" % (args[0])
        exit()
    input_path = args[1]
    weights_path = None
    original_trace_path = None
    spirit_mode = '-s' in args
    if spirit_mode:
        assert not MULTIVARIATE
        spirit_mode_index = args.index('-s')
        weights_path = args[spirit_mode_index + 1]
        original_trace_path = args[spirit_mode_index + 2]

    tr = TraceReader(input_path)
    lag = int(5 * TIME_MULT)  # time units (seconds)
    errors_to_calculate = (filters.MSE, filters.MAE)

    #filter_args = {"alpha": 9 * 1e-1, "beta": 2 * 1e-2, "k": lag}
    #filter_args = {"alpha": 4 * 1e-1}
    #filter_args = {"num_last_measures" : 7, "learning_rate" : 1e-4, "lag": lag, "dataset_size": 1}
    #filter_args = {"dataset_size": 2 * TIME_MULT, "lag": lag, "order": 2}
    #filter_args = {"dataset_size": sys.maxint, "order": 1, "lag": lag, "optimize": False, \
    #               "c": 0, "phi": [1]}
    filter_args = {"lag": lag, "window_size": 3 * TIME_MULT}
    #filter_args = {}

    #filter_cls = filters.ExpAvg
    #filter_cls = filters.HoltFilter
    filter_cls = filters.AdaptableHoltFilter
    #filter_cls = filters.DummyFilter
    #filter_cls = filters.SigmoidPerceptron
    #filter_cls = filters.HardLearningLinearPerceptron
    #filter_cls = filters.LinearPerceptron
    #filter_cls = filters.MultiLayerPerceptron
    #filter_cls = filters.RollingPerceptron
    #filter_cls = filters.LazyRollingPerceptron
    #filter_cls = filters.LazyLinearPerceptron
    #filter_cls = filters.FiniteDiffPerceptron
    #filter_cls = filters.DiffPerceptron
    #filter_cls = filters.Oracle
    #filter_cls = filters.Bote
    #filter_cls = filters.SmoothingBote
    #filter_cls = filters.AdaptableARFilter
    #filter_cls = filters.ARFilter

    if filter_cls == filters.Oracle:
        filter_args = {'trace_path': input_path, "lag": lag}

    if MULTIVARIATE:
        forecaster = MultivariateFilter(filter_cls, filter_args, len(tr.arff_attributes) - 1)  # ignore timestamp
    else:
        forecaster = filter_cls(**filter_args)

    mote_id = "mote_239"
    offset = lag + forecaster.training_delay
    desired_offset = 12 * TIME_MULT
    offset = max(offset, desired_offset)

    print "# INFO"
    print "# Lag: %d" % (lag, )
    print "# Errors to calculate: %s" % (", ".join(filters.ERROR_TO_STR[x] for x in errors_to_calculate), )
    print "# Forecaster: %s" % (forecaster, )
    print "# Offset: %d" % (offset, )

    start_time = time.time()
    results = []

    print "Creating buffer..."
    # creates a buffer that consists of the length of the lag window
    data_gen = tr.read()

    # fill buffer
    data_buffer = deque(maxlen=lag + 1)
    for _i in xrange(lag + 1):
        data = data_gen.next()
        # exclude timestamp
        data_buffer.append(get_data(data, mote_id, tr, multivariate=MULTIVARIATE))

    print "Making predictions..."
    # store observations and predictions
    count = 0
    for data in data_gen:
        observation = data_buffer[-1]
        prediction = forecaster.apply(data_buffer[0])
        results.append((observation, prediction))
        data_buffer.append(get_data(data, mote_id, tr, multivariate=MULTIVARIATE))
        count += 1
        if count % 500 == 0 and isinstance(forecaster, PerceptronBase):
            print "%8d: %s" % (count, forecaster.debug()), len(forecaster.data)
    '''
    for fc in forecaster.filters:
        fc.optimize_parameters(fc._data, fc._model)
        print [val / 1e+11 for val in fc._model.itervalues()]
    '''

    print "Calculating errors..."
    timeseries_length = len(results)
    if spirit_mode:
        errors = calc_spirit_errors(timeseries_length, errors_to_calculate, lag, results, weights_path, original_trace_path)
    else:
        errors = calc_errors(timeseries_length, errors_to_calculate, results, multivariate=MULTIVARIATE, offset=offset)

    print "# RESULTS"
    # create averages
    for k in xrange(len(errors_to_calculate)):
        assert len(errors[k]) == timeseries_length
        if MODE == 'avg':
            avg_error = sum(errors[k][offset:]) / (timeseries_length - offset)
        elif MODE == 'max':
            avg_error = max(errors[k][offset:])
        if errors_to_calculate[k] == filters.MSE and RMSE:
            avg_error = math.sqrt(avg_error)
            print "(RMSE mode)"
        print "%s: %f" % (filters.ERROR_TO_STR[errors_to_calculate[k]], avg_error)

    print "\nElapsed: %f secs" % (time.time() - start_time)