conf = Config("./config.yaml") # override configuration conf.dictionary['l_s'] = 250 conf.dictionary['epochs'] = 80 conf.dictionary['dropout'] = 0.2 conf.batch_size = 512 # expect data coming in in batches conf.l_s = 250 # historic data, 250 data points conf.epochs = 10 # train at most 10 epochs conf.dropout = 0.2 # drop 20% conf.lstm_batch_size=64 # update params after 64 steps # Load data from device="Armstarknew" chan = Channel(conf, device) helpers.make_dirs(conf.use_id, conf, "./") print(chan) chan.train = np.loadtxt('./wml_train.csv') chan.test = np.loadtxt('./wml_test.csv') # producing overlapping windows of length 260 for lookback (250) and prediction (10) chan.shape_data(chan.train, train=True) chan.shape_data(chan.test, train=False) # init Pytorch double stacked LSTM model model = Model(conf, conf.use_id, chan, "./", False) ''' try: model.model.load_state_dict(torch.load(model_input_path))
anom["normalized_error"] = np.mean(e) / np.ptp(y_test) logger.info("normalized prediction error: %s" % anom["normalized_error"]) # Error processing (batch) # ========================= E_seq, E_seq_scores = err.process_errors( y_test, y_hat, e_s, anom, logger) anom['scores'] = E_seq_scores anom = err.evaluate_sequences(E_seq, anom) anom["num_values"] = y_test.shape[ 0] + config.l_s + config.n_predictions for key, value in stats.items(): stats[key] += anom[key] helpers.anom_stats(stats, anom, logger) writer.writerow(anom) helpers.final_stats(stats, logger) if __name__ == "__main__": config = Config("config.yaml") _id = dt.now().strftime("%Y-%m-%d_%H.%M.%S") helpers.make_dirs(_id) logger = helpers.setup_logging(config, _id) run(config, _id, logger)
def __init__(self, labels_path=None, result_path='results/',
             config_path='config.yaml'):
    """
    Drive anomaly detection across a group of channels stored as .npy
    files, and score the results against labeled anomaly ranges when a
    labels file is supplied.

    Args:
        labels_path (str): path to a .csv of labeled anomaly ranges for
            the channels to be processed (optional)
        result_path (str): directory where the result .csv is written
        config_path (str): path to config.yaml

    Attributes:
        labels_path (str): see Args
        results (list of dicts): per-channel result dicts collected
            during processing
        result_df (dataframe): results converted to a pandas dataframe
        chan_df (dataframe): channel info loaded from the labels .csv
            (or inferred from data/test/ when no labels are given)
        result_tracker (dict): running TP/FP/FN counts across channels
        config (obj): parsed configuration object
        y_hat (arr): predicted channel values
        id (str): datetime id distinguishing runs
        result_path (str): see Args
    """
    self.labels_path = labels_path
    self.results = []
    self.result_df = None
    self.chan_df = None
    self.result_tracker = {'true_positives': 0,
                           'false_positives': 0,
                           'false_negatives': 0}
    self.config = Config(config_path)
    self.y_hat = None

    # Reuse an existing run id when not predicting and one is configured;
    # otherwise stamp this run with the current datetime.
    reuse_existing_id = (not self.config.predict) and self.config.use_id
    if reuse_existing_id:
        self.id = self.config.use_id
    else:
        self.id = dt.now().strftime('%Y-%m-%d_%H.%M.%S')

    helpers.make_dirs(self.id)

    # Attach a per-run log file keyed on the run id.
    file_handler = logging.FileHandler('data/logs/%s.log' % self.id)
    log_format = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(log_format)
    logger.addHandler(file_handler)

    self.result_path = result_path

    if self.labels_path:
        # Labels provided: channel list comes from the labels .csv.
        self.chan_df = pd.read_csv(labels_path)
    else:
        # No labels: derive channel ids from the test-data filenames.
        chan_ids = [x.split('.')[0] for x in os.listdir('data/test/')]
        self.chan_df = pd.DataFrame({"chan_id": chan_ids})

    logger.info("{} channels found for processing.".format(
        len(self.chan_df)))