Example #1
# imports for this example (module paths are assumed; adjust to your layout)
import numpy as np
import torch

import helpers
from config import Config
from channel import Channel
from model import Model
conf = Config("./config.yaml")

# override configuration loaded from config.yaml
conf.dictionary['l_s'] = 250
conf.dictionary['epochs'] = 80
conf.dictionary['dropout'] = 0.2
conf.batch_size = 512      # expect data to come in batches of 512
conf.l_s = 250             # lookback window of 250 historic data points
conf.epochs = 10           # train for at most 10 epochs
conf.dropout = 0.2         # drop 20% of units for regularization
conf.lstm_batch_size = 64  # update parameters after every 64 samples
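# Note: both override styles above work because Config (an assumption based
# on this snippet, not the class's documented contract) appears to load
# config.yaml into .dictionary and mirror each key as an attribute, roughly:
#
#     import yaml
#
#     class Config:
#         def __init__(self, path):
#             with open(path) as f:
#                 self.dictionary = yaml.safe_load(f)
#             for key, value in self.dictionary.items():
#                 setattr(self, key, value)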

# load data for the device/channel to analyze
device = "Armstarknew"
chan = Channel(conf, device)
helpers.make_dirs(conf.use_id, conf, "./")
print(chan)

chan.train = np.loadtxt('./wml_train.csv')
chan.test = np.loadtxt('./wml_test.csv')

# produce overlapping windows of length 260: a 250-point lookback (l_s)
# plus 10 points to predict
chan.shape_data(chan.train, train=True)
chan.shape_data(chan.test, train=False)

# initialize the PyTorch model: two stacked LSTM layers
model = Model(conf, conf.use_id, chan, "./", False)

# optional: restore model weights from a previous run (commented out)
'''
try:
    model.model.load_state_dict(torch.load(model_input_path))
'''
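
For intuition, shape_data above turns a 1-D series into overlapping
(input, target) windows. A minimal sketch of that idea (an illustration,
not the project's implementation; the function name is hypothetical):

import numpy as np

def make_windows(series, l_s=250, n_predictions=10):
    # slide a window of length l_s + n_predictions over the series
    window = l_s + n_predictions
    X, y = [], []
    for i in range(len(series) - window + 1):
        X.append(series[i:i + l_s])            # 250-point lookback input
        y.append(series[i + l_s:i + window])   # next 10 points to predict
    return np.array(X), np.array(y)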
Example #2
                    anom["normalized_error"] = np.mean(e) / np.ptp(y_test)
                    logger.info("normalized prediction error: %s" %
                                anom["normalized_error"])

                    # Error processing (batch)
                    # =========================

                    E_seq, E_seq_scores = err.process_errors(
                        y_test, y_hat, e_s, anom, logger)
                    anom['scores'] = E_seq_scores

                    anom = err.evaluate_sequences(E_seq, anom)
                    anom["num_values"] = y_test.shape[
                        0] + config.l_s + config.n_predictions

                    for key in stats:
                        stats[key] += anom[key]

                    helpers.anom_stats(stats, anom, logger)
                    writer.writerow(anom)

    helpers.final_stats(stats, logger)


if __name__ == "__main__":
    config = Config("config.yaml")
    _id = dt.now().strftime("%Y-%m-%d_%H.%M.%S")
    helpers.make_dirs(_id)
    logger = helpers.setup_logging(config, _id)
    run(config, _id, logger)
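
The normalized error logged above is the mean point-wise error divided by
the range of the test values (np.ptp). A hedged sketch of how such smoothed
errors can be computed before err.process_errors extracts anomalous
sequences (function name and smoothing choice are illustrative assumptions):

import numpy as np
import pandas as pd

def smoothed_errors(y_test, y_hat, smoothing_span=30):
    e = np.abs(y_test - y_hat)                                  # raw errors
    e_s = pd.Series(e).ewm(span=smoothing_span).mean().values   # smoothed
    normalized = np.mean(e) / np.ptp(y_test)                    # as logged
    return e, e_s, normalized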
Example #3
    def __init__(self,
                 labels_path=None,
                 result_path='results/',
                 config_path='config.yaml'):
        """
        Top-level class for running anomaly detection over a group of channels
        with values stored in .npy files. Also evaluates performance against a
        set of labels if provided.

        Args:
            labels_path (str): path to .csv containing labeled anomaly ranges
                for group of channels to be processed
            result_path (str): directory where result .csv files are written
            config_path (str): path to config.yaml

        Attributes:
            labels_path (str): see Args
            results (list of dicts): holds dicts of results for each channel
            result_df (dataframe): results converted to pandas dataframe
            chan_df (dataframe): holds all channel information from labels .csv
            result_tracker (dict): if labels provided, holds results throughout
                processing for logging
            config (obj): Config object holding processing parameters
                loaded from config.yaml
            y_hat (arr): predicted channel values
            id (str): datetime id for tracking different runs
            result_path (str): see Args
        """

        self.labels_path = labels_path
        self.results = []
        self.result_df = None
        self.chan_df = None

        self.result_tracker = {
            'true_positives': 0,
            'false_positives': 0,
            'false_negatives': 0
        }

        self.config = Config(config_path)
        self.y_hat = None

        if not self.config.predict and self.config.use_id:
            self.id = self.config.use_id
        else:
            self.id = dt.now().strftime('%Y-%m-%d_%H.%M.%S')

        helpers.make_dirs(self.id)

        # add logging FileHandler based on ID
        hdlr = logging.FileHandler('data/logs/%s.log' % self.id)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)

        self.result_path = result_path

        if self.labels_path:
            self.chan_df = pd.read_csv(labels_path)
        else:
            chan_ids = [x.split('.')[0] for x in os.listdir('data/test/')]
            self.chan_df = pd.DataFrame({"chan_id": chan_ids})

        logger.info("{} channels found for processing.".format(
            len(self.chan_df)))
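
Given the result_tracker counts accumulated during processing, overall
precision and recall follow directly. A minimal usage sketch (the class
name Detector and the run() entry point are assumptions based on this
excerpt):

detector = Detector(labels_path='labeled_anomalies.csv')
detector.run()  # assumed method that processes each channel

tp = detector.result_tracker['true_positives']
fp = detector.result_tracker['false_positives']
fn = detector.result_tracker['false_negatives']
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
print('precision: {:.3f}, recall: {:.3f}'.format(precision, recall))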