Example No. 1
    def do_evaluate(self, day):
        """     
        Evaluation phase
        """
        dataset_fname = self.userConfig.feat_dir + '{}.txt'.format(day)
        # skip days for which no feature file exists
        if not os.path.exists(dataset_fname):
            return False

        # num_chars and max_len come from module scope (not shown in these excerpts)
        input_data, target_data, red_events = process_file(
            dataset_fname, num_chars, max_len)
        self.logger.info('  evaluating: %s - num events: %d  - red events:%d',
                         dataset_fname, len(input_data), len(red_events))

        line_losses = self._eval_loss(input_data, target_data)

        self.process_anomalies_for_max(day, line_losses, red_events)

        # deviation of each event's loss from the day's average
        avg_loss = np.average(line_losses)
        line_losses_diff = line_losses - avg_loss
        self.logger.info('  day: %s - avg eval loss: %s', day, avg_loss)

        self.process_anomalies_for_diff(day, line_losses_diff, red_events)

        return True
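All five examples consume the (input_data, target_data, red_events) triple returned by process_file, whose body is not shown on this page. Below is a minimal sketch of a compatible implementation; the feature-file layout, the character encoding, and the exact meaning of filter_red are assumptions, not the project's actual code.

import numpy as np

def process_file(fname, num_chars, max_len, filter_red=False):
    """Hypothetical sketch: parse one day's feature file into model inputs.

    Assumes each line is '<red_label>,<character sequence>'; the real
    format may differ.
    """
    inputs, targets, red_events = [], [], []
    with open(fname) as f:
        for i, line in enumerate(f):
            label, text = line.rstrip('\n').split(',', 1)
            if label == '1':
                red_events.append((i, text))
                if filter_red:
                    continue  # keep red-team events out of the training set
            # encode characters as integer ids, clipped and padded to max_len
            ids = [min(ord(c), num_chars - 1) for c in text[:max_len]]
            ids += [0] * (max_len - len(ids))
            inputs.append(ids[:-1])   # the model sees the prefix...
            targets.append(ids[1:])   # ...and predicts the next character
    return np.array(inputs), np.array(targets), red_events

With filter_red left False (as in the evaluation examples), the indices stored in red_events line up with the rows of input_data, which is what the red-event lookups in Examples No. 1 and No. 4 rely on.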
Example No. 2
    def do_training(self, day):
        num_epochs = 2

        # keep results for plotting
        train_loss_results = []

        # training phase
        dataset_fname = self.user_datadir + '{0}.txt'.format(day)
        input_data, target_data, red_events = process_file(
            dataset_fname, num_chars, max_len)
        print('processing:', dataset_fname, " - num events:", len(input_data),
              " - red events:", len(red_events))

        training_dataset = tf.data.Dataset.from_tensor_slices(
            (input_data, target_data))
        training_dataset = training_dataset.batch(self.batch_size)

        # train model on a day
        loss_results = self.train(training_dataset, num_epochs)
        train_loss_results.append(loss_results)
        print('loss_results:', loss_results)

        # Save model to a file
        tf.keras.models.save_model(self.model,
                                   self.model_filepath,
                                   overwrite=True,
                                   include_optimizer=False)
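Example No. 2 delegates to self.train, whose body is not shown. The tfe.Iterator usage in Example No. 4 suggests TensorFlow 1.x eager execution, so a compatible loop might look like the sketch below; the name train_epochs, the loss_fn signature, and the optimizer argument are all hypothetical.

import tensorflow as tf
import tensorflow.contrib.eager as tfe

def train_epochs(model, loss_fn, optimizer, dataset, num_epochs):
    """Hypothetical sketch of an eager-mode loop over a batched
    tf.data.Dataset; loss_fn(X, y) is assumed to return a scalar."""
    epoch_losses = []
    for _ in range(num_epochs):
        batch_losses = []
        for X, y in tfe.Iterator(dataset):
            with tf.GradientTape() as tape:
                loss_value = loss_fn(X, y)
            # backpropagate through the model's trainable weights
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            batch_losses.append(float(loss_value))
        epoch_losses.append(sum(batch_losses) / len(batch_losses))
    return epoch_losses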
Example No. 3
    def do_training(self, day):
        num_epochs = 1

        dataset_fname = self.userConfig.feat_dir + '{}.txt'.format(day)

        # skip days for which no feature file exists
        if not os.path.exists(dataset_fname):
            return False

        input_data, target_data, red_events = process_file(dataset_fname,
                                                           num_chars,
                                                           max_len,
                                                           filter_red=True)
        self.logger.info('processing: %s - num events: %d  - red events:%d',
                         dataset_fname, len(input_data), len(red_events))

        if len(input_data) == 0:  # nothing in the file
            return False

        # train model on a day
        loss_results = self._train(input_data, target_data, num_epochs)

        self.logger.info('day: %s - training avg loss: %s', day, loss_results)
        self.output_loss_file.write('{}\n'.format(loss_results))

        # Save model to a file
        self.model.save(self.userConfig.model_filepath)

        return True
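Example No. 3 persists the per-user model with model.save. A later evaluation run would reload it with the standard Keras counterpart; compile=False simply skips restoring the training configuration, which forward-pass-only evaluation does not need (userConfig is the same object built in Example No. 5).

import tensorflow as tf

# reload the per-user model saved by do_training; forward passes only,
# so the optimizer/loss configuration need not be restored
model = tf.keras.models.load_model(userConfig.model_filepath, compile=False)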
Example No. 4
    def do_evaluate(self, day, output_filepath):
        """     
        Evaluation phase
        """
        dataset_fname = self.user_datadir + '{0}.txt'.format(day)
        input_data, target_data, red_events = process_file(
            dataset_fname, num_chars, max_len)
        print('  evaluating:', dataset_fname, " - num events:",
              len(input_data), " - red events:", len(red_events))

        eval_dataset = tf.data.Dataset.from_tensor_slices(
            (input_data, target_data))
        eval_dataset = eval_dataset.batch(self.batch_size)

        line_losses = np.array([])

        # eval using batches of 'batch_size' (tfe is tensorflow.contrib.eager)
        for X, y in tfe.Iterator(eval_dataset):
            batch_loss = self.loss(X, y)
            line_losses = np.append(line_losses, batch_loss)

        possible_anomalies = list(enumerate(line_losses))
        possible_anomalies.sort(key=lambda x: x[1], reverse=True)

        print('    max:', possible_anomalies[:10])
        print('    red events:', [a for a, b in red_events])

        # write top 20 losses to a file with the format (day, score, redevent)
        red_indices = {a for a, b in red_events}
        with open(output_filepath, 'w') as outfile:
            for i, v in possible_anomalies[:20]:
                red = '1' if i in red_indices else '0'
                outfile.write('{0},{1},{2}\n'.format(day, v, red))
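Each output file written above holds lines in the day,score,redevent format. The sketch below shows one way to aggregate those per-day files afterwards; the glob pattern is an assumption based on the directory layout in Example No. 5.

import csv
import glob

rows = []
for path in glob.glob('../data/test/users_losses/*'):  # assumed layout
    with open(path) as f:
        for day, score, red in csv.reader(f):
            rows.append((float(score), int(red), day))

# highest losses first: the most anomalous events across all days
rows.sort(reverse=True)
print(rows[:20])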
Example No. 5
import logging
import tensorflow as tf
import char_keras_lm as lm
from process_utils import process_file, UserConfig


if __name__ == "__main__":

    # without this, logging.info messages below are suppressed by default
    logging.basicConfig(level=logging.INFO)
    users_indir = '../data/users_feats'
    users_lossdir = '../data/test/users_losses'
    users_modeldir = '../data/exper3__all__1epoch__bidi_model/users_models'
    users_logdir = '../data/test/users_logs'

    u = 'U12'
    userConfig = UserConfig()
    userConfig.user_name = u
    userConfig.feat_dir = '{0}/{1}/'.format(users_indir, u)
    userConfig.output_base_filepath = '{0}/{1}_losses'.format(users_lossdir, u)
    userConfig.model_filepath = '{0}/{1}_simple_lm.hdf5'.format(users_modeldir, u)
    userConfig.log_filepath = '{}/{}_log.txt'.format(users_logdir, u)


    day = 10
    char_lm = lm.KerasLM(userConfig)
    dataset_fname = userConfig.feat_dir+'{}.txt'.format(day)

    # num_chars and max_len are assumed to be module-level constants,
    # as in the other examples on this page
    input_data, target_data, red_events = process_file(dataset_fname, num_chars, max_len)
    logging.info('  evaluating: %s - num events: %d  - red events:%d',
                 dataset_fname, len(input_data), len(red_events))

    ### see testing_model.ipynb for implementation
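The evaluation itself is deferred to testing_model.ipynb, but given the do_evaluate signatures above, the driver would presumably end with a call along these lines (hypothetical continuation):

    char_lm.do_evaluate(day)  # mirrors the do_evaluate signature in Example No. 1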