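The snippets below are shown without their module-level imports. A header along the following lines is assumed; the project-local module names (config_reader, data_generator, lstm_autoencoder, utils) are guesses, only the identifiers they export actually appear in the code:

import argparse
import datetime
import os
import random
import sys
import time
from os import listdir
from os.path import isfile, join

import h5py
import numpy as np
import tensorflow as tf

# Project-local modules; the exact module paths are assumptions.
from config_reader import ConfigReader
from data_generator import DataGenerator, Stage, TestType
from lstm_autoencoder import LSTMAutoencoder
import utils
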
class TestLen():
    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Test the Lstm-based autoencoder for anomaly detection'
        )
        parser.add_argument('-c',
                            '--config_file',
                            required=True,
                            help='Configuration file with all the parameters.')
        parser.add_argument('-i',
                            '--input_file',
                            required=True,
                            help='Name of the file with the features.')
        parser.add_argument(
            '-s',
            '--stage',
            required=True,
            type=int,
            help='Expects the stage type: 1 for train, 2 for validation, 3 for test'
        )
        # get_test_type() below reads args.test_type, so the flag has to be declared
        # here as well; it mirrors the -t option of AnomalyDetectionTest.
        parser.add_argument(
            '-t',
            '--test_type',
            required=True,
            help='Test type: boolean, on positive or negative data')

        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.get_stage(self.args.stage)
        self.input_file = self.args.input_file
        self.get_test_type(self.args)

    def get_test_type(self, args):
        if utils.str2bool(args.test_type):
            self.test_type = TestType.positive
        else:
            self.test_type = TestType.negative

    def get_stage(self, stage):
        if stage == 1:
            self.stage = Stage.train
        elif stage == 2:
            self.stage = Stage.validation
        elif stage == 3:
            self.stage = Stage.test
        else:
            sys.exit("Unknown stage: {}".format(stage))

    def execute(self):
        test_generator = DataGenerator(self.input_file,
                                       self.config_reader.get("batch_size"),
                                       Stage.test,
                                       self.config_reader.get("epochs"),
                                       self.config_reader.get("train_labels"),
                                       self.config_reader.get("test_labels"),
                                       self.test_type)

        class_distribution = test_generator.get_class_distribution()
        for key, value in class_distribution.items():
            print(key, value)
class AnomalyDetection():
    def __init__(self):
        parser = argparse.ArgumentParser(description='Lstm-based autoencoder for anomaly detection')
        parser.add_argument('-c', '--config_file', required=True, help='Configuration file with all the parameters.')
        parser.add_argument('-i', '--input_file', required=True, help='Name of the file with the features.')
        # shuffle_files() below reads args.shuffle_files, which the parser never defined;
        # the flag added here is an assumption so that the method does not fail.
        parser.add_argument('--shuffle_files', action='store_true',
                            help='Shuffle the order of the input files.')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.input_file = self.args.input_file

    def shuffle_files(self):
        return self.args.shuffle_files

    def execute_tf(self):
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=utils.str2bool(self.config_reader.get("allow_soft_placement")),
                log_device_placement=utils.str2bool(self.config_reader.get("log_device_placement"))
            )
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                ae = LSTMAutoencoder(
                    self.config_reader.get("hidden_num"),
                    self.config_reader.get("batch_size"),
                    self.config_reader.get("window_size"),
                    self.config_reader.get("element_num"),
                    decode_without_input=True
                )

                global_step = tf.Variable(0, name="global_step", trainable=False)
                optimizer = tf.train.AdamOptimizer(self.config_reader.get("learning_rate"))
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(tf.gradients(ae.loss, tvars), self.config_reader.get("max_grad_norm"))
                # Use the global_step variable defined above so the step counter
                # actually advances with each training op.
                train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

                # Output directory for models and summaries
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
                print("Writing to {}\n".format(out_dir))

                # Train Summaries
                loss_summary = tf.summary.scalar("loss", ae.loss)

                #mean of loss for validation
                with tf.variable_scope("metrics"):
                    metrics = {'loss': tf.metrics.mean(ae.loss)}

                update_metrics_op = tf.group(*[op for _, op in metrics.values()])
                metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
                metrics_init_op = tf.variables_initializer(metric_variables)

                train_summary_op = tf.summary.merge([loss_summary])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

                # Validation summaries
                val_summary_op = tf.summary.merge([loss_summary, ])
                val_summary_dir = os.path.join(out_dir, "summaries", "dev")
                val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

                # Checkpoint directory (Tensorflow assumes this directory already exists so we need to create it)
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=self.config_reader.get("num_checkpoints"))

                sess.run(tf.global_variables_initializer())
                sess.graph.finalize()

                # Define training and validation steps (batch)
                def train_step(inputs):
                    """
                    A single training step
                    """
                    print('global_step: %s' % tf.train.global_step(sess, global_step))
                    feed_dict = {
                        ae.input_data: inputs,
                    }
                    (loss_val, _, summaries, step) = sess.run(
                        [ae.loss, train_op, train_summary_op, global_step],
                        feed_dict
                    )
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}".format(time_str, step, loss_val))
                    train_summary_writer.add_summary(summaries, step)

                def dev_step():
                    """
                    Evaluates model on a validation data
                    """
                    validation_generator = DataGenerator(
                        self.input_file,
                        self.config_reader.get("batch_size"),
                        Stage.validation,
                        self.config_reader.get("epochs"),
                        self.config_reader.get("train_labels"),
                        self.config_reader.get("test_labels")
                    )
                    validation_generator_data = validation_generator.generate_batches()
                    sess.run(metrics_init_op)

                    for x_val, y_val, date_time in validation_generator_data:
                        feed_dict = {
                            ae.input_data: x_val,
                        }
                        (loss_val, summaries, step, mean_val) = sess.run(
                            [
                                ae.loss,
                                val_summary_op,
                                global_step,
                                update_metrics_op
                            ],
                            feed_dict
                        )
                        time_str = datetime.datetime.now().isoformat()
                        print("{}: step {}, loss {:g}, {} date_time, {} label"
                            .format(time_str, step, loss_val, date_time, y_val)
                        )

                    metrics_values = {k: v[0] for k, v in metrics.items()}
                    metrics_val = sess.run(metrics_values)

                    # Log the mean validation loss as a single summary point.
                    mean_summary = tf.Summary()
                    mean_summary.value.add(tag='loss', simple_value=metrics_val["loss"])
                    val_summary_writer.add_summary(mean_summary, step)


                training_generator = DataGenerator(
                    self.input_file,
                    self.config_reader.get("batch_size"),
                    Stage.train,
                    self.config_reader.get("epochs"),
                    self.config_reader.get("train_labels"),
                    self.config_reader.get("test_labels")
                )
                training_generator_data = training_generator.generate_batches()

                for x_batch, _, _ in training_generator_data:
                    train_step(x_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % self.config_reader.get("checkpoint_every") == 0:
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))

                    if current_step % self.config_reader.get("evaluate_every") == 0:
                        print("Evaluation:\n")
                        dev_step()

                (input_, output_) = sess.run([ae.input_, ae.output_], {ae.input_data: x_batch})
                print('train result :')
                print('input :', input_[0, :, :].flatten())
                print('output :', output_[0, :, :].flatten())
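
AnomalyDetection.execute_tf reads all of its hyper-parameters through ConfigReader. The configuration file format is not shown, but the keys are fixed by the code; a hypothetical configuration, written as a plain Python dict purely for illustration (all values are made up), would need at least:

example_config = {
    "allow_soft_placement": "True",   # passed through utils.str2bool
    "log_device_placement": "False",
    "hidden_num": 64,                 # LSTM hidden units
    "batch_size": 32,
    "window_size": 25,                # time steps per window
    "element_num": 10,                # features per time step
    "learning_rate": 0.001,
    "max_grad_norm": 5.0,
    "num_checkpoints": 5,
    "checkpoint_every": 100,
    "evaluate_every": 100,
    "epochs": 10,
    "train_labels": ["normal"],
    "test_labels": ["anomaly"],
}

Training would then be started simply with AnomalyDetection().execute_tf().
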
class DataShuffler():
    def __init__(self):
        parser = argparse.ArgumentParser(description='Shuffle multiple videos')
        parser.add_argument('-c',
                            '--config_file',
                            required=True,
                            help='Configuration file with all the parameters')
        parser.add_argument('-i',
                            '--input_directory',
                            required=True,
                            help='Input directory')
        self.args = parser.parse_args()

        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)

        self.input_directory = self.args.input_directory
        self.file_pattern = self.config_reader.get("file_pattern")
        self.input_file_list = self.get_input_file_list()
        self.window_size = self.config_reader.get("window_size")
        self.step = self.config_reader.get("step_size")
        self.label_list = self.config_reader.get("labels")
        self.output_file = join(self.input_directory,
                                self.config_reader.get("output_file"))
        print("output_file:", self.output_file)
        self.xyt_data = []

    def get_input_file_list(self):
        return [
            join(self.input_directory, f)
            for f in listdir(self.input_directory)
            if isfile(join(self.input_directory, f)) and self.file_pattern in f
        ]

    def generate_split_x_data_into_windows(self, x_data, date_time_data,
                                           window_size, step):
        """Splits sequence data into windows. Window size must be odd."""
        N = x_data.shape[0]
        window_start_pos = 0
        while window_start_pos < (N - window_size):
            yield (x_data[window_start_pos:window_start_pos + window_size, :],
                   date_time_data[window_start_pos:window_start_pos +
                                  window_size, ])
            window_start_pos += step

    def rreplace(self, s, old, new, occurrence):
        """Replaces the last `occurrence` occurrences of `old` in `s` with `new`."""
        li = s.rsplit(old, occurrence)
        return new.join(li)

    def shuffle_data(self):
        x_data = []
        y_data = []
        date_time_data = []
        for file_name in self.input_file_list:
            print("file_name", file_name)
            f_in = h5py.File(file_name, 'r')
            for group_name, group_object in f_in.items():
                if group_name not in self.label_list:
                    continue
                print("group_name = ", group_name)
                for dataset_name, dataset_object in group_object.items():
                    print("dataset_name = ", dataset_name, "*", dataset_object)
                    if ("_x" in dataset_name):
                        time_dataset_name = self.rreplace(
                            dataset_name, "x", "time", 1)
                        my_generator = self.generate_split_x_data_into_windows(
                            dataset_object, group_object[time_dataset_name],
                            self.window_size, self.step)
                        for x_item, date_time in my_generator:
                            x_data.append(x_item)
                            y_data.append(np.string_(group_name))
                            date_time_data.append(date_time)
            f_in.close()
        self.xyt_data = list(zip(x_data, y_data, date_time_data))
        random.shuffle(self.xyt_data)

    def save_data(self):
        x_data, y_data, date_time_data = zip(*self.xyt_data)
        hdf5_file = h5py.File(self.output_file, mode='w')
        x_data_shape = (len(y_data), len(x_data[0]), len(x_data[0][0]))
        hdf5_file.create_dataset("x_data",
                                 x_data_shape,
                                 np.float64,
                                 data=x_data)
        hdf5_file.create_dataset("y_data", data=y_data)
        hdf5_file.create_dataset("date_and_time", data=date_time_data)

        hdf5_file.close()
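
DataShuffler expects the input directory to contain HDF5 files whose top-level groups are named after the configured labels and whose datasets come in *_x / *_time pairs; it cuts them into windows, shuffles the windows, and writes them to a single HDF5 file with x_data, y_data and date_and_time datasets. A minimal driver, assuming the usual entry-point pattern, would be:

# Hypothetical invocation:
#   python data_shuffler.py -c config.json -i /path/to/hdf5_dir
if __name__ == '__main__':
    shuffler = DataShuffler()
    shuffler.shuffle_data()
    shuffler.save_data()
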
class AnomalyDetectionTest():
    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Test the Lstm-based autoencoder for anomaly detection'
        )
        parser.add_argument('-c',
                            '--config_file',
                            required=True,
                            help='Configuration file with all the parameters.')
        parser.add_argument('-i',
                            '--input_file',
                            required=True,
                            help='Name of the file with the features.')
        parser.add_argument('-m',
                            '--model',
                            required=True,
                            help='Checkpoint of the model')
        parser.add_argument(
            '-t',
            '--test_type',
            required=True,
            help='Test type: boolean, on positive or negative data')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.input_file = self.args.input_file
        self.checkpoint_file_path = self.args.model
        self.get_test_type(self.args)

    def get_test_type(self, args):
        if utils.str2bool(args.test_type):
            self.test_type = TestType.positive
        else:
            self.test_type = TestType.negative

    def execute_tf(self):
        test_generator = DataGenerator(self.input_file,
                                       self.config_reader.get("batch_size"),
                                       Stage.test,
                                       self.config_reader.get("epochs"),
                                       self.config_reader.get("train_labels"),
                                       self.config_reader.get("test_labels"),
                                       self.test_type)
        test_generator_data = test_generator.generate_batches()
        checkpoint_file = tf.train.latest_checkpoint(self.checkpoint_file_path)
        graph = tf.Graph()
        with graph.as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=utils.str2bool(
                    self.config_reader.get("allow_soft_placement")),
                log_device_placement=utils.str2bool(
                    self.config_reader.get("log_device_placement")))
            sess = tf.Session(config=session_conf)

            with sess.as_default():
                saver = tf.train.import_meta_graph(
                    "{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                inputs = graph.get_operation_by_name("input").outputs[0]
                loss = graph.get_operation_by_name("loss").outputs[0]

                loss_summary = tf.summary.scalar("loss", loss)

                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(
                    os.path.join(os.path.curdir, "runs", timestamp))
                print("Writing to {}\n".format(out_dir))

                test_summary_op = tf.summary.merge([loss_summary])
                test_summary_dir = os.path.join(out_dir, "summaries", "test")
                test_summary_writer = tf.summary.FileWriter(
                    test_summary_dir, sess.graph)

                for i, (x_data, _, date_time) in enumerate(test_generator_data):
                    feed_dict = {
                        inputs: x_data,
                    }
                    (test_loss, summary) = sess.run([loss, test_summary_op],
                                                    feed_dict)
                    test_summary_writer.add_summary(summary, i)
                    print("loss", test_loss, date_time)

                test_summary_writer.close()
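
Testing restores the latest checkpoint from the runs/<timestamp>/checkpoints directory produced during training. A sketch of a hypothetical entry point (paths and file names are placeholders):

# Hypothetical invocation:
#   python anomaly_detection_test.py -c config.json -i features.h5 \
#       -m runs/1234567890/checkpoints -t True
if __name__ == '__main__':
    tester = AnomalyDetectionTest()
    tester.execute_tf()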