class TestLen():

    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Test the Lstm-based autoencoder for anomaly detection')
        parser.add_argument('-c', '--config_file', required=True,
                            help='Configuration file with all the parameters.')
        parser.add_argument('-i', '--input_file', required=True,
                            help='Name of the file with the features.')
        # type=int so the integer comparisons in get_stage() work on the parsed value
        parser.add_argument('-s', '--stage', required=True, type=int,
                            help='Expects the stage type: 1 for train, 2 for validation, 3 for test')
        # get_test_type() below reads args.test_type, so the flag must be declared here
        # (same flag as in AnomalyDetectionTest)
        parser.add_argument('-t', '--test_type', required=True,
                            help='Test type: boolean, on positive or negative data')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.get_stage(self.args.stage)
        self.input_file = self.args.input_file
        self.get_test_type(self.args)

    def get_test_type(self, args):
        if utils.str2bool(args.test_type):
            self.test_type = TestType.positive
        else:
            self.test_type = TestType.negative

    def get_stage(self, stage):
        if stage == 1:
            self.stage = Stage.train
        elif stage == 2:
            self.stage = Stage.validation
        elif stage == 3:
            self.stage = Stage.test
        else:
            exit("Unknown stage type: {}".format(stage))

    def execute(self):
        test_generator = DataGenerator(self.input_file,
                                       self.config_reader.get("batch_size"),
                                       Stage.test,
                                       self.config_reader.get("epochs"),
                                       self.config_reader.get("train_labels"),
                                       self.config_reader.get("test_labels"),
                                       self.test_type)
        class_distribution = test_generator.get_class_distribution()
        for key, value in class_distribution.items():
            print("key, value ", key, value)
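# Minimal entry-point sketch (assumption: the repository's actual driver script is not
# part of this excerpt). TestLen reads its arguments from sys.argv in __init__, so
# running the module directly is enough, e.g.
#   python test_len.py -c config.json -i features.h5 -s 3 -t true
# (the file names above are illustrative only).
if __name__ == '__main__':
    test_len = TestLen()
    test_len.execute()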
class AnomalyDetection():

    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Lstm-based autoencoder for anomaly detection')
        parser.add_argument('-c', '--config_file', required=True,
                            help='Configuration file with all the parameters.')
        parser.add_argument('-i', '--input_file', required=True,
                            help='Name of the file with the features.')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.input_file = self.args.input_file

    def shuffle_files(self):
        # Note: relies on a --shuffle_files flag that is not defined in the parser above.
        return self.args.shuffle_files

    def execute_tf(self):
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=utils.str2bool(self.config_reader.get("allow_soft_placement")),
                log_device_placement=utils.str2bool(self.config_reader.get("log_device_placement")))
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                ae = LSTMAutoencoder(self.config_reader.get("hidden_num"),
                                     self.config_reader.get("batch_size"),
                                     self.config_reader.get("window_size"),
                                     self.config_reader.get("element_num"),
                                     decode_without_input=True)

                global_step = tf.Variable(0, name="global_step", trainable=False)
                optimizer = tf.train.AdamOptimizer(self.config_reader.get("learning_rate"))
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(tf.gradients(ae.loss, tvars),
                                                  self.config_reader.get("max_grad_norm"))
                # Increment the global_step variable defined above so the
                # checkpoint/evaluation counters below actually advance.
                train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

                # Output directory for models and summaries
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
                print("Writing to {}\n".format(out_dir))

                # Train summaries
                loss_summary = tf.summary.scalar("loss", ae.loss)

                # Mean of the loss for validation
                with tf.variable_scope("metrics"):
                    metrics = {'loss': tf.metrics.mean(ae.loss)}
                update_metrics_op = tf.group(*[op for _, op in metrics.values()])
                metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
                metrics_init_op = tf.variables_initializer(metric_variables)

                train_summary_op = tf.summary.merge([loss_summary])
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

                # Validation summaries
                val_summary_op = tf.summary.merge([loss_summary])
                val_summary_dir = os.path.join(out_dir, "summaries", "dev")
                val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

                # Checkpoint directory (TensorFlow assumes this directory already exists, so create it)
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                saver = tf.train.Saver(tf.global_variables(),
                                       max_to_keep=self.config_reader.get("num_checkpoints"))

                sess.run(tf.global_variables_initializer())
                sess.graph.finalize()

                # Define training and validation steps (batch)
                def train_step(inputs):
                    """A single training step on one batch."""
                    print('global_step: %s' % tf.train.global_step(sess, global_step))
                    feed_dict = {ae.input_data: inputs}
                    loss_val, _, summaries, step = sess.run(
                        [ae.loss, train_op, train_summary_op, global_step], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}".format(time_str, step, loss_val))
                    train_summary_writer.add_summary(summaries, step)

                def dev_step():
                    """Evaluates the model on the validation data."""
                    validation_generator = DataGenerator(self.input_file,
                                                         self.config_reader.get("batch_size"),
                                                         Stage.validation,
                                                         self.config_reader.get("epochs"),
                                                         self.config_reader.get("train_labels"),
                                                         self.config_reader.get("test_labels"))
                    validation_generator_data = validation_generator.generate_batches()
                    sess.run(metrics_init_op)
                    for x_val, y_val, date_time in validation_generator_data:
                        feed_dict = {ae.input_data: x_val}
                        loss_val, summaries, step, _ = sess.run(
                            [ae.loss, val_summary_op, global_step, update_metrics_op], feed_dict)
                        time_str = datetime.datetime.now().isoformat()
                        print("{}: step {}, loss {:g}, {} date_time, {} label".format(
                            time_str, step, loss_val, date_time, y_val))
                    metrics_values = {k: v[0] for k, v in metrics.items()}
                    metrics_val = sess.run(metrics_values)
                    mean_summary = tf.Summary()
                    mean_summary.value.add(tag='loss', simple_value=metrics_val["loss"])
                    val_summary_writer.add_summary(mean_summary, step)

                training_generator = DataGenerator(self.input_file,
                                                   self.config_reader.get("batch_size"),
                                                   Stage.train,
                                                   self.config_reader.get("epochs"),
                                                   self.config_reader.get("train_labels"),
                                                   self.config_reader.get("test_labels"))
                training_generator_data = training_generator.generate_batches()
                for x_batch, _, _ in training_generator_data:
                    train_step(x_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % self.config_reader.get("checkpoint_every") == 0:
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
                    if current_step % self.config_reader.get("evaluate_every") == 0:
                        print("Evaluation:\n")
                        dev_step()

                # Show the reconstruction of the last training batch
                input_, output_ = sess.run([ae.input_, ae.output_], {ae.input_data: x_batch})
                print('train result :')
                print('input :', input_[0, :, :].flatten())
                print('output :', output_[0, :, :].flatten())
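# Minimal training entry-point sketch (assumption: the real run script is not part of
# this excerpt). AnomalyDetection parses its arguments in __init__, so running the
# module directly starts training, e.g.
#   python anomaly_detection.py -c config.json -i shuffled_features.h5
# (the file names above are illustrative only).
if __name__ == '__main__':
    anomaly_detection = AnomalyDetection()
    anomaly_detection.execute_tf()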
class DataShuffler():

    def __init__(self):
        parser = argparse.ArgumentParser(description='Shuffle multiple videos')
        parser.add_argument('-c', '--config_file', required=True,
                            help='Configuration file with all the parameters')
        parser.add_argument('-i', '--input_directory', required=True,
                            help='Input directory')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.input_directory = self.args.input_directory
        self.file_pattern = self.config_reader.get("file_pattern")
        self.input_file_list = self.get_input_file_list()
        self.window_size = self.config_reader.get("window_size")
        self.step = self.config_reader.get("step_size")
        self.label_list = self.config_reader.get("labels")
        self.output_file = join(self.input_directory, self.config_reader.get("output_file"))
        print("self.output_file", self.output_file)
        self.xyt_data = []

    def get_input_file_list(self):
        return [
            join(self.input_directory, f) for f in listdir(self.input_directory)
            if isfile(join(self.input_directory, f)) and self.file_pattern in f
        ]

    def generate_split_x_data_into_windows(self, x_data, date_time_data, window_size, step):
        """Splits sequence data into windows. Window size must be odd."""
        N = x_data.shape[0]
        window_start_pos = 0
        while window_start_pos < (N - window_size):
            yield (x_data[window_start_pos:window_start_pos + window_size, :],
                   date_time_data[window_start_pos:window_start_pos + window_size, ])
            window_start_pos += step

    def rreplace(self, s, old, new, occurrence):
        """Replaces the last `occurrence` occurrences of `old` in `s` with `new`."""
        li = s.rsplit(old, occurrence)
        return new.join(li)

    def shuffle_data(self):
        x_data = []
        y_data = []
        date_time_data = []
        for file_name in self.input_file_list:
            print("file_name", file_name)
            f_in = h5py.File(file_name, 'r')
            for group_name, group_object in f_in.items():
                if group_name not in self.label_list:
                    continue
                print("group_name = ", group_name)
                for dataset_name, dataset_object in group_object.items():
                    print("dataset_name = ", dataset_name, "*", dataset_object)
                    if "_x" in dataset_name:
                        time_dataset_name = self.rreplace(dataset_name, "x", "time", 1)
                        my_generator = self.generate_split_x_data_into_windows(
                            dataset_object, group_object[time_dataset_name],
                            self.window_size, self.step)
                        for x_item, date_time in my_generator:
                            x_data.append(x_item)
                            y_data.append(np.string_(group_name))
                            date_time_data.append(date_time)
            f_in.close()
        self.xyt_data = list(zip(x_data, y_data, date_time_data))
        random.shuffle(self.xyt_data)

    def save_data(self):
        x_data, y_data, date_time_data = zip(*self.xyt_data)
        hdf5_file = h5py.File(self.output_file, mode='w')
        x_data_shape = (len(y_data), len(x_data[0]), len(x_data[0][0]))
        hdf5_file.create_dataset("x_data", x_data_shape, np.float64, data=x_data)
        hdf5_file.create_dataset("y_data", data=y_data)
        hdf5_file.create_dataset("date_and_time", data=date_time_data)
        hdf5_file.close()
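# Minimal entry-point sketch for the shuffling step (assumption: the actual driver
# script is not included in this excerpt). shuffle_data() windows and shuffles the
# input sequences, save_data() writes them to the HDF5 output file, e.g.
#   python data_shuffler.py -c config.json -i /path/to/feature/files
# (the file names above are illustrative only).
if __name__ == '__main__':
    data_shuffler = DataShuffler()
    data_shuffler.shuffle_data()
    data_shuffler.save_data()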
class AnomalyDetectionTest():

    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Test the Lstm-based autoencoder for anomaly detection')
        parser.add_argument('-c', '--config_file', required=True,
                            help='Configuration file with all the parameters.')
        parser.add_argument('-i', '--input_file', required=True,
                            help='Name of the file with the features.')
        parser.add_argument('-m', '--model', required=True,
                            help='Checkpoint of the model')
        parser.add_argument('-t', '--test_type', required=True,
                            help='Test type: boolean, on positive or negative data')
        self.args = parser.parse_args()
        self.config_reader = ConfigReader()
        self.config_reader.read(self.args.config_file)
        self.input_file = self.args.input_file
        self.checkpoint_file_path = self.args.model
        self.get_test_type(self.args)

    def get_test_type(self, args):
        if utils.str2bool(args.test_type):
            self.test_type = TestType.positive
        else:
            self.test_type = TestType.negative

    def execute_tf(self):
        test_generator = DataGenerator(self.input_file,
                                       self.config_reader.get("batch_size"),
                                       Stage.test,
                                       self.config_reader.get("epochs"),
                                       self.config_reader.get("train_labels"),
                                       self.config_reader.get("test_labels"),
                                       self.test_type)
        test_generator_data = test_generator.generate_batches()

        checkpoint_file = tf.train.latest_checkpoint(self.checkpoint_file_path)
        graph = tf.Graph()
        with graph.as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=utils.str2bool(self.config_reader.get("allow_soft_placement")),
                log_device_placement=utils.str2bool(self.config_reader.get("log_device_placement")))
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                # Load the saved meta graph and restore the trained variables
                saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                # Fetch the input and loss tensors by name from the restored graph
                inputs = graph.get_operation_by_name("input").outputs[0]
                loss = graph.get_operation_by_name("loss").outputs[0]
                loss_summary = tf.summary.scalar("loss", loss)

                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
                print("Writing to {}\n".format(out_dir))

                test_summary_op = tf.summary.merge([loss_summary])
                test_summary_dir = os.path.join(out_dir, "summaries", "test")
                test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

                i = 0
                for x_data, _, date_time in test_generator_data:
                    feed_dict = {inputs: x_data}
                    test_loss, summary = sess.run([loss, test_summary_op], feed_dict)
                    test_summary_writer.add_summary(summary, i)
                    i = i + 1
                    print("loss", test_loss, date_time)
                test_summary_writer.close()
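# Minimal test entry-point sketch (assumption: the real run script is not shown in
# this excerpt). The class restores the latest checkpoint from the directory passed
# via -m and reports the restored loss on the test batches, e.g.
#   python anomaly_detection_test.py -c config.json -i features.h5 -m runs/1234/checkpoints -t true
# (the file and directory names above are illustrative only).
if __name__ == '__main__':
    anomaly_detection_test = AnomalyDetectionTest()
    anomaly_detection_test.execute_tf()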