def predict(self, X):
    """Score samples using the saved model and density estimator.

    The model checkpoint and the density-estimator checkpoint are both
    restored into a fresh session before `self.posterior` is evaluated.

    Parameters
    ----------
    X : np.ndarray
        Input features, with shape like `self.X`.

    Returns
    -------
    predictions : np.ndarray
        Array of scores, with shape `X.shape[0]`.
    """
    with tf.Session(config=self.config) as sess:
        # Restore the model first, then the density estimator.
        for checkpoint_name in (self.model_name, self.pmf_name):
            self.saver.restore(sess, self.save_path.format(checkpoint_name))

        if self.normalize:
            # Reuse the feature range stored at training time.
            lower = self.feature_min.eval(session=sess)
            upper = self.feature_max.eval(session=sess)
            X = ds.rescale(X, lower, upper, -1, 1)

        return sess.run(self.posterior, feed_dict={self.X: X})
def train_pmf(self, X, ignore_norm=False):
    """Fit the density estimator on top of the saved model.

    The model checkpoint is restored, `self.p_uniform` is set from the
    number of distinct scores the model produces on `X`, any special op
    whose name contains 'train_pmf' is run, and then `self.bayes_opt` is
    stepped once per epoch on a random batch. The result is saved under
    `self.pmf_name`.

    Parameters
    ----------
    X : np.ndarray
        Input features, with shape like `self.X`.
    ignore_norm : bool, optional
        Ignore normalization, default is False. Pass True when `X` has
        already been rescaled by the caller.
    """
    params = self.pmf_params
    n_samples = X.shape[0]
    assert params.batch_size < n_samples, (
        'Batch size is larger than number of samples')

    with tf.Session(config=self.config) as sess:
        sess.run(self.init_op)
        self.saver.restore(sess, self.save_path.format(self.model_name))

        if self.normalize and not ignore_norm:
            lower = X.min(axis=0)
            upper = X.max(axis=0)
            X = ds.rescale(X, lower, upper, -1, 1)
            # Persist the range so it can be reused at prediction time.
            sess.run(self.feature_min.assign(lower))
            sess.run(self.feature_max.assign(upper))

        # Uniform probability is one over the number of distinct scores.
        scores, pmf_in = sess.run(
            [self.scores, self.pmf_in], feed_dict={self.X: X})
        n_distinct = np.unique(scores).shape[0]
        sess.run(self.p_uniform.assign(1 / n_distinct))

        # Special ops are selected by name.
        for op_name, special_op in self.special_ops.items():
            if 'train_pmf' not in op_name:
                continue
            special_op(sess, pmf_in)

        batches = ds.random_batcher([X], params.batch_size)

        self.print('Training {}'.format(self.pmf_name))
        self.print('Epoch | Loss')
        for step in range(params.n_epochs):
            batch_x, = next(batches)
            _, loss = sess.run(
                [self.bayes_opt, self.bayes_loss],
                feed_dict={self.X: batch_x})
            if step % params.display_step == 0:
                self.print('{0:05} | {1:7.5f}'.format(step + 1, loss))
        self.print('Finished training density estimator')

        # Save the trained density estimator.
        checkpoint = self.saver.save(
            sess, self.save_path.format(self.pmf_name))
        self.print('Model saved in file: {}'.format(checkpoint))
def train_model(self, X):
    """Fit the model and save it under `self.model_name`.

    Any special op whose name contains 'train_model' is run first. The
    optimizer `self.model_opt` is then stepped once per epoch on a random
    batch, unless `self.model_loss` is False (one-shot models), in which
    case no optimization steps are taken. When `self.always_train_pmf` is
    set, the density estimator is trained afterwards on the same
    (already normalized) data.

    Parameters
    ----------
    X : np.ndarray
        Input features, with shape like `self.X`.
    """
    params = self.model_params
    n_samples = X.shape[0]
    assert params.batch_size < n_samples, (
        'batch size is larger than number of samples')

    with tf.Session(config=self.config) as sess:
        sess.run(self.init_op)

        if self.normalize:
            lower = X.min(axis=0)
            upper = X.max(axis=0)
            X = ds.rescale(X, lower, upper, -1, 1)
            # Persist the range so it can be reused at prediction time.
            sess.run(self.feature_min.assign(lower))
            sess.run(self.feature_max.assign(upper))

        # Special ops are selected by name.
        for op_name, special_op in self.special_ops.items():
            if 'train_model' not in op_name:
                continue
            special_op(sess, X)

        batches = ds.random_batcher([X], params.batch_size)

        self.print('Training {}'.format(self.model_name))
        self.print('Epoch | Loss')
        n_epochs = params.n_epochs
        # One-shot models have no loss to optimize.
        has_loss = self.model_loss is not False
        if has_loss:
            for step in range(n_epochs):
                batch_x, = next(batches)
                _, loss = sess.run(
                    [self.model_opt, self.model_loss],
                    feed_dict={self.X: batch_x})
                if step % params.display_step == 0:
                    self.print('{0:05} | {1:7.5f}'.format(step + 1, loss))
        self.print('Finished training {}'.format(self.model_name))

        # Save the trained model.
        checkpoint = self.saver.save(
            sess, self.save_path.format(self.model_name))
        self.print('Model saved in file: {}'.format(checkpoint))

    if self.always_train_pmf:
        # X was rescaled above, so the estimator must not rescale again.
        self.train_pmf(X, ignore_norm=True)
def test(self, X, Y):
    """Evaluate model performance.

    The model and density-estimator checkpoints are restored into a fresh
    session, `X` is rescaled with the stored feature range when
    normalization is enabled, and accuracy and the confusion matrix are
    computed against `Y`. Both results are also reported via `self.print`.

    Parameters
    ----------
    X : np.ndarray
        Input features, with shape like `self.X`.
    Y : np.ndarray
        Labels for each sample.

    Returns
    -------
    accuracy : float
        Classification accuracy of model, expressed as a percentage
        (the value of `self.accuracy` multiplied by 100).
    c_mat : np.ndarray
        Confusion matrix.
    """
    with tf.Session(config=self.config) as sess:
        self.saver.restore(sess, self.save_path.format(self.model_name))
        self.saver.restore(sess, self.save_path.format(self.pmf_name))

        # Normalize with the feature range stored at training time.
        if self.normalize:
            _min = self.feature_min.eval(session=sess)
            _max = self.feature_max.eval(session=sess)
            X = ds.rescale(X, _min, _max, -1, 1)

        acc, mat = sess.run(
            [self.accuracy, self.confusion_matrix],
            feed_dict={self.X: X, self.Y: Y},
        )

    accuracy_pct = acc * 100
    self.print('Accuracy = {:.3f}%'.format(accuracy_pct))
    self.print(mat)
    return accuracy_pct, mat
def test(self, X, Y):
    """Test the classifier and dump its embeddings.

    Restores './model.ckpt', normalizes `X` according to `self.normalize`,
    evaluates the scores, accuracy, confusion matrix and both losses with
    dropout disabled (`keep_prob` of 1.0), prints the summary as JSON via
    `self.print`, and writes each tensor in `self.embedding_ops` to a CSV
    file under the 'graph/' directory (created if missing).

    Args:
        X (np.ndarray): Features with shape
            (num_samples * time_steps, features).
        Y (np.array): Labels. Samples whose label equals 1 are grouped
            as 'benign'; all others as 'malicious'.

    Returns:
        dict: Dictionary containing the following fields:
            'benign': dict with the 'mean' and 'stddev' (taken over
                axis 0, as lists) of the scores of samples labelled 1.
            'malicious': same statistics for all other samples.
            'confusion_matrix': confusion matrix as a nested list.
            'accuracy': accuracy multiplied by 100.
            'd_loss': value of `self.D_loss` as a float.
            'g_loss': value of `self.G_loss` as a float.
    """
    import os

    with tf.Session(config=self.config) as sess:
        self.saver.restore(sess, './model.ckpt')

        # normalize data
        if self.normalize == 'rescaling':
            _min = self.feature_min.eval(session=sess)
            _max = self.feature_max.eval(session=sess)
            X = ds.rescale(X, _min, _max, -1, 1)
        elif self.normalize == 'vector_norm':
            X = ds.vector_norm(X, -1, 1)

        labels, acc, mat, d_loss, g_loss = sess.run(
            [self.scores, self.accuracy, self.confusion_matrix,
             self.D_loss, self.G_loss],
            feed_dict={
                self.X: X,
                self.Y: Y,
                self.Z: self.sample_Z(n=X.shape[0]),
                self.keep_prob: 1.0,
            }
        )

        # Split the scores by ground-truth label.
        avg_benign = []
        avg_malicious = []
        for i, label in enumerate(labels):
            if Y[i] == 1:
                avg_benign.append(label)
            else:
                avg_malicious.append(label)

        data = {
            'benign': {
                'mean': np.mean(avg_benign, axis=0).tolist(),
                'stddev': np.std(avg_benign, axis=0).tolist(),
            },
            'malicious': {
                'mean': np.mean(avg_malicious, axis=0).tolist(),
                'stddev': np.std(avg_malicious, axis=0).tolist(),
            },
            'confusion_matrix': mat.tolist(),
            'accuracy': acc * 100,
            'd_loss': float(d_loss),
            'g_loss': float(g_loss),
        }

        self.print(json.dumps(data, indent=4))

        # Embeddings
        Z = self.sample_Z(n=X.shape[0])
        embeddings = sess.run(self.embedding_ops, feed_dict={
            self.X: X,
            self.Y: Y,
            self.Z: Z,
            self.keep_prob: 1.0,
        })

        # The output directory may not exist on a fresh checkout.
        os.makedirs('graph', exist_ok=True)
        for op, embedding in zip(self.embedding_ops, embeddings):
            # Derive a file name from the tensor name: drop the output
            # index after ':' and replace scope separators.
            name = op.name.split(':')[0]
            name = name.replace('/', '_')
            # newline='' lets the csv module control line endings itself.
            with open('graph/{}'.format(name), 'w', newline='') as f:
                csv.writer(f).writerows(embedding)

    return data
def train(self, X, Y):
    """Train the Classifier.

    Each epoch runs `kd` discriminator steps followed by `kg` generator
    steps. The step counts adapt to the previous epoch: with
    `k = self.adpt_l * prev_diff_loss` (where `prev_diff_loss` is the last
    discriminator loss minus the last generator loss), `kd = max(1, k)`
    and `kg = max(1, -k)`, both truncated to integers. Summaries are
    written to 'logdir/train' and the model is saved to './model.ckpt'.

    Args:
        X (np.ndarray): Features with shape
            (num_samples * time_steps, features).
        Y (np.ndarray): Labels.

    Raises:
        AssertionError: If `self.batch_size` is not smaller than the
            number of samples in `X`.
    """
    training_size = X.shape[0]

    # normalize X
    if self.normalize == 'rescaling':
        _min = X.min(axis=0)
        _max = X.max(axis=0)
        X = ds.rescale(X, _min, _max, -1, 1)
    elif self.normalize == 'vector_norm':
        X = ds.vector_norm(X, -1, 1)

    assert self.batch_size < training_size, (
        'batch size is larger than training_size'
    )

    with tf.Session(config=self.config) as sess:
        sess.run(self.init_op)

        # for tensorboard
        writer = tf.summary.FileWriter(
            logdir='logdir/train',
            graph=sess.graph
        )

        try:
            prev_diff_loss = 0
            batch = ds.random_batcher([X, Y], self.batch_size)
            count = 0  # global step used to index the summaries

            for epoch in range(self.num_epochs):
                d_loss = 0
                g_loss = 0

                # At least one step of each; extra steps go to whichever
                # side the previous loss difference favours.
                k = self.adpt_l * prev_diff_loss
                kd, kg = np.maximum([1, 1], [k, -k]).astype(np.int32)

                for _ in range(kd):
                    batch_x, batch_y = next(batch)
                    Z = self.sample_Z(n=batch_x.shape[0])
                    s, _, ld = sess.run(
                        [self.merged, self.D_solver, self.D_only_loss],
                        feed_dict={
                            self.X: batch_x,
                            self.Y: batch_y,
                            self.Z: Z,
                            self.keep_prob: 0.5
                        }
                    )
                    writer.add_summary(s, count)
                    count += 1
                    d_loss += ld

                for _ in range(kg):
                    batch_x, batch_y = next(batch)
                    Z = self.sample_Z(n=batch_x.shape[0])
                    s, _, lg = sess.run(
                        [self.merged, self.G_solver, self.G_loss],
                        feed_dict={
                            self.X: batch_x,
                            self.Z: Z,
                            self.Y: batch_y,
                            self.keep_prob: 0.5
                        }
                    )
                    writer.add_summary(s, count)
                    count += 1
                    g_loss += lg

                # Uses the losses of the last step of each phase.
                prev_diff_loss = ld - lg

                if epoch % self.display_step == 0:
                    display_str = (
                        'Epoch {0:04} with D_loss={1:7.5f}||G_loss={2:.5f}'
                    )
                    display_str += '\nkd={3}, kg={4}'
                    display_str = display_str.format(
                        epoch+1, d_loss/kd, g_loss/kg, kd, kg
                    )
                    self.print(display_str)
        finally:
            # Flush pending events and release the file handle even if
            # training is interrupted.
            writer.close()

        # assign normalization values
        if self.normalize == 'rescaling':
            sess.run(self.feature_min.assign(_min))
            sess.run(self.feature_max.assign(_max))

        self.print('Optimization Finished')

        # save model
        save_path = self.saver.save(sess, './model.ckpt')
        self.print('Model saved in file: {}'.format(save_path))