def test_and_compute_score(self, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    with tf.Session(config=utils.get_default_config()) as sess:
        saver = tf.train.Saver(tf.global_variables())
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            raise Exception('Saved model not found.')

        testing_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
        testing_batches = utils.make_batches(testing_set, self.batch_size)

        average_test_loss = 0.0
        average_accuracy_rnn = 0.0
        full_y_pred = np.array([])
        for batch_idx, (batch_start, batch_end) in enumerate(testing_batches):
            batch_x_opcode = x_test_opcode[batch_start:batch_end]
            batch_x_assembly = x_test_assembly[batch_start:batch_end]
            batch_y = y_test[batch_start:batch_end]
            batch_sequence_length = x_test_seq_len[batch_start:batch_end]

            feed_dict = {
                self.X_opcode: batch_x_opcode,
                self.X_assembly: batch_x_assembly,
                self.Y: batch_y,
                self.sequence_length: batch_sequence_length,
            }

            # batch_test_loss, accuracy_rnn = sess.run(
            #     [self.loss, self.accuracy_bi_rnn],
            #     feed_dict=feed_dict)
            batch_test_loss = sess.run(self.loss, feed_dict=feed_dict)
            batch_y_pred = sess.run(self.y_pred_svm, feed_dict=feed_dict)

            full_y_pred = np.append(full_y_pred, batch_y_pred)
            average_test_loss += batch_test_loss / len(testing_batches)
            # average_accuracy_rnn += accuracy_rnn / len(testing_batches)

        full_accuracy_score = mt.accuracy_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_pre_score = mt.precision_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_f1_score = mt.f1_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_recall_score = mt.recall_score(y_true=y_test[:testing_set], y_pred=full_y_pred)
        full_auc_score = mt.roc_auc_score(y_true=y_test[:testing_set], y_score=full_y_pred)

        message = "testing loss %.5f\n" % average_test_loss
        message += "accuracy %.2f\n" % (full_accuracy_score * 100)
        message += "compute score:\n"
        message += '\tprecision score %.5f\n' % (full_pre_score * 100)
        message += '\tf1 score %.5f\n' % (full_f1_score * 100)
        message += '\trecall score %.5f\n' % (full_recall_score * 100)
        message += '\tAUC score %.5f\n' % (full_auc_score * 100)
        message += "-----------------------------------------------------\n"
        message += "Finish computing score process.\n"
        utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)
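
# The batch loops in this file all rely on utils.make_batches, which is defined outside
# this section. The helper below is only an illustrative sketch of its assumed contract:
# it returns (start, end) index pairs that cover `size` items in chunks of `batch_size`.
# The project's real implementation may differ.
def make_batches_sketch(size, batch_size):
    """Assumed behaviour of utils.make_batches: split range(size) into index pairs."""
    return [(start, min(size, start + batch_size)) for start in range(0, size, batch_size)]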
def visualization(self, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    with tf.Session(config=utils.get_default_config()) as sess:
        # Note: the checkpoint path is hard-coded to a specific run for this visualization.
        self.checkpoint_path = 'saved-model/full_dataset/good_result/2018-5-8-19-36-50'
        saver = tf.train.Saver(tf.global_variables())
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            raise Exception('Saved model not found.')

        # model_path = 'saved-model/full_dataset/good_result/2018-5-8-19-36-50'
        testing_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size

        # Collect one batch of positive-class (y == 1) test examples to visualize.
        batch_x_opcode = []
        batch_x_assembly = []
        batch_y = []
        batch_sequence_length = []
        for i in range(3498, testing_set):
            # if y_test[i] == 0:
            if y_test[i] == 1:
                batch_x_opcode.append(x_test_opcode[i])
                batch_x_assembly.append(x_test_assembly[i])
                batch_sequence_length.append(x_test_seq_len[i])
                batch_y.append(y_test[i])
            if len(batch_y) == self.batch_size:
                break

        feed_dict = {
            self.X_opcode: batch_x_opcode,
            self.X_assembly: batch_x_assembly,
            self.Y: batch_y,
            self.sequence_length: batch_sequence_length,
        }

        image = sess.run([self.cnn_input], feed_dict=feed_dict)

        layers = ["r", "p", "c"]
        path_log = os.path.join('visualize', '_epoch1000000_good_log_1_cap_tuong_tu')
        path_output = os.path.join('visualize', '_epoch1000000_good_output_1_cap_tuong_tu')

        # activation_visualization(sess_graph_path=sess, value_feed_dict=feed_dict, layers=layers,
        #                          path_logdir=path_log, path_outdir=path_output)
        # deconv_visualization(sess_graph_path=sess, value_feed_dict=feed_dict, input_tensor=self.cnn_input,
        #                      layers=layers, path_logdir=path_log, path_outdir=path_output)
        # img_normalize = image_normalization(image[0][0])
        # imsave(os.path.join('visualize', '_epoch100_good_image_1_cap_tuong_tu.png'),
        #        np.reshape(img_normalize, [img_normalize.shape[0], img_normalize.shape[1]]))

        layer = 'cnn/relu3/Relu'
        deepdream_visualization(sess_graph_path=sess, input_tensor=self.two_dimension_image,
                                value_feed_dict=feed_dict, layer=layer, classes=[1, 2, 3, 4, 5],
                                path_logdir=path_log, path_outdir=path_output)
        print('Ok, I got it.')
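
# The commented-out image_normalization/imsave block above saves the CNN input of one
# example as a grayscale image. The helper below is a minimal sketch of the same idea
# using plain numpy plus imageio (an assumed dependency, not the project's own helpers):
# it rescales a 2-D feature map to [0, 255] and writes it to disk.
def save_feature_map_sketch(feature_map, out_path):
    import numpy as np
    import imageio
    fmap = np.squeeze(np.asarray(feature_map, dtype=np.float32))
    fmin, fmax = fmap.min(), fmap.max()
    scaled = np.zeros_like(fmap) if fmax == fmin else (fmap - fmin) / (fmax - fmin)
    imageio.imwrite(out_path, (scaled * 255).astype(np.uint8))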
def test(self, checkpoint_path, x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(config=utils.get_default_config()) as sess:
            check_point = tf.train.get_checkpoint_state(checkpoint_path)
            try:
                saver = tf.train.import_meta_graph("{}.meta".format(check_point.model_checkpoint_path))
                saver.restore(sess, check_point.model_checkpoint_path)
            except Exception:
                print("Cannot find the saved model.")
                raise

            message = "Loaded model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, self.datetime)

            # get the placeholders from the graph by name
            X_opcode = graph.get_operation_by_name("input/x_opcode_input").outputs[0]
            X_assembly = graph.get_operation_by_name("input/x_assemply_input").outputs[0]
            Y = graph.get_operation_by_name("input/true_label").outputs[0]
            sequence_length = graph.get_operation_by_name("input/seq_length").outputs[0]

            # get tensor to visualize
            phi_x_tilde = graph.get_operation_by_name("oc-svm/phi_x_tilde").outputs[0]
            # get tensor for prediction
            w_phi_minus_rho2 = graph.get_operation_by_name("oc-svm/sub").outputs[0]

            test_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
            test_batches = utils.make_batches(test_set, self.batch_size)

            full_phi_x_tilde = np.zeros((test_set, 2 * self.num_random_features))  # (test_set, 2*n_random_features)
            test_full_pred = np.array([])  # accumulates (w*phi_tilde(x) - rho2) for the whole test set
            for batch_idx, (batch_start, batch_end) in enumerate(test_batches):
                test_x_opcode = utils.convert_list_sparse_to_dense(x_test_opcode[batch_start:batch_end])
                test_x_assembly = utils.convert_list_sparse_to_dense(x_test_assembly[batch_start:batch_end])
                test_y = y_test[batch_start:batch_end]
                test_seq_len = x_test_seq_len[batch_start:batch_end]

                test_feed_dict = {
                    X_opcode: test_x_opcode,
                    X_assembly: test_x_assembly,
                    Y: test_y,
                    sequence_length: test_seq_len,
                }

                batch_phi_x_tilde, test_batch_pred = sess.run([phi_x_tilde, w_phi_minus_rho2],
                                                              feed_dict=test_feed_dict)
                test_full_pred = np.append(test_full_pred, test_batch_pred)
                full_phi_x_tilde[batch_start:batch_end] = batch_phi_x_tilde

            test_min_cost_value, test_y_pred_0, test_y_pred_with_optimal_hyperplane, test_y_pred_1, test_n_data_in_strip = \
                self.compute_cost_sensitive_loss_and_y_pred(test_full_pred, y_test[:test_set])

            test_acc_0, test_pre_0, test_f1_0, test_rec_0, test_auc_0 = self.compute_score(
                y_true=y_test[:test_set], y_pred=test_y_pred_0)
            test_acc_opt, test_pre_opt, test_f1_opt, test_rec_opt, test_auc_opt = self.compute_score(
                y_true=y_test[:test_set], y_pred=test_y_pred_with_optimal_hyperplane)
            test_acc_1, test_pre_1, test_f1_1, test_rec_1, test_auc_1 = self.compute_score(
                y_true=y_test[:test_set], y_pred=test_y_pred_1)

            message = "[test] cost_sensitive_loss %.5f\n" % test_min_cost_value
            message += "[test] accuracy_0 %.2f\n" % (test_acc_0 * 100)
            message += "[test] precision_0 %.2f\n" % (test_pre_0 * 100)
            message += "[test] f1_0 %.2f\n" % (test_f1_0 * 100)
            message += "[test] recall_0 %.2f\n" % (test_rec_0 * 100)
            message += "[test] auc_0 %.2f\n" % (test_auc_0 * 100)
            message += "[test] accuracy_opt %.2f\n" % (test_acc_opt * 100)
            message += "[test] precision_opt %.2f\n" % (test_pre_opt * 100)
            message += "[test] f1_opt %.2f\n" % (test_f1_opt * 100)
            message += "[test] recall_opt %.2f\n" % (test_rec_opt * 100)
            message += "[test] auc_opt %.2f\n" % (test_auc_opt * 100)
            message += "[test] accuracy_1 %.2f\n" % (test_acc_1 * 100)
            message += "[test] precision_1 %.2f\n" % (test_pre_1 * 100)
            message += "[test] f1_1 %.2f\n" % (test_f1_1 * 100)
            message += "[test] recall_1 %.2f\n" % (test_rec_1 * 100)
            message += "[test] auc_1 %.2f\n" % (test_auc_1 * 100)
            message += "[test] n_data_in_strip %d\n" % test_n_data_in_strip
            message += "-----------------------------------------------------\n"
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
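
# test() above densifies every sparse mini-batch with utils.convert_list_sparse_to_dense,
# whose definition is not part of this section. The sketch below shows the assumed
# behaviour (each element is a scipy.sparse matrix that is densified, and the results are
# stacked into one numpy array suitable for feed_dict); the real helper may differ.
def convert_list_sparse_to_dense_sketch(sparse_list):
    import numpy as np
    return np.asarray([np.asarray(item.todense()) for item in sparse_list])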
def train(self, x_train_opcode, x_train_assembly, x_train_seq_len, y_train,
          x_valid_opcode, x_valid_assembly, x_valid_seq_len, y_valid,
          x_test_opcode, x_test_assembly, x_test_seq_len, y_test):
    outFile = open(self.OutName, 'w')
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    with tf.Session(config=utils.get_default_config()) as sess:
        writer = tf.summary.FileWriter(self.graph_path, sess.graph)
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            message = "Create the model with fresh parameters\n"
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            sess.run(tf.global_variables_initializer())

        #### Separate dataset by class label
        x_train_opcode_0 = []
        x_train_opcode_1 = []
        x_train_assembly_0 = []
        x_train_assembly_1 = []
        y_train_0 = []
        y_train_1 = []
        x_train_seq_len_0 = []
        x_train_seq_len_1 = []
        for index, aLabel in enumerate(y_train):
            if aLabel == 0.0:
                x_train_opcode_0.append(x_train_opcode[index, :, :, :])
                x_train_assembly_0.append(x_train_assembly[index, :, :, :])
                y_train_0.append(y_train[index])
                x_train_seq_len_0.append(x_train_seq_len[index])
            else:
                x_train_opcode_1.append(x_train_opcode[index, :, :, :])
                x_train_assembly_1.append(x_train_assembly[index, :, :, :])
                y_train_1.append(y_train[index])
                x_train_seq_len_1.append(x_train_seq_len[index])
        x_train_opcode_0 = np.array(x_train_opcode_0)
        x_train_opcode_1 = np.array(x_train_opcode_1)
        x_train_assembly_0 = np.array(x_train_assembly_0)
        x_train_assembly_1 = np.array(x_train_assembly_1)

        min_train_0_1 = min(x_train_opcode_0.shape[0], x_train_opcode_1.shape[0])
        training_set = min_train_0_1 - min_train_0_1 % (self.batch_size // 2)
        training_batches = utils.make_batches(training_set, (self.batch_size // 2))
        #### Separate dataset by class label

        step_loss = 0.0  # average loss per epoch
        step_time = 0.0
        full_train_accuracy_score = []
        full_train_pre_score = []
        full_train_f1_score = []
        full_train_recall_score = []
        full_train_auc_score = []

        initial_step = self.global_step.eval()
        for step in range(initial_step, initial_step + self.num_train_steps):
            loss_per_batch = 0.0
            start_time = time.time()
            full_y_predic_train = np.array([])
            full_y_target_train = np.array([])
            for batch_idx, (batch_start, batch_end) in enumerate(training_batches):
                #### Build a class-balanced batch: half label 0, half label 1
                batch_x_opcode_0 = x_train_opcode_0[batch_start:batch_end]
                batch_x_assembly_0 = x_train_assembly_0[batch_start:batch_end]
                batch_y_0 = y_train_0[batch_start:batch_end]
                batch_sequence_length_0 = x_train_seq_len_0[batch_start:batch_end]

                batch_x_opcode_1 = x_train_opcode_1[batch_start:batch_end]
                batch_x_assembly_1 = x_train_assembly_1[batch_start:batch_end]
                batch_y_1 = y_train_1[batch_start:batch_end]
                batch_sequence_length_1 = x_train_seq_len_1[batch_start:batch_end]

                batch_x_opcode = np.concatenate((batch_x_opcode_0, batch_x_opcode_1), axis=0)
                batch_x_assembly = np.concatenate((batch_x_assembly_0, batch_x_assembly_1), axis=0)
                batch_y = batch_y_0 + batch_y_1
                batch_sequence_length = batch_sequence_length_0 + batch_sequence_length_1
                #### Build a class-balanced batch

                full_y_target_train = np.append(full_y_target_train, batch_y)

                feed_dict = {
                    self.X_opcode: batch_x_opcode,
                    self.X_assembly: batch_x_assembly,
                    self.Y: batch_y,
                    self.sequence_length: batch_sequence_length,
                }

                soutputs, sstates, sphi_x_tilde = sess.run(
                    [self.outputs, self.states, self.phi_x_tilde], feed_dict=feed_dict)
                # print("Hello")
                # print(soutputs.shape)
                # print(sstates.shape)
                # print(sphi_x_tilde.shape)
                # print(np.sum(soutputs[:,0,:]))
                # print(np.sum(sphi_x_tilde[:,:256]))
                # print(np.sum(soutputs[:,-1,:]))
                # print(np.sum(sstates))
                # print(np.sum(sphi_x_tilde[:,-256:]))
                # sys.exit()

                _, summary, batch_loss, batch_y_pred_train = sess.run(
                    [self.training_op, self.summary_op, self.loss, self.y_pred_svm],
                    feed_dict=feed_dict)
                full_y_predic_train = np.append(full_y_predic_train, batch_y_pred_train)

                if (batch_idx + 1) % (len(training_batches) // 10) == 0:
                    writer.add_summary(summary, global_step=step)
                loss_per_batch += batch_loss / len(training_batches)

                batch_train_accuracy_score = mt.accuracy_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_pre_score = mt.precision_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_f1_score = mt.f1_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_recall_score = mt.recall_score(y_true=full_y_target_train, y_pred=full_y_predic_train)
                batch_train_auc_score = mt.roc_auc_score(y_true=full_y_target_train, y_score=full_y_predic_train)
                full_y_predic_train = np.array([])
                full_y_target_train = np.array([])

                full_train_accuracy_score.append(batch_train_accuracy_score)
                full_train_pre_score.append(batch_train_pre_score)
                full_train_f1_score.append(batch_train_f1_score)
                full_train_recall_score.append(batch_train_recall_score)
                full_train_auc_score.append(batch_train_auc_score)

            step_time += (time.time() - start_time)
            step_loss += loss_per_batch

            # if (step + 1) % 10 == 0:
            #     # Save checkpoint and zero timer and loss.
            #     checkpoint_path = os.path.join(self.checkpoint_path, "rnn_classifier_" + self.data_size + ".ckpt")
            #     saver.save(sess, checkpoint_path, global_step=step)

            if (step + 1) % self.display_step == 0:
                # Train plot
                ave_train_accuracy_score = np.mean(full_train_accuracy_score)
                ave_train_pre_score = np.mean(full_train_pre_score)
                ave_train_f1_score = np.mean(full_train_f1_score)
                ave_train_recall_score = np.mean(full_train_recall_score)
                ave_train_auc_score = np.mean(full_train_auc_score)
                full_train_accuracy_score = []
                full_train_pre_score = []
                full_train_f1_score = []
                full_train_recall_score = []
                full_train_auc_score = []

                message = "global step %d/%d step-time %.2fs average loss %.5f acc %.2f pre %.2f f1 %.2f rec %.2f auc %.2f\n" % (
                    step, self.num_train_steps - 1, step_time, step_loss,
                    ave_train_accuracy_score, ave_train_pre_score, ave_train_f1_score,
                    ave_train_recall_score, ave_train_auc_score)
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

                outFile.write("%.2f\n" % (ave_train_accuracy_score * 100))
                outFile.write("%.2f\n" % (ave_train_pre_score * 100))
                outFile.write("%.2f\n" % (ave_train_f1_score * 100))
                outFile.write("%.2f\n" % (ave_train_recall_score * 100))
                outFile.write("%.2f\n" % (ave_train_auc_score * 100))
                # Train plot

                # Dev plot
                step_time, step_loss = 0.0, 0.0
                dev_set = x_valid_opcode.shape[0] - x_valid_opcode.shape[0] % self.batch_size
                dev_batches = utils.make_batches(dev_set, self.batch_size)

                average_dev_loss = 0.0
                full_y_pred_svm = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(dev_batches):
                    valid_x_opcode = x_valid_opcode[batch_start:batch_end]
                    valid_x_assembly = x_valid_assembly[batch_start:batch_end]
                    valid_y = y_valid[batch_start:batch_end]
                    valid_seq_len = x_valid_seq_len[batch_start:batch_end]

                    feed_dict = {
                        self.X_opcode: valid_x_opcode,
                        self.X_assembly: valid_x_assembly,
                        self.Y: valid_y,
                        self.sequence_length: valid_seq_len,
                    }
                    batch_dev_loss, batch_y_pred = sess.run([self.loss, self.y_pred_svm], feed_dict=feed_dict)

                    full_y_pred_svm = np.append(full_y_pred_svm, batch_y_pred)
                    average_dev_loss += batch_dev_loss / len(dev_batches)

                message = "eval: accuracy_svm %.2f\n" % (
                    mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: precision_svm %.2f\n" % (
                    mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: f1_svm %.2f\n" % (
                    mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: recall_svm %.2f\n" % (
                    mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100)
                message += "eval: roc_auc_svm %.2f\n" % (
                    mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100)
                message += "-----------------------------------------------------\n"

                outFile.write("%.2f\n" % (mt.accuracy_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.precision_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.f1_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.recall_score(y_true=y_valid[:dev_set], y_pred=full_y_pred_svm) * 100))
                outFile.write("%.2f\n" % (mt.roc_auc_score(y_true=y_valid[:dev_set], y_score=full_y_pred_svm) * 100))
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                # Dev plot

                # Test plot
                # x_train_opcode, x_train_assembly, x_train_seq_len, y_train,
                # x_valid_opcode, x_valid_assembly, x_valid_seq_len, y_valid,
                # x_test_opcode, x_test_assembly, x_test_seq_len, y_test
                step_time, step_loss = 0.0, 0.0
                test_set = x_test_opcode.shape[0] - x_test_opcode.shape[0] % self.batch_size
                test_batches = utils.make_batches(test_set, self.batch_size)

                average_test_loss = 0.0
                full_y_pred_svm_test = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(test_batches):
                    test_x_opcode = x_test_opcode[batch_start:batch_end]
                    test_x_assembly = x_test_assembly[batch_start:batch_end]
                    test_y = y_test[batch_start:batch_end]
                    test_seq_len = x_test_seq_len[batch_start:batch_end]

                    feed_dict = {
                        self.X_opcode: test_x_opcode,
                        self.X_assembly: test_x_assembly,
                        self.Y: test_y,
                        self.sequence_length: test_seq_len,
                    }
                    batch_test_loss, batch_y_pred_test = sess.run([self.loss, self.y_pred_svm], feed_dict=feed_dict)

                    full_y_pred_svm_test = np.append(full_y_pred_svm_test, batch_y_pred_test)
                    average_test_loss += batch_test_loss / len(test_batches)

                message = "test: accuracy_svm %.2f\n" % (
                    mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: precision_svm %.2f\n" % (
                    mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: f1_svm %.2f\n" % (
                    mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: recall_svm %.2f\n" % (
                    mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100)
                message += "test: roc_auc_svm %.2f\n" % (
                    mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100)
                message += "-----------------------------------------------------\n"

                outFile.write("%.2f\n" % (mt.accuracy_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.precision_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.f1_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.recall_score(y_true=y_test[:test_set], y_pred=full_y_pred_svm_test) * 100))
                outFile.write("%.2f\n" % (mt.roc_auc_score(y_true=y_test[:test_set], y_score=full_y_pred_svm_test) * 100))
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
                # Test plot

        writer.close()
        message = "Finish training process.\n"
        utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
        outFile.close()
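
# Every tf.Session in this file is created with utils.get_default_config, which lives
# outside this section. A plausible sketch, assuming it simply builds a ConfigProto that
# lets GPU memory grow on demand (the project's version may set additional options):
def get_default_config_sketch():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return config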
def train(self, x_train_opcode, x_train_assembly, x_train_seq_len, y_train,
          x_valid_opcode, x_valid_assembly, x_valid_seq_len, y_valid):
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session(config=utils.get_default_config()) as sess:
        writer = tf.summary.FileWriter(self.graph_path, sess.graph)
        check_point = tf.train.get_checkpoint_state(self.checkpoint_path)
        if check_point and tf.train.checkpoint_exists(check_point.model_checkpoint_path):
            message = "Load model parameters from %s\n" % check_point.model_checkpoint_path
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            saver.restore(sess, check_point.model_checkpoint_path)
        else:
            message = "Create the model with fresh parameters\n"
            utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)
            sess.run(tf.global_variables_initializer())

        training_set = x_train_opcode.shape[0] - x_train_opcode.shape[0] % self.batch_size
        training_batches = utils.make_batches(training_set, self.batch_size)

        step_loss = 0.0  # the average loss per epoch
        step_time = 0.0

        initial_step = self.global_step.eval()
        for step in range(initial_step, initial_step + self.num_train_steps):
            loss_per_batch = 0.0
            start_time = time.time()
            step_predict_train = np.array([])
            for batch_idx, (batch_start, batch_end) in enumerate(training_batches):
                batch_x_opcode = utils.convert_list_sparse_to_dense(x_train_opcode[batch_start:batch_end])
                batch_x_assembly = utils.convert_list_sparse_to_dense(x_train_assembly[batch_start:batch_end])
                batch_sequence_length = x_train_seq_len[batch_start:batch_end]
                batch_y = y_train[batch_start:batch_end]

                train_feed_dict = {
                    self.X_opcode: batch_x_opcode,
                    self.X_assembly: batch_x_assembly,
                    self.Y: batch_y,
                    self.sequence_length: batch_sequence_length,
                }

                _, summary, batch_loss, train_batch_y_pred = sess.run(
                    [self.training_op, self.summary_op, self.loss, self.w_phi_minus_rho2],
                    feed_dict=train_feed_dict)

                if (batch_idx + 1) % (len(training_batches) // 10) == 0:
                    writer.add_summary(summary, global_step=step)
                loss_per_batch += batch_loss / len(training_batches)

                step_predict_train = np.append(step_predict_train, train_batch_y_pred)

                sys.stdout.write("\rProcessed %.2f%% of mini-batches" % (((batch_idx + 1) / len(training_batches)) * 100))
                sys.stdout.flush()
            '''
            now we have the full (w*phi_tilde(x) - rho2) in 'step_predict_train' after the loop above
            '''
            step_time += (time.time() - start_time) / self.display_step
            step_loss += loss_per_batch / self.display_step

            if (step + 1) % 10 == 0:
                # save checkpoint
                checkpoint_path = os.path.join(self.checkpoint_path, "rnn_classifier_" + self.data_size + ".ckpt")
                saver.save(sess, checkpoint_path, global_step=step)

            if (step + 1) % self.display_step == 0:
                print("\n")
                message = "global step %d/%d step-time %.2fs average total loss %.5f\n" % (
                    step, self.num_train_steps - 1, step_time, step_loss)
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

                # run evaluation and print the total loss
                dev_set = x_valid_opcode.shape[0] - x_valid_opcode.shape[0] % self.batch_size
                dev_batches = utils.make_batches(dev_set, self.batch_size)

                average_dev_loss = 0.0
                valid_full_pred = np.array([])
                for batch_idx, (batch_start, batch_end) in enumerate(dev_batches):
                    valid_x_opcode = utils.convert_list_sparse_to_dense(x_valid_opcode[batch_start:batch_end])
                    valid_x_assembly = utils.convert_list_sparse_to_dense(x_valid_assembly[batch_start:batch_end])
                    valid_y = y_valid[batch_start:batch_end]
                    valid_seq_len = x_valid_seq_len[batch_start:batch_end]

                    valid_feed_dict = {
                        self.X_opcode: valid_x_opcode,
                        self.X_assembly: valid_x_assembly,
                        self.Y: valid_y,
                        self.sequence_length: valid_seq_len,
                    }

                    batch_dev_loss, valid_batch_pred = sess.run([self.loss, self.w_phi_minus_rho2],
                                                                feed_dict=valid_feed_dict)
                    valid_full_pred = np.append(valid_full_pred, valid_batch_pred)
                    average_dev_loss += batch_dev_loss / len(dev_batches)
                '''
                now we have the full (w*phi_tilde(x) - rho2) in the 'valid_full_pred' variable
                '''
                pred_train_and_valid_set = np.concatenate((step_predict_train, valid_full_pred), axis=0)
                y_true_train_valid = np.concatenate((y_train[:training_set], y_valid[:dev_set]), axis=0)

                train_val_min_cost_value, train_val_y_pred_0, train_val_y_pred_with_optimal_hyperplane, \
                    train_val_y_pred_1, train_val_n_data_in_strip = self.compute_cost_sensitive_loss_and_y_pred(
                        pred_train_and_valid_set, y_true_train_valid)

                train_y_pred_0 = train_val_y_pred_0[:training_set]
                valid_y_pred_0 = train_val_y_pred_0[training_set:]
                step_train_acc_0, step_train_pre_0, step_train_f1_0, step_train_rec_0, step_train_auc_0 = self.compute_score(
                    y_true=y_train[:training_set], y_pred=train_y_pred_0)

                train_y_pred_with_opt_hyperplane = train_val_y_pred_with_optimal_hyperplane[:training_set]
                valid_y_pred_with_opt_hyperplane = train_val_y_pred_with_optimal_hyperplane[training_set:]
                step_train_acc_opt, step_train_pre_opt, step_train_f1_opt, step_train_rec_opt, step_train_auc_opt = self.compute_score(
                    y_true=y_train[:training_set], y_pred=train_y_pred_with_opt_hyperplane)

                train_y_pred_1 = train_val_y_pred_1[:training_set]
                valid_y_pred_1 = train_val_y_pred_1[training_set:]
                step_train_acc_1, step_train_pre_1, step_train_f1_1, step_train_rec_1, step_train_auc_1 = self.compute_score(
                    y_true=y_train[:training_set], y_pred=train_y_pred_1)

                message = "[train] total_loss %.5f\n" % step_loss
                message += "[train] cost_sensitive_loss %.5f\n" % train_val_min_cost_value
                message += "[train] accuracy_0 %.2f\n" % (step_train_acc_0 * 100)
                message += "[train] precision_0 %.2f\n" % (step_train_pre_0 * 100)
                message += "[train] f1_0 %.2f\n" % (step_train_f1_0 * 100)
                message += "[train] recall_0 %.2f\n" % (step_train_rec_0 * 100)
                message += "[train] auc_0 %.2f\n" % (step_train_auc_0 * 100)
                message += "[train] accuracy_opt %.2f\n" % (step_train_acc_opt * 100)
                message += "[train] precision_opt %.2f\n" % (step_train_pre_opt * 100)
                message += "[train] f1_opt %.2f\n" % (step_train_f1_opt * 100)
                message += "[train] recall_opt %.2f\n" % (step_train_rec_opt * 100)
                message += "[train] auc_opt %.2f\n" % (step_train_auc_opt * 100)
                message += "[train] accuracy_1 %.2f\n" % (step_train_acc_1 * 100)
                message += "[train] precision_1 %.2f\n" % (step_train_pre_1 * 100)
                message += "[train] f1_1 %.2f\n" % (step_train_f1_1 * 100)
                message += "[train] recall_1 %.2f\n" % (step_train_rec_1 * 100)
                message += "[train] auc_1 %.2f\n" % (step_train_auc_1 * 100)
                message += "[train] n_data_in_strip %d\n" % train_val_n_data_in_strip
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode, show_message=False)

                step_val_acc_0, step_val_pre_0, step_val_f1_0, step_val_rec_0, step_val_auc_0 = self.compute_score(
                    y_true=y_valid[:dev_set], y_pred=valid_y_pred_0)
                step_val_acc_opt, step_val_pre_opt, step_val_f1_opt, step_val_rec_opt, step_val_auc_opt = self.compute_score(
                    y_true=y_valid[:dev_set], y_pred=valid_y_pred_with_opt_hyperplane)
                step_val_acc_1, step_val_pre_1, step_val_f1_1, step_val_rec_1, step_val_auc_1 = self.compute_score(
                    y_true=y_valid[:dev_set], y_pred=valid_y_pred_1)

                message = "[eval] total_loss %.5f\n" % average_dev_loss
                message += "[eval] cost_sensitive_loss %.5f\n" % train_val_min_cost_value
                message += "[eval] accuracy_0 %.2f\n" % (step_val_acc_0 * 100)
                message += "[eval] precision_0 %.2f\n" % (step_val_pre_0 * 100)
                message += "[eval] f1_0 %.2f\n" % (step_val_f1_0 * 100)
                message += "[eval] recall_0 %.2f\n" % (step_val_rec_0 * 100)
                message += "[eval] auc_0 %.2f\n" % (step_val_auc_0 * 100)
                message += "[eval] accuracy_opt %.2f\n" % (step_val_acc_opt * 100)
                message += "[eval] precision_opt %.2f\n" % (step_val_pre_opt * 100)
                message += "[eval] f1_opt %.2f\n" % (step_val_f1_opt * 100)
                message += "[eval] recall_opt %.2f\n" % (step_val_rec_opt * 100)
                message += "[eval] auc_opt %.2f\n" % (step_val_auc_opt * 100)
                message += "[eval] accuracy_1 %.2f\n" % (step_val_acc_1 * 100)
                message += "[eval] precision_1 %.2f\n" % (step_val_pre_1 * 100)
                message += "[eval] f1_1 %.2f\n" % (step_val_f1_1 * 100)
                message += "[eval] recall_1 %.2f\n" % (step_val_rec_1 * 100)
                message += "[eval] auc_1 %.2f\n" % (step_val_auc_1 * 100)
                message += "[eval] n_data_in_strip %d\n" % train_val_n_data_in_strip
                message += "-----------------------------------------------------\n"
                utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)

                step_time, step_loss = 0.0, 0.0  # it is important to reset step_time and step_loss to zero.

        writer.close()
        message = "Finish training process.\n"
        utils.print_and_write_logging_file(self.logging_path, message, self.running_mode)