def create_finetune_model(input_shape, embedding_shape, yolo_model, anchors,
                          num_seen):
    """Build the fine-tuning model on top of a frozen YOLO network.

    Every layer of ``yolo_model`` is marked non-trainable. Its inputs (plus
    an extra anchor input) and outputs are handed to ``yolo_plus_body``, and
    the resulting outputs are wired, together with the ground-truth and
    embedding inputs, into a ``yolo_loss`` Lambda layer called with
    ``plus=True``.

    Args:
        input_shape: ``(height, width)`` pair used to size the three
            ground-truth inputs (strides 32, 16 and 8).
        embedding_shape: Shape of the embedding input; its first entry is
            used as the number of classes.
        yolo_model: Model whose layers are frozen and whose inputs/outputs
            feed ``yolo_plus_body``.
        anchors: Anchor collection; ``len(anchors) // 3`` anchors are
            assigned to each output scale.
        num_seen: Forwarded unchanged to ``yolo_loss``.

    Returns:
        A ``Model`` whose inputs are the ``yolo_plus_body`` inputs, the three
        ground-truth inputs and the embedding input, and whose output is the
        ``yolo_loss`` layer.
    """
    # Freeze yolo model weights.
    for layer in yolo_model.layers:
        layer.trainable = False
    print('Freeze yolo model layers.')

    print('Create finetune model.')
    height, width = input_shape
    anchor_count = len(anchors)
    class_count, _ = embedding_shape
    anchors_per_scale = anchor_count // 3

    # Inputs are created in a fixed order so auto-generated layer names stay
    # stable: anchors first, then ground truth per scale, then embeddings.
    anchor_input = KL.Input(shape=(anchor_count, 2))
    y_true = []
    for stride in (32, 16, 8):
        y_true.append(
            KL.Input(shape=(height // stride, width // stride,
                            anchors_per_scale, 5 + class_count)))
    y_embedding = KL.Input(shape=embedding_shape)

    model_plus_body = yolo_plus_body(yolo_model.inputs + [anchor_input],
                                     yolo_model.outputs, anchors_per_scale)

    loss_layer = KL.Lambda(
        lambda tensors: yolo_loss(tensors,
                                  anchors=anchors,
                                  num_seen=num_seen,
                                  ignore_thresh=0.5,
                                  plus=True),
        name='yolo_loss')
    loss_inputs = list(model_plus_body.output) + y_true + [y_embedding]
    model_loss = loss_layer(loss_inputs)

    model_inputs = list(model_plus_body.input) + y_true + [y_embedding]
    return Model(model_inputs, model_loss)
def create_model(input_shape,
                 embedding_shape,
                 anchors,
                 num_seen,
                 load_pretrained=True,
                 weights_path='model_data/yolo_weights.h5'):
    """Create the training model.

    Builds a ``yolo_body`` network on a fresh image input and attaches a
    ``yolo_loss`` Lambda layer fed by the body's outputs from index 3
    onwards, the three ground-truth inputs and the embedding input.

    Args:
        input_shape: ``(height, width)`` tuple; ``(3,)`` is appended to form
            the image input shape.
        embedding_shape: Shape of the embedding input; its first entry is
            used as the number of classes.
        anchors: Anchor collection; ``len(anchors) // 3`` anchors are
            assigned to each output scale.
        num_seen: Forwarded unchanged to ``yolo_loss``.
        load_pretrained: When true, load weights from ``weights_path`` by
            layer name (skipping mismatches) and freeze all but the last
            three layers of the body.
        weights_path: Path of the weights file to load.

    Returns:
        A ``(model_body, model)`` pair: the bare network and the training
        model whose output is the loss layer.
    """
    height, width = input_shape
    anchor_count = len(anchors)
    class_count, _ = embedding_shape
    anchors_per_scale = anchor_count // 3

    # Inputs are created before the body so auto-generated layer names keep
    # their order: image, ground truth per scale, embeddings.
    image_input = KL.Input(shape=input_shape + (3, ))
    y_true = []
    for stride in (32, 16, 8):
        y_true.append(
            KL.Input(shape=(height // stride, width // stride,
                            anchors_per_scale, 5 + class_count)))
    y_embedding = KL.Input(shape=embedding_shape)

    model_body = yolo_body(image_input, anchors_per_scale)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(
        anchor_count, class_count))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))

        # Freeze everything except the last three layers.
        frozen_count = len(model_body.layers) - 3
        for layer in model_body.layers[:max(frozen_count, 0)]:
            layer.trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(
            frozen_count, len(model_body.layers)))

    loss_layer = KL.Lambda(
        lambda tensors: yolo_loss(tensors,
                                  anchors=anchors,
                                  num_seen=num_seen,
                                  ignore_thresh=0.5),
        name='yolo_loss')
    # Only the body outputs from index 3 onwards are passed to the loss.
    loss_inputs = list(model_body.output[3:]) + y_true + [y_embedding]
    model_loss = loss_layer(loss_inputs)

    model = Model([model_body.input] + y_true + [y_embedding], model_loss)
    return model_body, model
def model_object_mask_2_lambda(*args, **kwargs):
    """Return the ``'object_mask_2'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['object_mask_2']
def model_output_2_lambda(*args, **kwargs):
    """Return the ``'yolo_output_2'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['yolo_output_2']
def model_grid_loss_confidence_2_lambda(*args, **kwargs):
    """Return the ``'confidence_loss_grid_2'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['confidence_loss_grid_2']
def model_grid_loss_class_2_lambda(*args, **kwargs):
    """Return the ``'class_loss_grid_2'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['class_loss_grid_2']
def model_grid_loss_wh_1_lambda(*args, **kwargs):
    """Return the ``'wh_loss_grid_1'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['wh_loss_grid_1']
def model_grid_loss_xy_0_lambda(*args, **kwargs):
    """Return the ``'xy_loss_grid_0'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['xy_loss_grid_0']
def model_grid_loss_lambda(*args, **kwargs):
    """Return the ``'loss'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    return yolo_loss(*args, **kwargs)['loss']
def model_loss_lambda(*args, **kwargs):
    """Return the ``'loss'`` entry of the ``yolo_loss`` result.

    All positional and keyword arguments are forwarded to ``yolo_loss``
    unchanged.
    """
    loss_terms = yolo_loss(*args, **kwargs)
    return loss_terms['loss']
def loss_wrapper(outputs, pred, anchors, num_classes):
    """Call ``yolo_loss`` on the concatenation of ``outputs`` and ``pred``.

    Args:
        outputs: Iterable placed first in the list passed to ``yolo_loss``.
        pred: Iterable appended after ``outputs``.
        anchors: Forwarded as the ``anchors`` keyword.
        num_classes: Forwarded as the ``num_classes`` keyword.

    Returns:
        Whatever ``yolo_loss`` returns when called with
        ``ignore_thresh=0.5`` and ``print_loss=False``.
    """
    loss_inputs = list(outputs)
    loss_inputs.extend(pred)
    return yolo_loss(loss_inputs,
                     anchors=anchors,
                     num_classes=num_classes,
                     ignore_thresh=0.5,
                     print_loss=False)
def main(argv=None):
    """Optimise a bounded image patch against the page-based YOLOv3 model.

    The region of the screenshot delimited by the module-level ``src_pos``
    is turned into a trainable variable (``mask``). On every iteration the
    gradient of ``yolo_loss`` with respect to that patch is evaluated,
    L2-normalised, applied through an Adam optimiser, and the patch is then
    clipped so that each value stays within ``eps`` of its original pixel
    value. The ground truth fed to the loss is a single box built from the
    module-level ``target_pos``, rescaled to the 416x416 network input.

    Whenever detections exist and either the iteration index is a multiple
    of 50 or more than 50 iterations have passed since the last save, the
    current image with its detected boxes is written to ``output/abuse`` and
    shown with matplotlib.

    Args:
        argv: Unused; kept so the function can serve as a script entry point
            that receives command-line arguments.
    """
    np.random.seed(0)
    safe_mkdir("output/abuse")

    # Size of the source screenshot; used to rescale the target box to the
    # 416x416 network input.
    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    img = np.asarray(Image.open('../data/page_based/tj.png')).astype(
        np.float32)

    inv_ratio_h = 416.0 / input_h
    inv_ratio_w = 416.0 / input_w
    target_box = np.asarray([[[
        target_pos[0][0] * inv_ratio_w, target_pos[0][1] * inv_ratio_h,
        target_pos[1][0] * inv_ratio_w, target_pos[1][1] * inv_ratio_h, 0
    ]]])
    true_boxes = preprocess_true_boxes(target_box, [416, 416], anchors, 1)

    eps = 4.0
    epochs = 500

    # The trainable patch starts out as the original pixels of the source
    # region.
    mask_h = src_pos[1][1] - src_pos[0][1]
    mask_w = src_pos[1][0] - src_pos[0][0]
    mask_val = img[src_pos[0][1]:src_pos[1][1], src_pos[0][0]:src_pos[1][0]]
    mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
    padded_mask = tf.image.pad_to_bounding_box(mask, src_pos[0][1],
                                               src_pos[0][0],
                                               tf.shape(inputs)[1],
                                               tf.shape(inputs)[2])

    # black_mask is 0 inside the patch region and 1 elsewhere. It is a plain
    # tensor (not a 1-tuple) and broadcasts over the batch dimension.
    black_box = tf.ones([mask_h, mask_w, 3], dtype=tf.float32)
    black_mask = 1.0 - tf.image.pad_to_bounding_box(black_box, src_pos[0][1],
                                                    src_pos[0][0],
                                                    tf.shape(inputs)[1],
                                                    tf.shape(inputs)[2])
    blacked_inputs = tf.multiply(inputs, black_mask)
    masked_input = tf.clip_by_value(tf.add(blacked_inputs, padded_mask), 0,
                                    255)
    inputs_resized = tf.image.resize_images(masked_input, (416, 416),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    K.set_session(sess)
    # Initialise `mask` before the Keras model is loaded into the session.
    sess.run(tf.global_variables_initializer())

    # Re-root the loaded model on the patched, resized and [0, 1]-scaled
    # image tensor.
    model = load_model('../models/page_based_yolov3.h5')
    model.layers.pop(0)
    newInput = Input(tensor=inputs_resized / 255.)
    newOut = model(newInput)
    model = Model(newInput, newOut)

    grid_strides = {0: 32, 1: 16, 2: 8}
    y_true = [
        tf.placeholder(shape=(1, 416 // grid_strides[l],
                              416 // grid_strides[l], len(anchors) // 3,
                              1 + 5),
                       dtype=tf.float32) for l in range(3)
    ]
    print([y.get_shape().as_list() for y in y_true])

    loss = yolo_loss([*model.output, *y_true], anchors, 1)
    grad = tf.gradients(loss, mask)[0]

    # The gradient is fed back through a placeholder so it can be normalised
    # in NumPy before Adam applies it.
    opt = tf.train.AdamOptimizer(10.0)
    grad_ph = tf.placeholder(shape=grad.get_shape().as_list(),
                             dtype=tf.float32)
    assign_op = opt.apply_gradients([(grad_ph, mask)])
    # Only the optimiser's own variables are initialised here, so the model
    # weights and the patch keep their current values.
    sess.run(tf.variables_initializer(opt.variables()))

    # Keep the patch within +/- eps of the original pixel values.
    assign_eps_op = tf.assign(
        mask, tf.clip_by_value(mask, mask_val - eps, mask_val + eps))

    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(model.output,
                                       anchors,
                                       1,
                                       input_image_shape,
                                       score_threshold=0.5,
                                       iou_threshold=0.4)

    # None of the fed values change between iterations, so build them once.
    feed = {
        inputs: np.expand_dims(img, 0).astype(np.float32),
        input_image_shape: [416, 416],
        y_true[0]: np.asarray(true_boxes[0]),
        y_true[1]: np.asarray(true_boxes[1]),
        y_true[2]: np.asarray(true_boxes[2]),
        K.learning_phase(): 0
    }

    time_since_success = np.inf
    for i in range(epochs):
        (curr_grad, curr_loss, curr_img, out_boxes, out_scores,
         out_classes) = sess.run(
             [grad, loss, masked_input, boxes, scores, classes],
             feed_dict=feed)
        num_detect = len(out_boxes)
        print('test loss={:.3f}'.format(curr_loss),
              'num_boxes={}'.format(num_detect))

        # Apply the unit-norm gradient; the small constant avoids a division
        # by zero when the gradient vanishes.
        sess.run(assign_op,
                 feed_dict={
                     grad_ph:
                     curr_grad /
                     (np.linalg.norm(curr_grad.reshape(-1)) + 1e-8)
                 })
        sess.run(assign_eps_op)

        if ((i % 50 == 0) or (time_since_success > 50)) and num_detect > 0:
            img1 = draw_boxes(curr_img[0].astype(np.uint8), out_boxes,
                              out_scores, out_classes)
            img1.save("output/abuse/tj_{}.png".format(i))
            plt.imshow(img1)
            plt.show()
            time_since_success = 0
        else:
            time_since_success += 1