def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` and export a frozen detector graph.

    Builds the YOLOv3 graph under the ``detector`` variable scope, loads the
    weights from ``FLAGS.weights_file``, writes a frozen copy of the graph to
    ``./yolo_v3.pb`` (output node ``concat_1``), runs the detector on the
    resized image and saves the annotated original to ``FLAGS.output_img``.

    Args:
        argv: Unused.
    """
    source_image = Image.open(FLAGS.input_img)
    side = FLAGS.size
    network_image = source_image.resize(size=(side, side))
    class_names = load_coco_names(FLAGS.class_names)

    # Placeholder for detector inputs.
    inputs = tf.placeholder(tf.float32, [None, side, side, 3])

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(class_names), data_format='NHWC')
        detector_vars = tf.global_variables(scope='detector')
        load_ops = load_weights(detector_vars, FLAGS.weights_file)

    boxes = detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)

        # Freeze the variables into constants and dump the binary graph.
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, ['concat_1'])
        graph_io.write_graph(frozen_graph, './', 'yolo_v3.pb', as_text=False)

        batch = [np.array(network_image, dtype=np.float32)]
        raw_boxes = sess.run(boxes, feed_dict={inputs: batch})

    kept_boxes = non_max_suppression(
        raw_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(kept_boxes, source_image, class_names, (side, side))
    source_image.save(FLAGS.output_img)
def main(argv=None):
    """Run the YOLOv3 detector on ``FLAGS.input_img`` and save the result.

    The image is resized to ``FLAGS.size`` x ``FLAGS.size`` for the network,
    the raw predictions go through non-max suppression, and the surviving
    boxes are drawn on the original image, which is written to
    ``FLAGS.output_img``.

    Args:
        argv: Unused.
    """
    original = Image.open(FLAGS.input_img)
    net_size = (FLAGS.size, FLAGS.size)
    resized = original.resize(size=net_size)
    labels = load_coco_names(FLAGS.class_names)

    # Placeholder for detector inputs.
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(labels), data_format='NHWC')
        load_ops = load_weights(
            tf.global_variables(scope='detector'), FLAGS.weights_file)

    boxes = detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)
        pixels = np.array(resized, dtype=np.float32)
        predictions = sess.run(boxes, feed_dict={inputs: [pixels]})

    survivors = non_max_suppression(
        predictions,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(survivors, original, labels, net_size)
    original.save(FLAGS.output_img)
def detect_objs(files):
    """Run a checkpointed detector over ``files`` and save annotated copies.

    The graph is imported from ``yolov3-coco.meta`` and its weights restored
    from the latest checkpoint in the current directory. Each annotated image
    is written to ``FLAGS.output_dir`` under its original base name. The
    graph-import time and the per-image processing time are printed.

    Args:
        files: Iterable of image paths.
    """
    class_names = load_coco_names(FLAGS.class_names)

    tic = time.time()
    saver = tf.train.import_meta_graph('yolov3-coco.meta')
    graph = tf.get_default_graph()
    input_tensor = graph.get_tensor_by_name("Placeholder:0")
    output_tensor = graph.get_tensor_by_name("outputs:0")
    print(time.time() - tic)

    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint('./')
        saver.restore(sess, checkpoint)

        for path in files:
            tic = time.time()

            picture = Image.open(path)
            shrunk = picture.resize(size=(FLAGS.size, FLAGS.size))
            batch = [np.array(shrunk, dtype=np.float32)]
            raw_boxes = sess.run(output_tensor, {input_tensor: batch})

            kept_boxes = non_max_suppression(
                raw_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            draw_boxes(kept_boxes, picture, class_names,
                       (FLAGS.size, FLAGS.size))

            target = os.path.join(FLAGS.output_dir, os.path.basename(path))
            picture.save(target)

            print(time.time() - tic)
def calculate_loss(X, y):
    """Build the detector on ``X`` and apply non-max suppression to its boxes.

    NOTE(review): nothing is returned and ``y`` is never read, so despite the
    name no loss value is produced here. This looks unfinished -- confirm the
    intended behaviour before relying on it.

    Args:
        X: Input passed straight to ``yolo_v3``.
        y: Unused.
    """
    num_classes = len(load_coco_names(FLAGS.class_names))

    with tf.variable_scope('detector'):
        raw_detections = yolo_v3(X, num_classes, data_format='NHWC')

    box_predictions = detections_boxes(raw_detections)

    # The suppressed boxes are computed but discarded, exactly as before.
    non_max_suppression(
        box_predictions,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` and freeze the graph to a PB file.

    The detector is built with a fixed batch size of one on a placeholder
    named ``input``. After loading the weights the image is run through the
    network, the graph is frozen with ``output`` as the output node and
    serialized to ``MODEL_DIR/MODEL_NAME``. The annotated image is saved to
    ``FLAGS.output_img``.

    Args:
        argv: Unused.
    """
    picture = Image.open(FLAGS.input_img)
    net_size = (FLAGS.size, FLAGS.size)
    picture_resized = picture.resize(size=net_size)
    class_names = load_coco_names(FLAGS.class_names)

    # Placeholder for detector inputs.
    inputs = tf.placeholder(
        tf.float32, [1, FLAGS.size, FLAGS.size, 3], name="input")

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(class_names), data_format='NHWC')
        load_ops = load_weights(
            tf.global_variables(scope='detector'), FLAGS.weights_file)

    boxes = detections_boxes(detections)

    default_graph = tf.get_default_graph()
    pb_path = os.path.join(MODEL_DIR, MODEL_NAME)  # where the PB model is saved
    graph_def = default_graph.as_graph_def()

    with tf.Session() as sess:
        sess.run(load_ops)

        batch = [np.array(picture_resized, dtype=np.float32)]
        detected_boxes = sess.run(boxes, feed_dict={inputs: batch})

        # Persist the model: turn the variables into constants. Several
        # output nodes would be listed as separate entries.
        frozen_def = graph_util.convert_variables_to_constants(
            sess, graph_def, ["output"])

        # Serialize the frozen graph to disk.
        with tf.gfile.GFile(pb_path, "wb") as pb_file:
            pb_file.write(frozen_def.SerializeToString())

        # Number of operation nodes in the frozen graph.
        print("%d ops in the final graph." % len(frozen_def.node))

    print("detected_boxes[0].shape:", detected_boxes[0].shape)
    print("detected_boxes:", detected_boxes)

    kept_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(kept_boxes, picture, class_names, net_size)
    picture.save(FLAGS.output_img)
    print("done")
def main(argv=None):
    """Run the detector on every file in ``images/`` and save each result.

    The graph is written to ``logs/`` for TensorBoard and the loaded weights
    are checkpointed to ``models/yolov3.ckpt``. Each image is then annotated,
    shown with matplotlib and saved.

    Previously every image was saved to the same ``FLAGS.output_img`` path,
    so only the last result survived. Each image now gets its own file,
    derived from ``FLAGS.output_img`` by inserting the source file's stem
    before the extension (e.g. ``out.jpg`` -> ``out_dog.jpg``).

    Args:
        argv: Unused.
    """
    base_path = 'images'
    test_images = sorted(os.listdir(base_path))
    print(test_images)

    classes = load_coco_names(FLAGS.class_names)

    # Placeholder for detector inputs.
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(classes), data_format='NHWC')
        load_ops = load_weights(tf.global_variables(scope='detector'),
                                FLAGS.weights_file)

    boxes = detections_boxes(detections)

    saver = tf.train.Saver()
    out_root, out_ext = os.path.splitext(FLAGS.output_img)

    with tf.Session() as sess:
        sess.run(load_ops)

        writer = tf.summary.FileWriter("logs/", graph=sess.graph)
        writer.close()
        saver.save(sess, "models/yolov3.ckpt")

        for file_name in test_images:
            image = Image.open(os.path.join(base_path, file_name))
            img_resized = image.resize(size=(FLAGS.size, FLAGS.size))

            detected_boxes = sess.run(
                boxes,
                feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)

            draw_boxes(filtered_boxes, image, classes,
                       (FLAGS.size, FLAGS.size))
            plt.imshow(image)
            plt.show()

            # One output file per input so results no longer overwrite
            # each other.
            stem = os.path.splitext(file_name)[0]
            image.save('{}_{}{}'.format(out_root, stem, out_ext))
def main(argv=None):
    """Run a frozen detector graph over a set of images and log detections.

    Loads the frozen graph ``FLAGS.model`` into a session, feeds every image
    returned by ``load_imgs`` through the ``input:0`` / ``output:0`` tensors,
    applies non-max suppression and draws the boxes. The strings returned by
    ``draw_boxes`` are concatenated and written to ``FLAGS.output_file``.
    When ``FLAGS.output_dir`` is set, each annotated image is also saved
    there under its base name.

    Args:
        argv: Unused.
    """
    classes = load_coco_names(FLAGS.class_names)

    # Init tf Session
    config = tf.ConfigProto()
    if FLAGS.use_xla:
        jit_level = tf.OptimizerOptions.ON_1
        config.graph_options.optimizer_options.global_jit_level = jit_level
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load the frozen graph
    load_graph(sess, FLAGS.model)

    # Get the input and output tensors
    tf_input = sess.graph.get_tensor_by_name('input:0')
    tf_output = sess.graph.get_tensor_by_name('output:0')

    # load image path(s) from disk
    img_paths = load_imgs(FLAGS.input_img, FLAGS.subset_file)

    # Collect the per-image strings and join once at the end; repeated
    # ``+=`` on a growing string is quadratic in the worst case.
    det_chunks = []
    for img_path in tqdm(img_paths):
        # load image in memory
        img = Image.open(img_path)

        # create input batch
        input_data = np.stack([np.array(img, dtype=np.float32)])

        # infer bounding boxes
        detected_boxes = sess.run(tf_output, feed_dict={tf_input: input_data})

        # TODO: look into using tf.image.non_max_suppression instead
        # merge boxes using nms
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)

        # add detected bbox to string and render them on a given image
        det_chunks.append(draw_boxes(filtered_boxes, img, classes, img_path))

        # save image with prediction boxes to disk if output dir specified
        if FLAGS.output_dir:
            # basename handles the platform's path separators, unlike a
            # manual split on '/'.
            img.save(os.path.join(FLAGS.output_dir,
                                  os.path.basename(img_path)))

    # write detections to a file
    with open(FLAGS.output_file, 'w') as out:
        out.write("".join(det_chunks))
def main(argv=None):
    """Run the detector over ``FLAGS.input_dir/images`` and save annotated PNGs.

    Every image must have a matching ``labels/<name>.txt`` file (checked up
    front). Each image is resized to the network input size, run through the
    detector and non-max suppression, and the annotated original is written
    to ``FLAGS.output_dir/<index>.png``. Inference time and total time
    (including NMS) are printed per image.

    Args:
        argv: Unused.
    """
    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    detections, boxes = init_yolo(sess, inputs, len(classes),
                                  FLAGS.weights_file, header_size=4)

    image_files = get_images(os.path.join(FLAGS.input_dir, 'images'))
    image_names = [get_file_name(f) for f in image_files]
    label_files = [
        os.path.join(FLAGS.input_dir, 'labels', name + '.txt')
        for name in image_names
    ]
    assert np.all([os.path.isfile(lf) for lf in label_files])

    safe_mkdir(FLAGS.output_dir)

    for idx, image_file in enumerate(image_files):
        print(image_file)
        img_orig = Image.open(image_file)
        # Resize to the placeholder's spatial size. This was hard-coded to
        # 416, which fails to feed whenever FLAGS.size differs.
        img = img_orig.resize((FLAGS.size, FLAGS.size))

        t1 = timer()
        detected_boxes = sess.run(
            boxes, feed_dict={inputs: [np.array(img, dtype=np.float32)]})
        t2 = timer()
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)
        t3 = timer()

        print("\tinference time: {}".format(t2 - t1))
        print("\ttotal time: {}".format(t3 - t1))

        draw_boxes(filtered_boxes, img_orig, classes,
                   (FLAGS.size, FLAGS.size))
        img_orig.save(os.path.join(FLAGS.output_dir, "{}.png".format(idx)))
def main(argv=None):
    """Run the detector live on a webcam stream and display the detections.

    The webcam id is read from the command line (positional ``camid``).
    Frames are resized to the network input, converted from BGR to RGB,
    run through the detector and non-max suppression, and the boxes are
    drawn on a 1920x1080 copy of the frame. Press ``q`` or ESC to quit.

    The loop also ends when a frame cannot be read (camera unplugged, failed
    to open, end of stream) instead of crashing on a ``None`` frame, and the
    capture device and display window are always released on exit.

    Args:
        argv: Unused.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('camid', type=int, help='source webcam id')
    args = parser.parse_args()

    classes = load_coco_names(FLAGS.class_names)
    # Fixed seed so every class gets the same colour on every run.
    np.random.seed(2018)
    colors = [np.random.randint(0, 255, 3) for _ in range(len(classes))]

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(classes), data_format='NCHW')
        load_ops = load_weights(tf.global_variables(scope='detector'),
                                FLAGS.weights_file)

    boxes = detections_boxes(detections)

    vc = cv2.VideoCapture()
    vc.open(args.camid)

    try:
        with tf.Session() as sess:
            sess.run(load_ops)

            while True:
                ok, img = vc.read()
                if not ok:
                    # No frame available; img would be None here.
                    print('failed to read a frame from camera {}'.format(
                        args.camid))
                    break

                img_resized = cv2.resize(img, dsize=(FLAGS.size, FLAGS.size))
                img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

                detected_boxes = sess.run(
                    boxes,
                    feed_dict={
                        inputs: [np.array(img_resized, dtype=np.float32)]
                    })
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)

                img = cv2.resize(img, (1920, 1080))
                img = draw_boxes(filtered_boxes, img, classes,
                                 (FLAGS.size, FLAGS.size), colors=colors)
                cv2.imshow('detections', img)

                key = cv2.waitKey(1)
                if key == ord('q') or key & 0xFFFF == 27:
                    break
    finally:
        vc.release()
        cv2.destroyAllWindows()
def detect(self, image):
    """Detect ads in the image, return detection results as a dict.

    The image is resized to ``FLAGS.size`` x ``FLAGS.size`` and converted to
    RGB when it is in any other mode, then fed through ``self.sess``. Boxes
    for ``AD_TYPE`` that survive non-max suppression are rescaled to the
    original image size with ``scale_box``.

    The return value is as follows:
    {
        'size': [image_width, image_height],
        'boxes': [
            [x0, y0, x1, y1, probability],
            ...
        ],
        'detection_time': seconds spent on inference and post-processing,
    }

    ``'boxes'`` is an empty list when no ad is detected.
    """
    img = image.resize((FLAGS.size, FLAGS.size))
    # The network input has three channels; convert anything that is not
    # already RGB (RGBA, greyscale, palette, ...), not only RGBA.
    if img.mode != 'RGB':
        img = img.convert(mode='RGB')

    logging.info('Detecting ads')
    t1 = timer()

    detected_boxes = self.sess.run(
        self.boxes,
        feed_dict={self.inputs: [np.array(img, dtype=np.float32)]},
    )
    unique_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold,
    )
    # NOTE(review): unique_boxes is presumably a mapping keyed by class id
    # that only holds classes with detections; plain indexing raised
    # KeyError when no ad was found -- confirm against non_max_suppression.
    boxes = [
        scale_box(box, image.size) + [float(p)]
        for box, p in unique_boxes.get(AD_TYPE, [])
    ]

    t2 = timer()
    logging.debug('Detected boxes: {}'.format(boxes))
    logging.info('Detection complete: found {} ads in {} seconds'.format(
        len(boxes), t2 - t1))

    return {
        'size': image.size,
        'boxes': boxes,
        'detection_time': t2 - t1,
    }
def main(argv=None):
    """Run a frozen detector graph on ``FLAGS.input_img`` and save the result.

    The image is fed at its native size (no resize happens here) through the
    ``input:0`` / ``output:0`` tensors of the frozen graph ``FLAGS.model``.
    Boxes surviving non-max suppression are drawn on the image, which is then
    written to ``FLAGS.output_img``. The input shape and both tensors are
    printed for debugging.

    Args:
        argv: Unused.
    """
    picture = Image.open(FLAGS.input_img)
    batch = np.stack([np.array(picture, dtype=np.float32)])
    print("input shape: %s" % (batch.shape, ))

    class_names = load_coco_names(FLAGS.class_names)

    # Session configuration: optional XLA JIT, on-demand GPU memory.
    session_config = tf.ConfigProto()
    if FLAGS.use_xla:
        optimizer_options = session_config.graph_options.optimizer_options
        optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    session_config.gpu_options.allow_growth = True

    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    # Load the frozen graph into the session.
    load_graph(sess, FLAGS.model)

    # Look up the input and output tensors by name.
    input_tensor = sess.graph.get_tensor_by_name('input:0')
    print("input tensor:")
    print(input_tensor)
    output_tensor = sess.graph.get_tensor_by_name('output:0')
    print("output tensor:")
    print(output_tensor)

    raw_boxes = sess.run(output_tensor, feed_dict={input_tensor: batch})

    # TODO: look into using tf.image.non_max_suppression instead
    kept_boxes = non_max_suppression(
        raw_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(kept_boxes, picture, class_names)
    picture.save(FLAGS.output_img)
def detect_obj(file_path):
    """Detect objects in one image and save the annotated copy.

    A fresh detector graph is built and its weights are loaded on every
    call. The annotated image is written to ``FLAGS.output_dir`` under the
    base name of ``file_path``.

    The default graph is reset in a ``finally`` clause. Previously an
    exception anywhere in the function skipped the reset and left the
    ``detector`` variables behind in the default graph for the next call.

    Args:
        file_path: Path of the image to process.
    """
    try:
        img = Image.open(file_path)
        img_resized = img.resize(size=(FLAGS.size, FLAGS.size))

        classes = load_coco_names(FLAGS.class_names)

        # placeholder for detector inputs
        inputs = tf.placeholder(tf.float32,
                                [None, FLAGS.size, FLAGS.size, 3])

        with tf.variable_scope('detector'):
            detections = yolo_v3(inputs, len(classes), data_format='NHWC')
            load_ops = load_weights(tf.global_variables(scope='detector'),
                                    FLAGS.weights_file)

        boxes = detections_boxes(detections)

        with tf.Session() as sess:
            sess.run(load_ops)
            detected_boxes = sess.run(
                boxes,
                feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]})

        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)

        draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size))
        img.save(os.path.join(FLAGS.output_dir, os.path.basename(file_path)))
    finally:
        # The session is closed by now, so dropping the graph is safe.
        tf.reset_default_graph()
def main(argv=None):
    """Train a footer-strip mask against the detector and log progress.

    A trainable ``mask`` of ``mask_h`` x ``mask_w`` pixels replaces the
    bottom ``mask_h`` rows of every input image. The mask is optimised with
    Adam on a hinge loss over column 4 of the detector's box tensor; the
    loss reaches zero once that value exceeds ``1.1 * FLAGS.conf_threshold``,
    so descending it pushes those values up. Every 50 epochs the current
    mask and annotated test images are written to ``output/footer/``.

    ``argv`` is unused.
    """
    np.random.seed(0)
    safe_mkdir("output/footer/")
    classes = load_coco_names(FLAGS.class_names)

    # Fixed page size the mask geometry below is built for.
    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])
    x_min = tf.placeholder(tf.int32, shape=())
    # NOTE(review): y_min is fed below but never used to build any op here.
    y_min = tf.placeholder(tf.int32, shape=())

    mask_h = 20
    mask_w = input_w
    mask_val = np.zeros((mask_h, mask_w, 3), dtype=np.float32)
    mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
    # Place the (clipped) mask in the bottom mask_h rows of a full-size canvas.
    padded_mask = tf.image.pad_to_bounding_box(tf.clip_by_value(mask, 0, 255),
                                               input_h - mask_h, x_min,
                                               input_h, input_w)
    # Same footprint filled with ones; used to blank the input under the mask.
    black_box = np.ones_like(mask_val)
    padded_black_box = tf.image.pad_to_bounding_box(black_box,
                                                    input_h - mask_h, x_min,
                                                    input_h, input_w)
    masked_input = tf.clip_by_value(
        inputs * (1 - padded_black_box) + padded_mask, 0, 255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    detections, boxes_tensor = init_yolo(sess, inputs_resized, len(classes),
                                         FLAGS.weights_file, header_size=4)

    X_train_paths, Y_train = get_input_files_and_labels(
        FLAGS.input_dir + "/train/", input_h, input_w)
    X_test_paths, Y_test = get_input_files_and_labels(
        FLAGS.input_dir + "/test/", input_h, input_w)
    print(len(X_train_paths), len(X_test_paths))
    X_test = np.array([load_image(path)
                       for path in X_test_paths]).astype(np.float32)

    epochs = 251
    batch_size = 4

    # Hinge on column 4 of the box tensor (presumably the objectness
    # confidence -- TODO confirm against init_yolo).
    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]
    opt = tf.train.AdamOptimizer(10.0)
    # The gradient is computed in numpy, normalised, and fed back through
    # this placeholder rather than applied directly.
    grad_ph = tf.placeholder(shape=grad.get_shape().as_list(), dtype=tf.float32)
    assign_op = opt.apply_gradients([(grad_ph, mask)])
    sess.run(tf.variables_initializer(opt.variables()))
    # Projection step: keep mask values within [0, 32].
    assign_eps_op = tf.assign(mask, tf.clip_by_value(mask, 0, 32))

    for epoch in range(epochs):
        if epoch % 50 == 0:
            # Periodic evaluation on the test set.
            # box example: class_idx => array[([x0, y0, x1, y1]]
            # {0: [
            #     (array([1101, 581, 1400, 1007]), 1.0),
            #     (array([ 466, 140, 1436, 389]), 1.0),
            #     (array([1419, 25, 1540, 69]), 1.0)
            #   ]
            # }
            feed_dict = {
                inputs: X_test,
            }
            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input],
                           feed_dict, extra_feed={x_min: 0, y_min: 0})

            # Dump the current mask as an image.
            curr_mask = sess.run(mask)
            res = Image.fromarray(curr_mask.astype(np.uint8))
            res.save('output/footer/footer_{}.png'.format(epoch))
            res.close()

            num_detect = []
            for j in range(len(X_test)):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                # Save the masked input, then the same image with boxes drawn.
                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/footer/img_{}_{}.png".format(epoch, j))
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save("output/footer/img_boxes_{}_{}.png".format(epoch, j))

                # Disabled debugging plot.
                if False:
                    my_dpi = 96
                    plt.figure(figsize=(input_h / my_dpi, input_w / my_dpi),
                               dpi=my_dpi)
                    plt.imshow(np.array(img))
                    plt.show()

                # Number of labelled class-0 boxes, 0 if the image has none.
                ground_truth = 0 if 0 not in Y_test[j] else len(Y_test[j][0])
                if len(filtered_boxes) != 0:
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     ground_truth))
                else:
                    num_detect.append("{}/{}".format(0, ground_truth))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))

        # Sample a training batch without replacement.
        batch_idx = np.random.choice(len(X_train_paths), batch_size,
                                     replace=False)
        X_batch = np.array(
            [load_image(X_train_paths[idx]) for idx in batch_idx])
        Y_batch = [Y_train[idx] for idx in batch_idx]

        # Augmentation: with probability 0.25 paint a uniformly coloured
        # band of random height directly above the footer region.
        for i in range(batch_size):
            if np.random.random() > 0.75:
                h = np.random.randint(20, 100)
                c = np.random.randint(0, 255, size=1)
                X_batch[i, -(mask_h + h):-mask_h, :mask_w, :] = c

        i = 0
        start_score = 0
        max_steps = 10
        while i < max_steps:
            i += 1
            feed_dict = {inputs: np.clip(X_batch, 0, 255), y_min: 0, x_min: 0}
            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            num_detect = []
            tot_detected = 0
            tot_surplus = 0
            for j in range(batch_size):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                # NOTE(review): start_score is overwritten for every j on
                # the first step, so it ends up holding the count for the
                # last image processed so far, not a per-image baseline.
                if i == 1:
                    start_score = 0 if len(filtered_boxes) == 0 else len(
                        filtered_boxes[0])
                if len(filtered_boxes) != 0:
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     len(Y_batch[j][0])))
                    tot_detected += len(filtered_boxes[0])
                    # Count images with more class-0 boxes than labelled
                    # or than the starting count.
                    if len(filtered_boxes[0]) > len(Y_batch[j][0]) or len(
                            filtered_boxes[0]) > start_score:
                        tot_surplus += 1
                else:
                    num_detect.append("{}/{}".format(0, len(Y_batch[j][0])))

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}'.format(num_detect))

            if tot_surplus == batch_size:
                # Every image has surplus boxes: leave the loop early by
                # pushing i past max_steps.
                i = 1000
            else:
                # Adam step on the L2-normalised gradient, then project.
                sess.run(assign_op,
                         feed_dict={
                             grad_ph:
                             curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                         })
                sess.run(assign_eps_op)

        # Only reached with i == max_steps when the early exit never fired.
        if i == max_steps:
            print("no junk")
def main(argv=None):
    """Train an additive strip mask that raises detector confidence.

    A trainable ``mask`` (``mask_h`` x ``mask_w``) is added to the input at a
    vertical offset of 65 pixels. It is optimised with Adam on a hinge loss
    over column 4 of the detector's box tensor and clipped to
    ``[-eps, eps]`` after every step. Every 10 epochs the mask and the
    annotated test images are written to ``output/bbc_false_positive``.

    ``argv`` is unused.
    """
    np.random.seed(0)
    safe_mkdir("output/bbc_false_positive")
    classes = load_coco_names(FLAGS.class_names)

    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    mask_h = 50
    mask_w = input_w
    mask = tf.Variable(initial_value=np.zeros((mask_h, mask_w, 3)),
                       dtype=tf.float32)
    mask_resized = mask
    # Pad the mask to the size of the fed images, 65 rows from the top.
    mask_resized = tf.image.pad_to_bounding_box(mask_resized, 65, 0,
                                                tf.shape(inputs)[1],
                                                tf.shape(inputs)[2])
    masked_input = tf.clip_by_value(tf.add(inputs, mask_resized), 0, 255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # First 80 files are used for training, the rest for testing.
    image_files = np.array(get_images(os.path.join(FLAGS.input_dir, 'images')))
    X_train = image_files[:80]
    X_test = image_files[80:]

    eps = 4.0
    epochs = 121
    batch_size = 8

    detections, boxes_tensor = init_yolo(sess, inputs_resized, len(classes),
                                         FLAGS.weights_file, header_size=4)

    # Hinge on column 4 of the box tensor (presumably the objectness
    # confidence -- TODO confirm); zero once it exceeds 1.1 * conf_threshold.
    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]
    # The normalised gradient is fed back through this placeholder.
    full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)
    opt = tf.train.AdamOptimizer(1)
    assign_op = opt.apply_gradients([(full_grad, mask)])
    sess.run(tf.variables_initializer(opt.variables()))
    # Projection step: keep the mask within [-eps, eps].
    eps_assign_op = tf.assign(mask, tf.clip_by_value(mask, -eps, eps))

    for epoch in range(epochs):
        if epoch % 10 == 0:
            # Periodic evaluation on the test images.
            X_test_imgs = np.array([
                np.array(load_image(image_file), dtype=np.float32)
                for image_file in X_test
            ])
            feed_dict = {
                inputs: X_test_imgs,
            }
            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input], feed_dict)
            print(detected_boxes.shape)

            # NOTE(review): the mask may hold negative values (clipped to
            # [-eps, eps]); casting those to uint8 will not preserve them.
            curr_mask = sess.run(mask)
            res = Image.fromarray(curr_mask.astype(np.uint8))
            res.save('output/bbc_false_positive/mask_{}.png'.format(epoch))
            res.close()

            num_detect = []
            for j in range(len(X_test)):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                # Save the masked input, then the same image with boxes drawn.
                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/bbc_false_positive/img_{}_{}.png".format(
                    epoch, j))
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save(
                    "output/bbc_false_positive/img_boxes_{}_{}.png".format(
                        epoch, j))
                img.close()
                # Images without detections contribute no entry here.
                if len(filtered_boxes) != 0:
                    num_detect.append(len(filtered_boxes[0]))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))

        # Sample a training batch without replacement.
        batch_idx = np.random.choice(len(X_train), batch_size, replace=False)
        X_batch = np.array([
            np.array(load_image(image_file), dtype=np.float32)
            for image_file in X_train[batch_idx]
        ])

        i = 0
        # Baseline of one box per image for the early-exit test below.
        orig_detected = [1] * batch_size
        feed_dict = {
            inputs: X_batch,
        }
        while i < 50:
            i += 1
            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            num_detect = []
            for j in range(batch_size):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                if len(filtered_boxes) != 0:
                    num_detect.append(len(filtered_boxes[0]))
                else:
                    num_detect.append(0)

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}/{}'.format(num_detect, batch_size))

            # Every image has more than one box: make this the last
            # iteration (the update below still runs once more).
            if np.all(np.array(num_detect) > np.array(orig_detected)):
                i = 1000

            # Adam step on the L2-normalised gradient, then project.
            sess.run(assign_op,
                     feed_dict={
                         full_grad:
                         curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                     })
            sess.run(eps_assign_op)
def main(argv):
    """Evaluate the detector against labelled regions and write a summary.

    For every ``(image_file, regions)`` pair from ``load_image_metadata`` the
    image is run through the detector, boxes are de-duplicated with non-max
    suppression at ``FLAGS.supp_threshold`` and then filtered at
    ``FLAGS.conf_threshold``. The result is compared with the labelled
    regions (TP / FN / FP), both sets of boxes are drawn on the original
    image, and the image is saved as ``<index>.png`` in ``FLAGS.output_dir``.
    Per-image statistics are collected and passed to ``finalize_summary``
    together with the path ``FLAGS.output_dir/summary.json``.

    Args:
        argv: Unused.
    """
    classes = load_coco_names(FLAGS.class_names)

    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    detections, boxes = init_yolo(
        sess, inputs, len(classes), FLAGS.weights_file, header_size=4,
    )

    image_meta = load_image_metadata(FLAGS.input_dir)
    safe_mkdir(FLAGS.output_dir)

    summary = {
        'flags': {
            'input_dir': FLAGS.input_dir,
            'output_dir': FLAGS.output_dir,
            'size': FLAGS.size,
            'suppression_threshold': FLAGS.supp_threshold,
            'detection_threshold': FLAGS.conf_threshold,
            'iou_threshold': FLAGS.iou_threshold,
            'match_threshold': FLAGS.match_threshold,
        },
        'images': [],
    }

    for idx, (image_file, regions) in enumerate(image_meta):
        in_name = os.path.basename(image_file)
        out_name = '{}.png'.format(idx)
        print(in_name, '->', out_name)

        img_orig = Image.open(image_file)
        # Resize to the placeholder's spatial size. This was hard-coded to
        # 416, which fails to feed whenever FLAGS.size differs.
        img = img_orig.resize((FLAGS.size, FLAGS.size))
        # The network input has three channels; convert anything that is
        # not already RGB, not only RGBA.
        if img.mode != 'RGB':
            img = img.convert(mode='RGB')

        t1 = timer()
        detected_boxes = sess.run(
            boxes, feed_dict={inputs: [np.array(img, dtype=np.float32)]},
        )
        t2 = timer()

        unique_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.supp_threshold,
            iou_threshold=FLAGS.iou_threshold,
        )
        # Second, stricter threshold applied after suppression. The inner
        # name is distinct from the loop's ``regions`` to avoid confusion.
        filtered_boxes = {
            rtype: [
                (box, p) for box, p in found if p > FLAGS.conf_threshold
            ]
            for rtype, found in unique_boxes.items()
        }

        scaled_regions = scale_regions(regions, FLAGS.size)
        tp, fn, fp = compare(filtered_boxes, scaled_regions)
        t3 = timer()

        print('\ttotal time: {}'.format(t3 - t1))
        # The 1e-5 terms avoid division by zero when a count is empty.
        print('\tTP:{} FN:{} FP:{} Recall:{:.2%} Precision:{:.2%}'
              .format(tp, fn, fp,
                      tp / (tp + fn + 1e-5),
                      tp / (tp + fp + 1e-5)))

        # Labelled regions in green, detections with the default colours.
        draw_boxes(scaled_regions, img_orig, classes,
                   (FLAGS.size, FLAGS.size), (0, 255, 0))
        draw_boxes(filtered_boxes, img_orig, classes,
                   (FLAGS.size, FLAGS.size))
        img_orig.save(os.path.join(FLAGS.output_dir, out_name))

        summary['images'].append({
            'in_name': in_name,
            'out_name': out_name,
            'nn_time': t2 - t1,
            'total_time': t3 - t1,
            'tp': tp,
            'fn': fn,
            'fp': fp,
            'detected_boxes': conv_boxes(unique_boxes),
            'boxes_above_threshold': conv_boxes(filtered_boxes),
            'marked_boxes': conv_boxes(scaled_regions),
        })

    finalize_summary(
        summary, os.path.join(FLAGS.output_dir, 'summary.json'),
    )
def main(argv=None):
    """Train a tiled, low-opacity overlay mask against the detector.

    A small trainable ``mask`` is tiled ``mask_tile`` times in each spatial
    direction, padded to the page size and alpha-blended (``alpha = 0.01``)
    over the input. It is optimised with Adam on a hinge loss over column 4
    of the detector's box tensor. Before each optimisation round the
    labelled boxes in the training batch are moved by a random jitter. Every
    50 epochs the mask and annotated test images are written to
    ``output/overflow/``.

    ``argv`` is unused.
    """
    np.random.seed(0)
    safe_mkdir("output/overflow/")
    classes = load_coco_names(FLAGS.class_names)

    # Fixed page size the mask geometry below is built for.
    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    X_train_paths, Y_train = get_input_files_and_labels(
        FLAGS.input_dir + "/train/", input_h, input_w)
    X_test_paths, Y_test = get_input_files_and_labels(
        FLAGS.input_dir + "/test/", input_h, input_w)
    print(len(X_train_paths), len(X_test_paths))
    X_test = np.array([load_image(path) for path in X_test_paths])

    epochs = 201
    batch_size = 8

    mask_tile = 8
    mask_val = np.zeros((input_h // mask_tile, input_w // mask_tile, 3),
                        dtype=np.float32)
    mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
    # Pixels left over because the page size is not a multiple of mask_tile;
    # the tiled mask is centred inside the page.
    slack_h = input_h - mask_val.shape[0] * mask_tile
    slack_w = input_w - mask_val.shape[1] * mask_tile
    tiled_mask = tf.image.pad_to_bounding_box(
        tf.tile(mask, [mask_tile, mask_tile, 1]), slack_h // 2, slack_w // 2,
        input_h, input_w)

    # Blend the tiled mask over the input at 1% opacity.
    alpha = 0.01
    masked_input = tf.clip_by_value((1 - alpha) * inputs + alpha * tiled_mask,
                                    0, 255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    detections, boxes_tensor = init_yolo(sess, inputs_resized, len(classes),
                                         FLAGS.weights_file, header_size=4)

    # Hinge on column 4 of the box tensor (presumably the objectness
    # confidence -- TODO confirm); zero once it exceeds 1.1 * conf_threshold.
    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]
    # Gradient w.r.t. the input images; fetched below but not used further.
    grad_img = tf.gradients(loss, inputs)[0]
    # The normalised mask gradient is fed back through this placeholder.
    full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)
    opt = tf.train.AdamOptimizer(1.0)
    assign_op = opt.apply_gradients([(full_grad, mask)])
    sess.run(tf.variables_initializer(opt.variables()))
    # Projection step: keep the mask a valid image in [0, 255].
    assign_eps_op = tf.assign(mask, tf.clip_by_value(mask, 0, 255))

    for epoch in range(epochs):
        if epoch % 50 == 0:
            # Periodic evaluation: dump the mask and annotated test images.
            mask_np = sess.run(mask)
            mask_img = Image.fromarray(mask_np.astype(np.uint8))
            mask_img.save("output/overflow/mask_{}.png".format(epoch))
            mask_img.close()

            X_test_copy = np.copy(X_test).astype(np.float32)
            X_test_copy = np.clip(X_test_copy, 0, 255)
            feed_dict = {inputs: X_test_copy}
            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input], feed_dict)

            num_detect = []
            num_evaded = 0
            for j in range(len(X_test)):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                # Save the blended input, then the same image with boxes.
                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/overflow/img_{}_{}.png".format(epoch, j))
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save("output/overflow/img_boxes_{}_{}.png".format(
                    epoch, j))
                img.close()

                # Disabled debugging plot.
                if False:
                    my_dpi = 96
                    plt.figure(figsize=(input_h / my_dpi, input_w / my_dpi),
                               dpi=my_dpi)
                    plt.imshow(np.array(img))
                    plt.show()

                # Number of labelled class-0 boxes, 0 if the image has none.
                ground_truth = 0 if 0 not in Y_test[j] else len(Y_test[j][0])
                if len(filtered_boxes) != 0:
                    # Fewer class-0 detections than labels counts as evaded.
                    if len(filtered_boxes[0]) < ground_truth:
                        num_evaded += 1
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     ground_truth))
                else:
                    # No detections at all is always counted as evaded.
                    num_evaded += 1
                    num_detect.append("{}/{}".format(0, ground_truth))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))
            print("evaded {} ads".format(num_evaded))

        # Sample a training batch without replacement.
        batch_idx = np.random.choice(len(X_train_paths), batch_size,
                                     replace=False)
        X_batch = np.array(
            [load_image(X_train_paths[idx]) for idx in batch_idx])
        Y_batch = [Y_train[idx] for idx in batch_idx]

        # Augmentation: move every labelled box by a random offset that
        # keeps it inside the page.
        jitter_x_low, jitter_x_high = -500, 500
        jitter_y_low, jitter_y_high = -50, 50
        # NOTE(review): jitters_x / jitters_y are filled but not read later
        # in this function.
        jitters_x = np.zeros(len(X_batch))
        jitters_y = np.zeros(len(X_batch))

        # NOTE(review): the loop variable reuses the name batch_idx and so
        # replaces the index array sampled above.
        for batch_idx in range(len(X_batch)):
            boxes = Y_batch[batch_idx][0]
            for (box, conf) in boxes:
                x0, y0, x1, y1 = box
                h = y1 - y0
                w = x1 - x0

                # Bound the jitter so the moved box stays within the page.
                low_x = max(-x0, jitter_x_low)
                high_x = min(input_w - x1, jitter_x_high)
                jitter_x = np.random.randint(low_x, high_x)
                low_y = max(-y0, jitter_y_low)
                high_y = min(input_h - y1, jitter_y_high)
                jitter_y = np.random.randint(low_y, high_y)
                jitters_x[batch_idx] = jitter_x
                jitters_y[batch_idx] = jitter_y

                # Cut the box out, fill its old place (plus 5 rows above
                # and below) with a pixel sampled just outside its
                # bottom-right corner, and paste it at the new position.
                ad = X_batch[batch_idx, y0:y1, x0:x1, :].copy()
                background = X_batch[batch_idx,
                                     min(y0 + 5, input_h - 1),
                                     min(x1 + 5, input_w - 1), :]
                X_batch[batch_idx, y0 - 5:y1 + 5, x0:x1, :] = background
                y0 = y0 + jitter_y
                x0 = x0 + jitter_x
                X_batch[batch_idx, y0:y0 + h, x0:x0 + w, :] = ad

        max_steps = 10
        i = 0
        # Per-image class-0 detection counts recorded on the first step.
        num_original = []
        while i < max_steps:
            i += 1
            feed_dict = {inputs: np.clip(X_batch, 0, 255)}
            curr_grad, curr_grad_img, curr_loss, detected_boxes = \
                sess.run([grad, grad_img, loss, boxes_tensor],
                         feed_dict=feed_dict)

            num_detect = []
            # NOTE(review): num_evaded is accumulated here but not printed
            # or used in this loop.
            num_evaded = 0
            for j in range(batch_size):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)
                if i == 1:
                    if len(filtered_boxes) != 0:
                        num_original.append(len(filtered_boxes[0]))
                    else:
                        num_original.append(0)
                if len(filtered_boxes) != 0:
                    if len(filtered_boxes[0]) < num_original[j]:
                        num_evaded += num_original[j] - len(filtered_boxes[0])
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     num_original[j]))
                else:
                    num_evaded += num_original[j]
                    num_detect.append("{}/{}".format(0, num_original[j]))

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}'.format(num_detect))

            # Adam step on the L2-normalised gradient, then project.
            sess.run(assign_op,
                     feed_dict={
                         full_grad:
                         curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                     })
            sess.run(assign_eps_op)
def main(argv=None):
    """Scroll through a full-page screenshot and compare clean vs. adversarial.

    The page ``FLAGS.full_page`` is resized to ``input_w`` pixels wide and
    cut into 100 viewport-sized windows at evenly spaced vertical offsets.
    For every window two images are fed as one batch: the clean window and
    an adversarial copy. The copy has ``FLAGS.footer`` pasted over its
    bottom rows (when set) and the tiled ``FLAGS.mask`` blended in at 1%
    opacity (when set; the clean image gets opacity 0). The number of
    class-0 detections for both is printed and both annotated images are
    saved to ``output/scroll/``.

    The page height is now scaled by the same factor as the width
    (``input_w / w``). The previous code multiplied by the inverse factor,
    which distorted the page whenever its width differed from ``input_w``.

    Args:
        argv: Unused.
    """
    safe_mkdir("output/scroll/")
    np.random.seed(0)
    classes = load_coco_names(FLAGS.class_names)

    # Viewport size.
    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])
    inputs_resized = tf.image.resize_images(inputs,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    if FLAGS.mask:
        # One opacity per batch element: [clean, adversarial].
        alpha = tf.placeholder(shape=(2, 1, 1, 1), dtype=tf.float32)
        mask_tile = 8
        mask_val = load_image(FLAGS.mask)
        mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
        # Centre the tiled mask inside the viewport.
        slack_h = input_h - mask_val.shape[0] * mask_tile
        slack_w = input_w - mask_val.shape[1] * mask_tile
        tiled_mask = tf.image.pad_to_bounding_box(
            tf.tile(mask, [mask_tile, mask_tile, 1]),
            slack_h // 2, slack_w // 2, input_h, input_w)
        masked_input = tf.clip_by_value(
            (1 - alpha) * inputs + alpha * tiled_mask, 0, 255)
        inputs_resized = tf.image.resize_images(masked_input,
                                                (FLAGS.size, FLAGS.size),
                                                align_corners=True)

    if FLAGS.footer:
        footer = load_image(FLAGS.footer)
        footer_h = footer.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    detections, boxes_tensor = init_yolo(sess, inputs_resized, len(classes),
                                         FLAGS.weights_file, header_size=4)

    full_page = Image.open(FLAGS.full_page)
    w, h = full_page.size
    # Bring the page to input_w pixels wide and scale the height by the
    # same factor so the aspect ratio is preserved.
    scale = input_w / (1.0 * w)
    new_h = int(h * scale)
    full_page = np.array(
        full_page.resize((input_w, new_h))).astype(np.float32)
    full_page = cv2.cvtColor(full_page, cv2.COLOR_RGBA2RGB)

    # Vertical distance available for scrolling, split into equal steps.
    to_scroll = new_h - input_h
    print(to_scroll)
    num_outputs = 100
    scroll_dh = to_scroll // num_outputs

    for i in range(num_outputs):
        top = i * scroll_dh
        img = full_page[top:top + input_h, :, :].astype(np.float32)
        img_adv = img.copy().astype(np.float32)
        if FLAGS.footer:
            img_adv[-footer_h:, :, :] = footer

        feed_dict = {inputs: [img, img_adv]}
        if FLAGS.mask:
            feed_dict[alpha] = [[[[0.0]]], [[[0.01]]]]

        detected_boxes = sess.run(boxes_tensor, feed_dict=feed_dict)

        filtered_boxes = non_max_suppression(
            detected_boxes[:1],
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)
        filtered_boxes_adv = non_max_suppression(
            detected_boxes[1:],
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)

        num_ads = 0 if len(filtered_boxes) == 0 else len(filtered_boxes[0])
        num_ads_adv = (0 if len(filtered_boxes_adv) == 0
                       else len(filtered_boxes_adv[0]))
        print(i, num_ads, num_ads_adv)

        img = Image.fromarray(img.astype(np.uint8))
        draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size))
        img.save("output/scroll/img_boxes_{0:03d}.png".format(i))

        img_adv = Image.fromarray(img_adv.astype(np.uint8))
        draw_boxes(filtered_boxes_adv, img_adv, classes,
                   (FLAGS.size, FLAGS.size))
        img_adv.save("output/scroll/img_adv_boxes_{0:03d}.png".format(i))
def main(argv=None):
    """Optimise a near-white banner that suppresses detections on test pages.

    A single ``mask_h`` x ``mask_w`` banner variable is pasted into every
    input image at an offset derived from that image's first labelled box.
    Each epoch draws a random batch of training pages, copies a randomly
    chosen labelled region from the batch into each page, and then takes up
    to 50 Adam steps (learning rate 1.0, L2-normalised gradient) that minimise
    the summed hinge of channel 4 of the box tensor above
    ``0.1 * FLAGS.conf_threshold``. After every step the banner is clipped
    to ``[255 - eps, 255]``. Every tenth epoch the held-out pages are
    evaluated and the masked inputs, with and without drawn boxes, are saved
    under ``output/bbc_evade``.

    Args:
        argv: Not used by this function (presumably supplied by the app
            runner that invokes ``main`` -- confirm against the caller).
    """
    np.random.seed(0)
    safe_mkdir("output/bbc_evade")

    classes = load_coco_names(FLAGS.class_names)

    # Page size, in pixels, that the labels are converted to.
    input_h = 1013
    input_w = 1919

    inputs = tf.placeholder(tf.float32, [None, None, None, 3])
    x_min = tf.placeholder(tf.int32, shape=[None])
    y_min = tf.placeholder(tf.int32, shape=[None])
    # NOTE(review): x_min2 / y_min2 are fed below but no op built in this
    # function reads them; kept so the feed dictionaries stay unchanged.
    x_min2 = tf.placeholder(tf.int32, shape=[None])
    y_min2 = tf.placeholder(tf.int32, shape=[None])

    mask_h = 40
    mask_w = 820 + 200
    # The "0 *" cancels the random term, so the banner starts as all 255.
    mask = tf.Variable(
        initial_value=255 + 0 * np.random.randint(
            low=0, high=255, size=(mask_h, mask_w, 3)),
        dtype=tf.float32)

    # Banner placed on an otherwise-zero canvas, one canvas per batch element.
    padded_mask = tf.map_fn(
        lambda dims: tf.image.pad_to_bounding_box(
            mask, dims[0], dims[1],
            tf.shape(inputs)[1], tf.shape(inputs)[2]),
        (y_min, x_min), dtype=tf.float32)

    # 0 inside the banner rectangle and 1 elsewhere: multiplying by it blanks
    # the pixels the banner is about to replace.
    black_box = tf.ones([mask_h, mask_w, 3], dtype=tf.float32)
    black_mask = 1.0 - tf.map_fn(
        lambda dims: tf.image.pad_to_bounding_box(
            black_box, dims[0], dims[1],
            tf.shape(inputs)[1], tf.shape(inputs)[2]),
        (y_min, x_min), dtype=tf.float32)

    blacked_inputs = tf.multiply(inputs, black_mask)
    masked_input = tf.clip_by_value(
        tf.add(blacked_inputs, padded_mask), 0, 255)
    inputs_resized = tf.image.resize_images(
        masked_input, (FLAGS.size, FLAGS.size), align_corners=True)

    def _mask_feed(images, labels):
        # Build the feed dict that positions the banner for each image from
        # its first labelled box, unpacked elsewhere as (x0, y0, x1, y1).
        boxes = [label[0][0][0] for label in labels]
        x0 = [box[0] + 5 - 100 for box in boxes]
        y1 = [box[-1] - 10 for box in boxes]
        y0 = [box[1] + 5 for box in boxes]
        return {
            inputs: images,
            x_min: x0,
            y_min: y1,
            x_min2: x0,
            y_min2: y0,
        }

    def _suppress(raw_boxes, j):
        # Run NMS on the j-th image of a batch of raw detections.
        return non_max_suppression(
            raw_boxes[j:j + 1],
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)

    def _count_boxes(filtered):
        # Number of boxes in entry 0 of an NMS result; 0 for an empty result.
        return len(filtered[0]) if len(filtered) > 0 else 0

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25

    # The context manager releases the session even if a step below raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        image_files = np.array(
            get_images(os.path.join(FLAGS.input_dir, 'images')))
        np.random.seed(0)
        np.random.shuffle(image_files)

        image_names = [get_file_name(f) for f in image_files]
        label_files = [
            os.path.join(FLAGS.input_dir, 'labels', name + '.txt')
            for name in image_names
        ]
        assert np.all([os.path.isfile(lf) for lf in label_files])

        all_labels = np.array(
            [load_labels(label_file) for label_file in label_files])
        all_labels = [
            convert_labels(labels, (input_w, input_h))
            for labels in all_labels
        ]

        # First 80 shuffled pages train the banner; the rest are held out.
        # X_train stays an array of paths so it can be fancy-indexed below.
        X_train = image_files[:80]
        Y_train = all_labels[:80]
        X_test = image_files[80:]
        Y_test = all_labels[80:]

        X_test = np.array([
            np.array(load_image(image_file), dtype=np.float32)
            for image_file in X_test
        ])
        print(len(X_train), len(X_test))

        epochs = 151
        batch_size = 8
        max_steps = 50

        _, boxes_tensor = init_yolo(
            sess, inputs_resized, len(classes), FLAGS.weights_file,
            header_size=4)

        # Hinge on channel 4 of every predicted box (presumably the
        # confidence score -- confirm against init_yolo's output layout).
        loss = tf.reduce_sum(
            tf.nn.relu(boxes_tensor[:, :, 4] - 0.1 * FLAGS.conf_threshold))
        grad = tf.gradients(loss, mask)[0]

        # The gradient is normalised in numpy and fed back in, so Adam sees a
        # unit-norm gradient rather than the raw one.
        full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)
        eps = 3.0
        opt = tf.train.AdamOptimizer(1.0)
        assign_op = opt.apply_gradients([(full_grad, mask)])
        # Build the projection op once. Creating it inside the loop would add
        # new nodes to the graph on every optimisation step.
        clip_op = tf.assign(mask, tf.clip_by_value(mask, 255 - eps, 255))
        sess.run(tf.variables_initializer(opt.variables()))

        for epoch in range(epochs):
            if epoch % 10 == 0:
                # Periodic evaluation on the held-out pages.
                feed_dict = _mask_feed(X_test, Y_test)
                detected_boxes, curr_inputs = batch_eval(
                    sess, [boxes_tensor, masked_input], feed_dict)

                num_detect = []
                for j in range(len(X_test)):
                    filtered_boxes = _suppress(detected_boxes, j)

                    img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                    img.save(
                        "output/bbc_evade/img_{}_{}.png".format(epoch, j))
                    draw_boxes(filtered_boxes, img, classes,
                               (FLAGS.size, FLAGS.size))
                    img.save("output/bbc_evade/img_boxes_{}_{}.png".format(
                        epoch, j))

                    num_detect.append(_count_boxes(filtered_boxes))
                print('num_boxes={}'.format(num_detect))

            batch_idx = np.random.choice(
                len(X_train), batch_size, replace=False)
            X_batch = np.array([
                np.array(load_image(image_file), dtype=np.float32)
                for image_file in X_train[batch_idx]
            ])
            Y_batch = [Y_train[i] for i in batch_idx]

            # Paste a randomly chosen labelled region from the batch into
            # each page, anchored at that page's own first box corner.
            ad_idx = np.random.choice(len(X_batch), batch_size, replace=True)
            for i in range(batch_size):
                x0, y0, x1, y1 = Y_batch[ad_idx[i]][0][0][0]
                ad = X_batch[ad_idx[i], y0:y1, x0:x1, :]

                x0b, y0b, _, _ = Y_batch[i][0][0][0]
                x1b = x0b + (x1 - x0)
                y1b = y0b + (y1 - y0)
                X_batch[i, y0b:y1b, x0b:x1b, :] = ad

            feed_dict = _mask_feed(X_batch, Y_batch)

            for step in range(1, max_steps + 1):
                curr_grad, curr_loss, detected_boxes = sess.run(
                    [grad, loss, boxes_tensor], feed_dict=feed_dict)

                num_detect = sum(
                    _count_boxes(_suppress(detected_boxes, j))
                    for j in range(batch_size))

                print(epoch, step,
                      'loss={:.3f}'.format(curr_loss / batch_size),
                      'num_boxes={}/{}'.format(num_detect, batch_size))

                # The update is applied on the final step as well, before
                # the loop stops.
                sess.run(assign_op, feed_dict={
                    full_grad: curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                })
                sess.run(clip_op)

                if num_detect == 0:
                    break