Esempi in Python per non_max_suppression, esempi in Python per yolo_v3.non_max_suppression

Esempio n. 1

0

Mostra file

def main(argv=None):
    """Detect objects in one image and export the detector as a frozen graph.

    All settings come from ``FLAGS``: input image path, network input size,
    class-name file, weights file, the two NMS thresholds and the output
    image path. ``argv`` is not used by the body.

    Side effects: writes ``./yolo_v3.pb`` (binary, variables converted to
    constants) and saves the annotated image to ``FLAGS.output_img``.
    """
    source_image = Image.open(FLAGS.input_img)
    network_image = source_image.resize(size=(FLAGS.size, FLAGS.size))

    class_names = load_coco_names(FLAGS.class_names)

    # Input batch: any number of FLAGS.size x FLAGS.size images, 3 channels.
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        raw_detections = yolo_v3(
            inputs, len(class_names), data_format='NHWC')
        detector_variables = tf.global_variables(scope='detector')
        weight_load_ops = load_weights(detector_variables, FLAGS.weights_file)

    box_tensor = detections_boxes(raw_detections)

    with tf.Session() as session:
        session.run(weight_load_ops)

        # Freeze with 'concat_1' as the only output node, then dump to disk.
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            session, session.graph_def, ['concat_1'])
        graph_io.write_graph(
            frozen_graph_def, './', 'yolo_v3.pb', as_text=False)

        feed = {inputs: [np.array(network_image, dtype=np.float32)]}
        detected_boxes = session.run(box_tensor, feed_dict=feed)

    kept_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    # Boxes are drawn on the original (un-resized) image.
    draw_boxes(kept_boxes, source_image, class_names,
               (FLAGS.size, FLAGS.size))
    source_image.save(FLAGS.output_img)

Esempio n. 2

0

Mostra file

File: demo.py Progetto: sudo-install-MW/kaggle_ship_detection

def main(argv=None):
    """Run the YOLOv3 detector on a single image and save the annotated copy.

    Everything is configured through ``FLAGS`` (input/output image paths,
    network size, class-name file, weights file, confidence and IoU
    thresholds). ``argv`` is not used by the body.
    """
    original = Image.open(FLAGS.input_img)
    scaled = original.resize(size=(FLAGS.size, FLAGS.size))

    labels = load_coco_names(FLAGS.class_names)

    # Detector input: batch of FLAGS.size x FLAGS.size images, 3 channels.
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        # Channels-last layout is the one in use here.
        net_output = yolo_v3(inputs, len(labels), data_format='NHWC')
        scope_variables = tf.global_variables(scope='detector')
        assign_weights = load_weights(scope_variables, FLAGS.weights_file)

    boxes_op = detections_boxes(net_output)

    with tf.Session() as session:
        session.run(assign_weights)

        image_batch = [np.array(scaled, dtype=np.float32)]
        detected_boxes = session.run(boxes_op, feed_dict={inputs: image_batch})

    surviving_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    # Annotate the full-resolution image, not the resized network input.
    draw_boxes(surviving_boxes, original, labels, (FLAGS.size, FLAGS.size))
    original.save(FLAGS.output_img)

Esempio n. 3

0

Mostra file

File: object_detector_saved_model.py Progetto: lucdew/ml-experiments

def detect_objs(files):
    """Run a saved detector over ``files`` and write one annotated image each.

    The graph is imported from ``yolov3-coco.meta`` and its variables are
    restored from the latest checkpoint in the current directory. Each
    annotated image is saved into ``FLAGS.output_dir`` under the input's
    base name. Elapsed seconds are printed for the graph import and for
    every image.

    Args:
        files: iterable of image paths to process.
    """
    class_names = load_coco_names(FLAGS.class_names)

    import_started = time.time()
    saver = tf.train.import_meta_graph('yolov3-coco.meta')
    default_graph = tf.get_default_graph()
    # Input and output tensors are looked up by their names in the graph.
    input_tensor = default_graph.get_tensor_by_name("Placeholder:0")
    output_tensor = default_graph.get_tensor_by_name("outputs:0")

    print(time.time() - import_started)

    with tf.Session() as session:
        checkpoint = tf.train.latest_checkpoint('./')
        saver.restore(session, checkpoint)

        for path in files:
            image_started = time.time()

            image = Image.open(path)
            resized = image.resize(size=(FLAGS.size, FLAGS.size))
            batch = [np.array(resized, dtype=np.float32)]
            detected_boxes = session.run(output_tensor, {input_tensor: batch})

            kept_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)

            draw_boxes(kept_boxes, image, class_names,
                       (FLAGS.size, FLAGS.size))
            target = os.path.join(FLAGS.output_dir, os.path.basename(path))
            image.save(target)

            print(time.time() - image_started)

Esempio n. 4

0

Mostra file

File: training.py Progetto: sudo-install-MW/kaggle_ship_detection

def calculate_loss(X, y):
    """Build YOLOv3 detections for ``X`` and run non-max suppression on them.

    NOTE(review): despite the name, the visible body computes no loss, never
    reads ``y`` and has no return statement (so it returns ``None``). The
    excerpt may be truncated -- confirm against the full training.py.

    Args:
        X: input passed straight to ``yolo_v3`` (presumably an image batch
            tensor in NHWC layout -- TODO confirm).
        y: unused in the visible code.
    """
    classes = load_coco_names(FLAGS.class_names)

    with tf.variable_scope('detector'):
        # Channels-last layout is used; an NCHW call was left commented out
        # here by the original author.
        detections = yolo_v3(X, len(classes), data_format='NHWC')

    detected_boxes = detections_boxes(detections)
    # NOTE(review): other callers in this code base pass the *evaluated*
    # output of sess.run() to non_max_suppression; here it receives the
    # result of detections_boxes() directly -- verify that this is intended.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

Esempio n. 5

0

Mostra file

def main(argv=None):
    """Detect objects in one image and export the graph as a frozen ``.pb``.

    Settings are read from ``FLAGS``; the export path is
    ``os.path.join(MODEL_DIR, MODEL_NAME)``. The frozen graph keeps the node
    named ``"output"`` as its output. The annotated image is saved to
    ``FLAGS.output_img``. ``argv`` is not used by the body.
    """
    image = Image.open(FLAGS.input_img)
    resized = image.resize(size=(FLAGS.size, FLAGS.size))

    class_names = load_coco_names(FLAGS.class_names)

    # Detector input: exactly one image per batch, explicitly named "input".
    inputs = tf.placeholder(
        tf.float32, [1, FLAGS.size, FLAGS.size, 3], name="input")

    with tf.variable_scope('detector'):
        raw_detections = yolo_v3(
            inputs, len(class_names), data_format='NHWC')
        detector_variables = tf.global_variables(scope='detector')
        weight_load_ops = load_weights(detector_variables, FLAGS.weights_file)

    box_tensor = detections_boxes(raw_detections)

    # Where the frozen PB model is saved.
    pb_path = os.path.join(MODEL_DIR, MODEL_NAME)
    graph_def = tf.get_default_graph().as_graph_def()

    with tf.Session() as session:
        session.run(weight_load_ops)

        batch = [np.array(resized, dtype=np.float32)]
        detected_boxes = session.run(box_tensor, feed_dict={inputs: batch})

        # Persist the model: variable values are baked in as constants.
        # Several output nodes would be listed as separate entries.
        output_node_names = ["output"]
        frozen_graph_def = graph_util.convert_variables_to_constants(
            session, graph_def, output_node_names)

        # Serialize the frozen graph and write it out.
        with tf.gfile.GFile(pb_path, "wb") as pb_file:
            pb_file.write(frozen_graph_def.SerializeToString())

        # Number of operation nodes in the exported graph.
        node_count = len(frozen_graph_def.node)
        print("%d ops in the final graph." % node_count)

    print("detected_boxes[0].shape:", detected_boxes[0].shape)
    print("detected_boxes:", detected_boxes)

    kept_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(kept_boxes, image, class_names, (FLAGS.size, FLAGS.size))

    image.save(FLAGS.output_img)
    print("done")

Esempio n. 6

0

Mostra file

def main(argv=None):
    """Run YOLOv3 on every file in ``images/``, showing and saving each result.

    Besides the detections this also writes the graph to ``logs/`` (for
    TensorBoard) and a checkpoint to ``models/yolov3.ckpt``. Settings come
    from ``FLAGS``; ``argv`` is not used by the body.
    """
    base_path = 'images'
    # Sorted so images are processed in a stable order.
    test_images = sorted(os.listdir(base_path))
    print(test_images)

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        # Original author's note: this tensor is
        # "detector/yolo-v3/concat:0" with shape (?, 10647, 85).
        detections = yolo_v3(inputs, len(classes), data_format='NHWC')
        load_ops = load_weights(tf.global_variables(scope='detector'),
                                FLAGS.weights_file)

    # Original author's note: same shape (?, 10647, 85), holding top-left and
    # bottom-right corner coordinates followed by the confidence values.
    boxes = detections_boxes(detections)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(load_ops)

        # Dump the graph once and close the writer immediately.
        writer = tf.summary.FileWriter("logs/", graph=sess.graph)
        writer.close()
        saver.save(sess, "models/yolov3.ckpt")

        for image_name in test_images:
            image_path = os.path.join(base_path, image_name)

            image = Image.open(image_path)
            img_resized = image.resize(size=(FLAGS.size, FLAGS.size))

            detected_boxes = sess.run(
                boxes,
                feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]})

            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)

            draw_boxes(filtered_boxes, image, classes,
                       (FLAGS.size, FLAGS.size))

            plt.imshow(image)
            plt.show()

            # NOTE(review): every iteration writes to the same path, so only
            # the last image's result survives on disk. Kept as-is because
            # FLAGS.output_img is the only output location this script knows.
            image.save(FLAGS.output_img)

Esempio n. 7

0

Mostra file

def main(argv=None):
    """Run a frozen detector over a set of images and log every detection.

    For each path returned by ``load_imgs`` the image is fed to the frozen
    graph, boxes are merged with NMS and drawn onto the image. The text
    returned by ``draw_boxes`` for every image is concatenated and written to
    ``FLAGS.output_file``. When ``FLAGS.output_dir`` is set, the annotated
    image is also saved there under its base name. ``argv`` is not used.
    """
    classes = load_coco_names(FLAGS.class_names)

    # Init tf Session
    config = tf.ConfigProto()
    if FLAGS.use_xla:
        jit_level = tf.OptimizerOptions.ON_1
        config.graph_options.optimizer_options.global_jit_level = jit_level
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Collected per-image detection text; joined once at the end instead of
    # growing a string with += inside the loop.
    det_chunks = []
    try:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the frozen graph
        load_graph(sess, FLAGS.model)

        # Get the input and output tensors
        tf_input = sess.graph.get_tensor_by_name('input:0')
        tf_output = sess.graph.get_tensor_by_name('output:0')

        # load image path(s) from disk
        img_paths = load_imgs(FLAGS.input_img, FLAGS.subset_file)
        for img_path in tqdm(img_paths):
            # load image in memory
            img = Image.open(img_path)

            # create input batch (a batch of one image)
            input_data = np.stack([np.array(img, dtype=np.float32)])

            # infer bounding boxes
            detected_boxes = sess.run(
                tf_output, feed_dict={tf_input: input_data})

            # TODO: look into using tf.image.non_max_suppression instead
            # merge boxes using nms
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)

            # render the boxes on the image and keep the returned text
            det_chunks.append(
                draw_boxes(filtered_boxes, img, classes, img_path))

            # save image with prediction boxes to disk if output dir specified
            if FLAGS.output_dir:
                # basename() also copes with OS-specific path separators,
                # unlike splitting on '/'.
                img.save(os.path.join(FLAGS.output_dir,
                                      os.path.basename(img_path)))
    finally:
        # Release the session's resources even if inference fails midway.
        sess.close()

    # write detections to a file
    with open(FLAGS.output_file, 'w') as out:
        out.write(''.join(det_chunks))

Esempio n. 8

0

Mostra file

def main(argv=None):
    """Run YOLO over ``<input_dir>/images`` and save annotated PNGs.

    Every image must have a matching ``<input_dir>/labels/<name>.txt`` file.
    Results are written to ``FLAGS.output_dir`` as ``<index>.png``. Inference
    time and inference+NMS time are printed per image. ``argv`` is not used.
    """
    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    config = tf.ConfigProto(
        #device_count={'GPU': 0},
        #intra_op_parallelism_threads=1,
        #inter_op_parallelism_threads=1
    )
    sess = tf.Session(config=config)
    detections, boxes = init_yolo(sess,
                                  inputs,
                                  len(classes),
                                  FLAGS.weights_file,
                                  header_size=4)

    image_files = get_images(os.path.join(FLAGS.input_dir, 'images'))
    image_names = [get_file_name(f) for f in image_files]
    label_files = [
        os.path.join(FLAGS.input_dir, 'labels', name + '.txt')
        for name in image_names
    ]
    # Same check as before, but the failure now says which files are missing.
    missing_labels = [lf for lf in label_files if not os.path.isfile(lf)]
    assert not missing_labels, \
        "missing label files: {}".format(missing_labels)

    safe_mkdir(FLAGS.output_dir)

    for idx, image_file in enumerate(image_files):
        print(image_file)
        img_orig = Image.open(image_file)
        # Resize to the size the placeholder was declared with; a hard-coded
        # 416 breaks as soon as FLAGS.size is set to anything else.
        img = img_orig.resize((FLAGS.size, FLAGS.size))
        # The placeholder has 3 channels, so drop any alpha channel.
        if img.mode == 'RGBA':
            img = img.convert(mode='RGB')

        t1 = timer()
        detected_boxes = sess.run(
            boxes, feed_dict={inputs: [np.array(img, dtype=np.float32)]})
        t2 = timer()

        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)
        t3 = timer()
        print("\tinference time: {}".format(t2 - t1))
        print("\ttotal time: {}".format(t3 - t1))

        # Boxes are drawn on the original, full-resolution image.
        draw_boxes(filtered_boxes, img_orig, classes, (FLAGS.size, FLAGS.size))
        img_orig.save(os.path.join(FLAGS.output_dir, "{}.png".format(idx)))

Esempio n. 9

0

Mostra file

File: demo.py Progetto: kylehounslow/bmw_2019

def main(argv=None):
    """Run live YOLOv3 detection on a webcam stream until 'q' or ESC.

    The webcam id is taken from the command line (``camid``); all other
    settings come from ``FLAGS``. Each frame is resized to the network size,
    converted BGR->RGB, run through the detector and NMS, and then shown at
    1920x1080 with the boxes drawn. ``argv`` is not used by the body.

    Raises:
        RuntimeError: if the webcam cannot be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('camid', type=int, help='source webcam id')
    args = parser.parse_args()
    classes = load_coco_names(FLAGS.class_names)
    # Fixed seed so each class gets the same colour on every run.
    np.random.seed(2018)
    colors = [np.random.randint(0, 255, 3) for _ in range(len(classes))]

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

    with tf.variable_scope('detector'):
        detections = yolo_v3(inputs, len(classes), data_format='NCHW')
        load_ops = load_weights(tf.global_variables(scope='detector'),
                                FLAGS.weights_file)

    boxes = detections_boxes(detections)
    vc = cv2.VideoCapture()
    # open() reports failure through its return value, not an exception.
    if not vc.open(args.camid):
        raise RuntimeError('cannot open webcam {}'.format(args.camid))
    try:
        with tf.Session() as sess:
            sess.run(load_ops)
            while True:
                grabbed, img = vc.read()
                if not grabbed:
                    # No frame (camera unplugged / stream ended): stop
                    # cleanly; resizing a None frame would crash.
                    break
                img_resized = cv2.resize(img, dsize=(FLAGS.size, FLAGS.size))
                img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

                detected_boxes = sess.run(
                    boxes,
                    feed_dict={
                        inputs: [np.array(img_resized, dtype=np.float32)]
                    })
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                # Display frame is upscaled before the boxes are drawn on it.
                img = cv2.resize(img, (1920, 1080))
                img = draw_boxes(filtered_boxes,
                                 img,
                                 classes, (FLAGS.size, FLAGS.size),
                                 colors=colors)
                cv2.imshow('detections', img)
                key = cv2.waitKey(1)
                # 27 is the ESC key code.
                if key == ord('q') or key & 0xFFFF == 27:
                    break
    finally:
        # Always give the camera back and close the preview window.
        vc.release()
        cv2.destroyAllWindows()

Esempio n. 10

0

Mostra file

    def detect(self, image):
        """Detect ads in the image, return detection results as a dict.

        The image is resized to the network input size (and converted from
        RGBA to RGB when needed) before being run through ``self.sess``.

        The return value is as follows:

            {
                'size': image.size,  # (image_width, image_height)
                'boxes': [
                    [x0, y0, x1, y1, probability],
                    ...
                ],
                'detection_time': seconds,  # inference + NMS + box scaling
            }

        ``'boxes'`` is an empty list when no ad is detected.
        """
        img = image.resize((FLAGS.size, FLAGS.size))
        if img.mode == 'RGBA':
            img = img.convert(mode='RGB')

        logging.info('Detecting ads')
        t1 = timer()
        detected_boxes = self.sess.run(
            self.boxes,
            feed_dict={self.inputs: [np.array(img, dtype=np.float32)]},
        )
        unique_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold,
        )
        # The NMS result only has entries for classes that were actually
        # detected, so fall back to "no boxes" instead of raising KeyError.
        boxes = [
            scale_box(box, image.size) + [float(p)]
            for box, p in unique_boxes.get(AD_TYPE, [])
        ]
        t2 = timer()
        logging.debug('Detected boxes: {}'.format(boxes))
        logging.info('Detection complete: found {} ads in {} seconds'.format(
            len(boxes), t2 - t1))

        return {
            'size': image.size,
            'boxes': boxes,
            'detection_time': t2 - t1,
        }

Esempio n. 11

0

Mostra file

File: demo_frozen.py Progetto: zacwitte/tensorflow-yolo-v3

def main(argv=None):
    """Run a frozen detector on a single image and save the annotated copy.

    The image at ``FLAGS.input_img`` is fed as a batch of one to the graph
    loaded from ``FLAGS.model`` (tensors ``input:0`` -> ``output:0``). The
    boxes that survive NMS are drawn on the image, which is then saved to
    ``FLAGS.output_img``. ``argv`` is not used by the body.
    """
    image = Image.open(FLAGS.input_img)

    # Batch of one image, as float32.
    batch = np.stack([np.array(image, dtype=np.float32)])
    print("input shape: %s" % (batch.shape, ))

    class_names = load_coco_names(FLAGS.class_names)

    # Session configuration: optional XLA JIT, GPU memory grown on demand.
    session_config = tf.ConfigProto()
    if FLAGS.use_xla:
        optimizer_options = session_config.graph_options.optimizer_options
        optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    session_config.gpu_options.allow_growth = True

    session = tf.Session(config=session_config)
    session.run(tf.global_variables_initializer())

    # Bring the frozen graph into the session.
    load_graph(session, FLAGS.model)

    # Look up the graph's input and output tensors by name.
    graph = session.graph
    input_tensor = graph.get_tensor_by_name('input:0')
    print("input tensor:")
    print(input_tensor)
    output_tensor = graph.get_tensor_by_name('output:0')
    print("output tensor:")
    print(output_tensor)

    detected_boxes = session.run(output_tensor, feed_dict={input_tensor: batch})

    # TODO: look into using tf.image.non_max_suppression instead
    kept_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    draw_boxes(kept_boxes, image, class_names)

    image.save(FLAGS.output_img)

Esempio n. 12

0

Mostra file

File: object_detector.py Progetto: lucdew/ml-experiments

def detect_obj(file_path):
    """Detect objects in one image and save the annotated copy.

    A fresh detector graph is built for every call and the default graph is
    reset afterwards, so the function can be called repeatedly. The result is
    saved into ``FLAGS.output_dir`` under the input file's base name.

    Args:
        file_path: path of the image to process.
    """
    img = Image.open(file_path)
    img_resized = img.resize(size=(FLAGS.size, FLAGS.size))

    classes = load_coco_names(FLAGS.class_names)

    try:
        # placeholder for detector inputs
        inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])

        with tf.variable_scope('detector'):
            detections = yolo_v3(inputs, len(classes), data_format='NHWC')
            load_ops = load_weights(tf.global_variables(scope='detector'),
                                    FLAGS.weights_file)

        boxes = detections_boxes(detections)

        with tf.Session() as sess:
            sess.run(load_ops)

            detected_boxes = sess.run(
                boxes,
                feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]})

        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.conf_threshold,
            iou_threshold=FLAGS.iou_threshold)

        # Boxes are drawn on the original, full-resolution image.
        draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size))

        img.save(os.path.join(FLAGS.output_dir, os.path.basename(file_path)))
    finally:
        # Discard this call's graph even when something above raised, so a
        # failed image does not leave its ops behind for the next call.
        tf.reset_default_graph()

Esempio n. 13

0

Mostra file

File: web_footer_false_positive.py Progetto: littlefish12/ad-versarial

def main(argv=None):
    """Optimise a footer-strip mask with gradient steps against the detector.

    A trainable ``mask`` (20 rows, full input width) replaces the bottom rows
    of every input image. The loss is ``relu(1.1 * conf_threshold - c)``
    where ``c`` is column 4 of the detector's box tensor (presumably the
    objectness/confidence score -- TODO confirm), so minimising it pushes
    that value up. Every 50 epochs the current mask, the masked test images
    and their detections are written to ``output/footer/``.

    Settings come from ``FLAGS``; ``argv`` is not used by the body.
    """
    # Fixed seed: batch sampling and the random bands below are repeatable.
    np.random.seed(0)

    safe_mkdir("output/footer/")

    classes = load_coco_names(FLAGS.class_names)

    # Expected size of the input images.
    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])
    # Horizontal offset of the mask. It is fed as 0 everywhere below.
    x_min = tf.placeholder(tf.int32, shape=())
    # NOTE(review): y_min is fed below but no visible tensor depends on it.
    y_min = tf.placeholder(tf.int32, shape=())

    # The mask is a strip mask_h rows high spanning the full width.
    mask_h = 20
    mask_w = input_w

    mask_val = np.zeros((mask_h, mask_w, 3), dtype=np.float32)
    mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
    # Place the (clipped) mask in the bottom mask_h rows of a full-size image.
    padded_mask = tf.image.pad_to_bounding_box(tf.clip_by_value(mask, 0, 255),
                                               input_h - mask_h, x_min,
                                               input_h, input_w)

    # Ones where the mask sits, zeros elsewhere: used to blank that region.
    black_box = np.ones_like(mask_val)
    padded_black_box = tf.image.pad_to_bounding_box(black_box,
                                                    input_h - mask_h, x_min,
                                                    input_h, input_w)

    # Zero out the strip in the input, then paste the mask into it.
    masked_input = tf.clip_by_value(
        inputs * (1 - padded_black_box) + padded_mask, 0, 255)

    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    detections, boxes_tensor = init_yolo(sess,
                                         inputs_resized,
                                         len(classes),
                                         FLAGS.weights_file,
                                         header_size=4)

    X_train_paths, Y_train = get_input_files_and_labels(
        FLAGS.input_dir + "/train/", input_h, input_w)
    X_test_paths, Y_test = get_input_files_and_labels(
        FLAGS.input_dir + "/test/", input_h, input_w)
    print(len(X_train_paths), len(X_test_paths))
    # Test images are loaded once, up front; training images per batch.
    X_test = np.array([load_image(path)
                       for path in X_test_paths]).astype(np.float32)

    epochs = 251
    batch_size = 4

    # Positive wherever column 4 is below 1.1 * conf_threshold, else zero.
    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]

    # The gradient is computed above, normalised in NumPy, and fed back in
    # through grad_ph so Adam applies the normalised version.
    opt = tf.train.AdamOptimizer(10.0)
    grad_ph = tf.placeholder(shape=grad.get_shape().as_list(),
                             dtype=tf.float32)
    assign_op = opt.apply_gradients([(grad_ph, mask)])
    sess.run(tf.variables_initializer(opt.variables()))

    # Projection step: keep every mask value within [0, 32].
    assign_eps_op = tf.assign(mask, tf.clip_by_value(mask, 0, 32))

    for epoch in range(epochs):
        # Periodic evaluation on the test set.
        if epoch % 50 == 0:

            # Shape of the NMS result: class_idx => [(box, score), ...]
            # with box = array([x0, y0, x1, y1]), e.g.
            # {0: [
            #       (array([1101,  581, 1400, 1007]), 1.0),
            #       (array([ 466,  140, 1436,  389]), 1.0),
            #       (array([1419,   25, 1540,   69]), 1.0)
            #     ]
            # }
            feed_dict = {
                inputs: X_test,
            }

            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input], feed_dict, extra_feed={x_min: 0, y_min: 0})

            curr_mask = sess.run(mask)

            # Snapshot of the current mask as an image.
            res = Image.fromarray(curr_mask.astype(np.uint8))
            res.save('output/footer/footer_{}.png'.format(epoch))
            res.close()

            # Per test image: "<detected>/<ground truth>" for class 0.
            num_detect = []
            for j in range(len(X_test)):
                # NMS is run one image at a time (slice keeps the batch axis).
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                # Saved twice: once plain, once with the boxes drawn.
                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/footer/img_{}_{}.png".format(epoch, j))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save("output/footer/img_boxes_{}_{}.png".format(epoch, j))

                # Disabled debugging display; this branch never runs.
                if False:
                    my_dpi = 96
                    plt.figure(figsize=(input_h / my_dpi, input_w / my_dpi),
                               dpi=my_dpi)
                    plt.imshow(np.array(img))
                    plt.show()

                # Labels may have no entry for class 0.
                ground_truth = 0 if 0 not in Y_test[j] else len(Y_test[j][0])
                if len(filtered_boxes) != 0:
                    # NOTE(review): assumes that a non-empty result always
                    # contains class 0 -- verify against the class list.
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     ground_truth))
                else:
                    num_detect.append("{}/{}".format(0, ground_truth))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))

        # Sample a training batch without repetition.
        batch_idx = np.random.choice(len(X_train_paths),
                                     batch_size,
                                     replace=False)
        X_batch = np.array(
            [load_image(X_train_paths[idx]) for idx in batch_idx])
        Y_batch = [Y_train[idx] for idx in batch_idx]

        # Augmentation: with probability 0.25 paint a band of random height
        # (20-99 rows) and a single random value directly above the strip.
        for i in range(batch_size):
            if np.random.random() > 0.75:
                h = np.random.randint(20, 100)
                c = np.random.randint(0, 255, size=1)
                X_batch[i, -(mask_h + h):-mask_h, :mask_w, :] = c

        i = 0
        start_score = 0

        # Up to max_steps gradient steps on this batch.
        max_steps = 10
        while i < max_steps:
            i += 1

            feed_dict = {inputs: np.clip(X_batch, 0, 255), y_min: 0, x_min: 0}
            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            num_detect = []
            tot_detected = 0
            # Number of batch images that show "surplus" detections.
            tot_surplus = 0

            for j in range(batch_size):

                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                # NOTE(review): on the first step this is overwritten for
                # every j, so afterwards it holds the class-0 count of the
                # LAST image in the batch -- confirm this is intended.
                if i == 1:
                    start_score = 0 if len(filtered_boxes) == 0 else len(
                        filtered_boxes[0])

                if len(filtered_boxes) != 0:
                    # NOTE(review): unlike the test branch above, this indexes
                    # Y_batch[j][0] without checking that class 0 is present.
                    num_detect.append("{}/{}".format(len(filtered_boxes[0]),
                                                     len(Y_batch[j][0])))
                    tot_detected += len(filtered_boxes[0])
                    # Surplus: more boxes than labelled, or more than the
                    # count recorded on the first step.
                    if len(filtered_boxes[0]) > len(Y_batch[j][0]) or len(
                            filtered_boxes[0]) > start_score:
                        tot_surplus += 1
                else:
                    num_detect.append("{}/{}".format(0, len(Y_batch[j][0])))

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}'.format(num_detect))

            if tot_surplus == batch_size:
                # Every image has surplus detections: force the loop to end.
                i = 1000
            else:
                # Step along the L2-normalised gradient (epsilon avoids a
                # division by zero), then clip the mask back into range.
                sess.run(assign_op,
                         feed_dict={
                             grad_ph:
                             curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                         })
                sess.run(assign_eps_op)

                # Step budget used up without reaching the stop condition.
                if i == max_steps:
                    print("no junk")

Esempio n. 14

0

Mostra file

def main(argv=None):
    """Optimise an additive, bounded mask strip against the detector.

    A trainable ``mask`` (50 rows high, full width, placed 65 rows from the
    top) is added to every input image and kept within ``[-eps, eps]``. The
    loss is ``relu(1.1 * conf_threshold - c)`` where ``c`` is column 4 of the
    detector's box tensor (presumably the objectness/confidence score -- TODO
    confirm), so minimising it pushes that value up. Every 10 epochs the
    mask, the masked test images and their detections are written to
    ``output/bbc_false_positive``.

    Settings come from ``FLAGS``; ``argv`` is not used by the body.
    """
    # Fixed seed so batch sampling is repeatable.
    np.random.seed(0)

    safe_mkdir("output/bbc_false_positive")

    classes = load_coco_names(FLAGS.class_names)

    # Expected width of the input images; the mask spans all of it.
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    mask_h = 50
    mask_w = input_w

    mask = tf.Variable(initial_value=np.zeros((mask_h, mask_w, 3)),
                       dtype=tf.float32)
    mask_resized = mask
    # Zero-pad the strip to the fed image size: row offset 65, column 0.
    mask_resized = tf.image.pad_to_bounding_box(mask_resized, 65, 0,
                                                tf.shape(inputs)[1],
                                                tf.shape(inputs)[2])
    # The mask is ADDED to the image (not pasted over it), then clipped.
    masked_input = tf.clip_by_value(tf.add(inputs, mask_resized), 0, 255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    image_files = np.array(get_images(os.path.join(FLAGS.input_dir, 'images')))

    # First 80 files are used for training, the remainder for testing.
    X_train = image_files[:80]
    X_test = image_files[80:]

    # eps bounds every mask value to [-eps, eps].
    eps = 4.0
    epochs = 121
    batch_size = 8

    detections, boxes_tensor = init_yolo(sess,
                                         inputs_resized,
                                         len(classes),
                                         FLAGS.weights_file,
                                         header_size=4)

    # Positive wherever column 4 is below 1.1 * conf_threshold, else zero.
    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]

    # The gradient is normalised in NumPy and fed back through full_grad.
    full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)

    opt = tf.train.AdamOptimizer(1)
    assign_op = opt.apply_gradients([(full_grad, mask)])
    sess.run(tf.variables_initializer(opt.variables()))

    # Projection step: clip the mask back into [-eps, eps].
    eps_assign_op = tf.assign(mask, tf.clip_by_value(mask, -eps, eps))

    for epoch in range(epochs):
        # Periodic evaluation on the test images.
        if epoch % 10 == 0:

            # Test images are re-loaded from disk at every evaluation.
            X_test_imgs = np.array([
                np.array(load_image(image_file), dtype=np.float32)
                for image_file in X_test
            ])

            feed_dict = {
                inputs: X_test_imgs,
            }

            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input], feed_dict)
            print(detected_boxes.shape)

            curr_mask = sess.run(mask)

            # Snapshot of the current mask as an image.
            # NOTE(review): the mask may hold negative values (down to -eps)
            # before this uint8 cast -- confirm the snapshot is still useful.
            res = Image.fromarray(curr_mask.astype(np.uint8))
            res.save('output/bbc_false_positive/mask_{}.png'.format(epoch))
            res.close()

            num_detect = []
            for j in range(len(X_test)):
                # NMS is run one image at a time (slice keeps the batch axis).
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                # Saved twice: once plain, once with the boxes drawn.
                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/bbc_false_positive/img_{}_{}.png".format(
                    epoch, j))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save(
                    "output/bbc_false_positive/img_boxes_{}_{}.png".format(
                        epoch, j))
                img.close()

                # NOTE(review): images with no detections add nothing here
                # (the training loop below appends 0 instead), so this list
                # can be shorter than the test set.
                if len(filtered_boxes) != 0:
                    num_detect.append(len(filtered_boxes[0]))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))

        # Sample a training batch without repetition and load its images.
        batch_idx = np.random.choice(len(X_train), batch_size, replace=False)
        X_batch = np.array([
            np.array(load_image(image_file), dtype=np.float32)
            for image_file in X_train[batch_idx]
        ])

        i = 0
        # Baseline for the stop test below: one box per image.
        orig_detected = [1] * batch_size

        feed_dict = {
            inputs: X_batch,
        }

        # Up to 50 gradient steps on this batch.
        while i < 50:
            i += 1

            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            # Class-0 box count per batch image (0 when nothing is detected).
            num_detect = []

            for j in range(batch_size):

                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                if len(filtered_boxes) != 0:
                    num_detect.append(len(filtered_boxes[0]))
                else:
                    num_detect.append(0)

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}/{}'.format(num_detect, batch_size))

            # Stop once every image has more than one box.
            # NOTE(review): there is no `else`, so the update below still
            # runs once more after i is set to 1000 -- confirm intended.
            if np.all(np.array(num_detect) > np.array(orig_detected)):
                i = 1000

            # Step along the L2-normalised gradient (epsilon avoids a
            # division by zero), then clip the mask back into range.
            sess.run(assign_op,
                     feed_dict={
                         full_grad:
                         curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                     })
            sess.run(eps_assign_op)

Esempio n. 15

0

Mostra file

def main(argv):
    """Evaluate the detector against labelled regions and write a summary.

    For every ``(image_file, regions)`` pair from ``load_image_metadata`` the
    image is run through the detector. Boxes are suppressed with
    ``FLAGS.supp_threshold``, then filtered by ``FLAGS.conf_threshold`` and
    compared with the scaled ground-truth regions. Annotated images go to
    ``FLAGS.output_dir`` as ``<index>.png``; the per-image statistics are
    passed to ``finalize_summary`` together with the target path
    ``<output_dir>/summary.json``. ``argv`` is not used by the body.
    """
    classes = load_coco_names(FLAGS.class_names)
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3])
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    detections, boxes = init_yolo(
        sess, inputs, len(classes),
        FLAGS.weights_file, header_size=4,
    )
    image_meta = load_image_metadata(FLAGS.input_dir)
    safe_mkdir(FLAGS.output_dir)
    summary = {
        'flags': {
            'input_dir': FLAGS.input_dir,
            'output_dir': FLAGS.output_dir,
            'size': FLAGS.size,
            'suppression_threshold': FLAGS.supp_threshold,
            'detection_threshold': FLAGS.conf_threshold,
            'iou_threshold': FLAGS.iou_threshold,
            'match_threshold': FLAGS.match_threshold,
        },
        'images': [],
    }

    for idx, (image_file, regions) in enumerate(image_meta):
        in_name = os.path.basename(image_file)
        out_name = '{}.png'.format(idx)
        print(in_name, '->', out_name)

        img_orig = Image.open(image_file)
        # Resize to the size the placeholder was declared with; a hard-coded
        # 416 breaks as soon as FLAGS.size is set to anything else.
        img = img_orig.resize((FLAGS.size, FLAGS.size))
        if img.mode == 'RGBA':
            img = img.convert(mode='RGB')

        t1 = timer()
        detected_boxes = sess.run(
            boxes,
            feed_dict={inputs: [np.array(img, dtype=np.float32)]},
        )
        t2 = timer()
        # NMS runs with the suppression threshold; the stricter detection
        # threshold is applied afterwards so both sets can be reported.
        unique_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=FLAGS.supp_threshold,
            iou_threshold=FLAGS.iou_threshold,
        )
        # `type_boxes` rather than `regions`, so the ground-truth `regions`
        # of this iteration is not shadowed inside the comprehension.
        filtered_boxes = {
            rtype: [
                (box, p)
                for box, p in type_boxes
                if p > FLAGS.conf_threshold
            ]
            for rtype, type_boxes in unique_boxes.items()
        }
        scaled_regions = scale_regions(regions, FLAGS.size)
        tp, fn, fp = compare(filtered_boxes, scaled_regions)
        t3 = timer()

        print('\ttotal time: {}'.format(t3 - t1))
        # The 1e-5 terms avoid a division by zero when a count is empty.
        print('\tTP:{} FN:{} FP:{} Recall:{:.2%} Precision:{:.2%}'
              .format(tp, fn, fp, tp / (tp + fn + 1e-5), tp / (tp + fp + 1e-5)))

        # Ground truth is drawn first with an explicit colour (0, 255, 0),
        # then the detections with draw_boxes' default.
        draw_boxes(scaled_regions, img_orig, classes, (FLAGS.size, FLAGS.size),
                   (0, 255, 0))
        draw_boxes(filtered_boxes, img_orig, classes, (FLAGS.size, FLAGS.size))
        img_orig.save(os.path.join(FLAGS.output_dir, out_name))

        summary['images'].append({
            'in_name': in_name,
            'out_name': out_name,
            'nn_time': t2 - t1,
            'total_time': t3 - t1,
            'tp': tp,
            'fn': fn,
            'fp': fp,
            'detected_boxes': conv_boxes(unique_boxes),
            'boxes_above_threshold': conv_boxes(filtered_boxes),
            'marked_boxes': conv_boxes(scaled_regions),
        })

    finalize_summary(
        summary,
        os.path.join(FLAGS.output_dir, 'summary.json'),
    )

Esempio n. 16

0

Mostra file

File: web_overflow_all_overlay.py Progetto: littlefish12/ad-versarial

def main(argv=None):
    """Optimise a tiled overlay mask that pushes box scores above threshold.

    A low-resolution mask variable is tiled ``mask_tile`` times, zero-padded
    to the fixed 1013x1919 input size and alpha-blended (alpha = 0.01) onto
    every input before it is resized to ``FLAGS.size`` and handed to the
    network built by ``init_yolo``.  The loss is
    ``relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])``, so
    minimising it raises channel 4 of every candidate box (presumably the
    objectness score -- confirm against ``init_yolo``).

    Each epoch draws 8 training images, moves every labelled region by a
    random jitter, and takes 10 Adam steps on the norm-scaled mask gradient.
    Every 50 epochs the mask and the annotated test images are written to
    ``output/overflow/``.

    Args:
        argv: Unused.
    """
    np.random.seed(0)

    safe_mkdir("output/overflow/")

    classes = load_coco_names(FLAGS.class_names)

    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    X_train_paths, Y_train = get_input_files_and_labels(
        FLAGS.input_dir + "/train/", input_h, input_w)
    X_test_paths, Y_test = get_input_files_and_labels(
        FLAGS.input_dir + "/test/", input_h, input_w)
    print(len(X_train_paths), len(X_test_paths))
    X_test = np.array([load_image(path) for path in X_test_paths])

    epochs = 201
    batch_size = 8

    mask_tile = 8
    mask_val = np.zeros((input_h // mask_tile, input_w // mask_tile, 3),
                        dtype=np.float32)
    mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)

    # The tiled mask is smaller than the input whenever the input size is
    # not a multiple of mask_tile; split the leftover rows/columns evenly
    # and fill them with zero padding.
    slack_h = input_h - mask_val.shape[0] * mask_tile
    slack_w = input_w - mask_val.shape[1] * mask_tile
    tiled_mask = tf.image.pad_to_bounding_box(
        tf.tile(mask, [mask_tile, mask_tile, 1]), slack_h // 2, slack_w // 2,
        input_h, input_w)

    alpha = 0.01
    masked_input = tf.clip_by_value((1 - alpha) * inputs + alpha * tiled_mask,
                                    0, 255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)

    sess.run(tf.global_variables_initializer())

    detections, boxes_tensor = init_yolo(sess,
                                         inputs_resized,
                                         len(classes),
                                         FLAGS.weights_file,
                                         header_size=4)

    loss = tf.nn.relu(1.1 * FLAGS.conf_threshold - boxes_tensor[:, :, 4])
    grad = tf.gradients(tf.reduce_sum(loss), mask)[0]

    # The gradient is normalised in NumPy and fed back through a
    # placeholder so that Adam sees a unit-norm update direction.
    full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)
    opt = tf.train.AdamOptimizer(1.0)
    assign_op = opt.apply_gradients([(full_grad, mask)])
    sess.run(tf.variables_initializer(opt.variables()))

    # Built once, outside the loops, so the graph does not grow per step.
    assign_eps_op = tf.assign(mask, tf.clip_by_value(mask, 0, 255))

    def count_key0(filtered):
        # Boxes kept by non_max_suppression under key 0; 0 if it kept none.
        return len(filtered[0]) if len(filtered) != 0 else 0

    for epoch in range(epochs):
        if epoch % 50 == 0:
            mask_np = sess.run(mask)
            mask_img = Image.fromarray(mask_np.astype(np.uint8))
            mask_img.save("output/overflow/mask_{}.png".format(epoch))
            mask_img.close()

            X_test_copy = np.copy(X_test).astype(np.float32)
            X_test_copy = np.clip(X_test_copy, 0, 255)

            feed_dict = {inputs: X_test_copy}

            curr_loss, detected_boxes, curr_inputs = \
                batch_eval(sess, [loss, boxes_tensor, masked_input], feed_dict)

            num_detect = []
            num_evaded = 0
            for j in range(len(X_test)):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/overflow/img_{}_{}.png".format(epoch, j))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save("output/overflow/img_boxes_{}_{}.png".format(
                    epoch, j))

                img.close()

                ground_truth = 0 if 0 not in Y_test[j] else len(Y_test[j][0])
                num_found = count_key0(filtered_boxes)
                # An image with no surviving boxes always counts as evaded,
                # even when it has no labelled regions.
                if len(filtered_boxes) == 0 or num_found < ground_truth:
                    num_evaded += 1
                num_detect.append("{}/{}".format(num_found, ground_truth))

            print('test loss={:.3f}'.format(np.sum(curr_loss) / len(X_test)),
                  'num_boxes={}'.format(num_detect))
            print("evaded {} ads".format(num_evaded))

        batch_idx = np.random.choice(len(X_train_paths),
                                     batch_size,
                                     replace=False)
        X_batch = np.array(
            [load_image(X_train_paths[idx]) for idx in batch_idx])
        Y_batch = [Y_train[idx] for idx in batch_idx]

        jitter_x_low, jitter_x_high = -500, 500
        jitter_y_low, jitter_y_high = -50, 50

        # Move every labelled region of each batch image by a random offset
        # that keeps it inside the frame.
        for b in range(len(X_batch)):
            for (box, _) in Y_batch[b][0]:
                x0, y0, x1, y1 = box

                h = y1 - y0
                w = x1 - x0

                # randint needs low < high; a box spanning the whole
                # width/height leaves only the offset 0, so widen the upper
                # bound by one in that case instead of raising.
                low_x = max(-x0, jitter_x_low)
                high_x = min(input_w - x1, jitter_x_high)
                jitter_x = np.random.randint(low_x, max(high_x, low_x + 1))
                low_y = max(-y0, jitter_y_low)
                high_y = min(input_h - y1, jitter_y_high)
                jitter_y = np.random.randint(low_y, max(high_y, low_y + 1))

                ad = X_batch[b, y0:y1, x0:x1, :].copy()
                background = X_batch[b,
                                     min(y0 + 5, input_h - 1),
                                     min(x1 + 5, input_w - 1), :]
                # Clamp the top of the erased band: a negative start index
                # would wrap around and leave the original region in place.
                X_batch[b, max(y0 - 5, 0):y1 + 5, x0:x1, :] = background
                y0 = y0 + jitter_y
                x0 = x0 + jitter_x
                X_batch[b, y0:y0 + h, x0:x0 + w, :] = ad

        max_steps = 10
        num_original = []

        # X_batch is not modified during the inner steps, so clip it once.
        feed_dict = {inputs: np.clip(X_batch, 0, 255)}

        for i in range(1, max_steps + 1):
            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            num_detect = []
            for j in range(batch_size):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                num_found = count_key0(filtered_boxes)
                if i == 1:
                    # Reference count from the first step of this batch.
                    num_original.append(num_found)

                num_detect.append("{}/{}".format(num_found, num_original[j]))

            print(epoch, i,
                  'loss={:.3f}'.format(np.sum(curr_loss) / batch_size),
                  'num_boxes={}'.format(num_detect))

            sess.run(assign_op,
                     feed_dict={
                         full_grad:
                         curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                     })
            sess.run(assign_eps_op)

Esempio n. 17

0

Mostra file

def main(argv=None):
    """Run the detector on 100 viewport-sized crops of a scrolled page.

    The image at ``FLAGS.full_page`` is resized to a width of 1919 pixels
    and cut into 100 evenly spaced crops of height 1013.  Each crop is
    evaluated twice in one batch: unmodified, and as an "adv" copy that
    optionally has ``FLAGS.footer`` pasted over its bottom rows and/or the
    tiled ``FLAGS.mask`` blended in with alpha 0.01 (the unmodified copy
    gets alpha 0).  Both annotated versions are saved under
    ``output/scroll/``.

    Args:
        argv: Unused.
    """
    safe_mkdir("output/scroll/")

    np.random.seed(0)
    classes = load_coco_names(FLAGS.class_names)

    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])
    inputs_resized = tf.image.resize_images(inputs, (FLAGS.size, FLAGS.size), align_corners=True)

    if FLAGS.mask:
        # One alpha per batch element: [clean crop, adversarial crop].
        alpha = tf.placeholder(shape=(2, 1, 1, 1), dtype=tf.float32)
        mask_tile = 8

        mask_val = load_image(FLAGS.mask)

        mask = tf.Variable(initial_value=mask_val, dtype=tf.float32)
        slack_h = input_h - mask_val.shape[0] * mask_tile
        slack_w = input_w - mask_val.shape[1] * mask_tile
        tiled_mask = tf.image.pad_to_bounding_box(tf.tile(mask, [mask_tile, mask_tile, 1]), slack_h // 2, slack_w // 2,
                                                  input_h, input_w)
        masked_input = tf.clip_by_value((1 - alpha) * inputs + alpha * tiled_mask, 0, 255)
        inputs_resized = tf.image.resize_images(masked_input, (FLAGS.size, FLAGS.size),
                                                align_corners=True)

    if FLAGS.footer:
        footer = load_image(FLAGS.footer)
        footer_h = footer.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    detections, boxes_tensor = init_yolo(sess, inputs_resized, len(classes),
                                         FLAGS.weights_file, header_size=4)

    full_page = Image.open(FLAGS.full_page)
    w, h = full_page.size

    # Keep the aspect ratio: the height is scaled by the same factor as the
    # width (input_w / w).  The inverse factor would stretch any page whose
    # width differs from input_w.
    new_h = int(h * input_w / float(w))
    full_page = np.array(full_page.resize((input_w, new_h))).astype(np.float32)
    # Only strip the alpha channel when there is one; the RGBA2RGB
    # conversion fails on 3-channel input.
    if full_page.ndim == 3 and full_page.shape[2] == 4:
        full_page = cv2.cvtColor(full_page, cv2.COLOR_RGBA2RGB)

    to_scroll = new_h - input_h
    print(to_scroll)

    num_outputs = 100
    # A page shorter than the viewport has nothing to scroll; never step
    # backwards into negative slice offsets.
    scroll_dh = max(to_scroll, 0) // num_outputs

    for i in range(num_outputs):
        top = i * scroll_dh
        img = full_page[top:top + input_h, :, :].astype(np.float32)
        img_adv = img.copy()

        if FLAGS.footer:
            img_adv[-footer_h:, :, :] = footer

        # Build the feed only after img_adv has its final content.
        feed_dict = {
            inputs: [img, img_adv]
        }

        if FLAGS.mask:
            feed_dict[alpha] = [[[[0.0]]], [[[0.01]]]]

        detected_boxes = sess.run(boxes_tensor, feed_dict=feed_dict)

        filtered_boxes = non_max_suppression(detected_boxes[:1], confidence_threshold=FLAGS.conf_threshold,
                                             iou_threshold=FLAGS.iou_threshold)

        filtered_boxes_adv = non_max_suppression(detected_boxes[1:], confidence_threshold=FLAGS.conf_threshold,
                                                 iou_threshold=FLAGS.iou_threshold)

        num_ads = 0 if len(filtered_boxes) == 0 else len(filtered_boxes[0])
        num_ads_adv = 0 if len(filtered_boxes_adv) == 0 else len(filtered_boxes_adv[0])

        print(i, num_ads, num_ads_adv)

        clean_img = Image.fromarray(img.astype(np.uint8))
        draw_boxes(filtered_boxes, clean_img, classes, (FLAGS.size, FLAGS.size))
        clean_img.save("output/scroll/img_boxes_{0:03d}.png".format(i))

        adv_img = Image.fromarray(img_adv.astype(np.uint8))
        draw_boxes(filtered_boxes_adv, adv_img, classes, (FLAGS.size, FLAGS.size))
        adv_img.save("output/scroll/img_adv_boxes_{0:03d}.png".format(i))

Esempio n. 18

0

Mostra file

def main(argv=None):
    """Optimise a near-white strip that suppresses detections next to a box.

    A 40x1020 mask variable is pasted into every input at an offset derived
    from the first labelled box of that image; the pixels underneath are
    zeroed first.  The loss is
    ``sum(relu(boxes_tensor[:, :, 4] - 0.1 * FLAGS.conf_threshold))``, so
    minimising it lowers channel 4 of every candidate box (presumably the
    objectness score -- confirm against ``init_yolo``).  After each Adam
    step the mask is clipped to ``[255 - eps, 255]``.

    The first 80 shuffled images are used for training and the rest for
    evaluation.  Every 10 epochs the masked test images, with and without
    drawn boxes, are written to ``output/bbc_evade``.

    Args:
        argv: Unused.
    """
    np.random.seed(0)

    safe_mkdir("output/bbc_evade")

    classes = load_coco_names(FLAGS.class_names)

    input_h = 1013
    input_w = 1919
    inputs = tf.placeholder(tf.float32, [None, None, None, 3])

    # Per-image top-left corner at which the mask is pasted.
    x_min = tf.placeholder(tf.int32, shape=[None])
    y_min = tf.placeholder(tf.int32, shape=[None])

    mask_h = 40
    mask_w = 820 + 200
    # The random term is multiplied by 0, so the mask starts as all 255.
    mask = tf.Variable(
        initial_value=255 +
        0 * np.random.randint(low=0, high=255, size=(mask_h, mask_w, 3)),
        dtype=tf.float32)

    padded_mask = tf.map_fn(
        lambda dims: tf.image.pad_to_bounding_box(mask, dims[0], dims[1],
                                                  tf.shape(inputs)[1],
                                                  tf.shape(inputs)[2]),
        (y_min, x_min),
        dtype=tf.float32)

    # 0 inside the mask footprint and 1 elsewhere, so that the original
    # pixels are removed before the mask is added.
    black_box = tf.ones([mask_h, mask_w, 3], dtype=tf.float32)
    black_mask = 1.0 - tf.map_fn(
        lambda dims: tf.image.pad_to_bounding_box(black_box, dims[0], dims[1],
                                                  tf.shape(inputs)[1],
                                                  tf.shape(inputs)[2]),
        (y_min, x_min),
        dtype=tf.float32)

    blacked_inputs = tf.multiply(inputs, black_mask)
    masked_input = tf.clip_by_value(tf.add(blacked_inputs, padded_mask), 0,
                                    255)
    inputs_resized = tf.image.resize_images(masked_input,
                                            (FLAGS.size, FLAGS.size),
                                            align_corners=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.25
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    image_files = np.array(get_images(os.path.join(FLAGS.input_dir, 'images')))
    np.random.seed(0)
    np.random.shuffle(image_files)
    image_names = [get_file_name(f) for f in image_files]

    label_files = [
        os.path.join(FLAGS.input_dir, 'labels', name + '.txt')
        for name in image_names
    ]
    assert np.all([os.path.isfile(lf) for lf in label_files])

    all_labels = np.array(
        [load_labels(label_file) for label_file in label_files])

    all_labels = [
        convert_labels(labels, (input_w, input_h)) for labels in all_labels
    ]

    X_train = image_files[:80]
    Y_train = all_labels[:80]
    X_test = image_files[80:]
    Y_test = all_labels[80:]

    X_test = np.array([
        np.array(load_image(image_file), dtype=np.float32)
        for image_file in X_test
    ])

    print(len(X_train), len(X_test))
    epochs = 151
    batch_size = 8
    max_steps = 50

    detections, boxes_tensor = init_yolo(sess,
                                         inputs_resized,
                                         len(classes),
                                         FLAGS.weights_file,
                                         header_size=4)

    loss = tf.reduce_sum(
        tf.nn.relu(boxes_tensor[:, :, 4] - 0.1 * FLAGS.conf_threshold))
    grad = tf.gradients(loss, mask)[0]

    full_grad = tf.placeholder(dtype=tf.float32, shape=mask.shape)

    eps = 3.0

    opt = tf.train.AdamOptimizer(1.0)
    assign_op = opt.apply_gradients([(full_grad, mask)])
    sess.run(tf.variables_initializer(opt.variables()))

    # Build the projection op once.  Creating tf.assign inside the training
    # loop would add new nodes to the graph on every step.
    clip_mask_op = tf.assign(mask, tf.clip_by_value(mask, 255 - eps, 255))

    def mask_offsets(labels):
        # Paste position per image, relative to its first labelled box:
        # 95 px left of the box's left edge, 10 px above its bottom edge.
        first_boxes = [label[0][0][0] for label in labels]
        xs = [box[0] + 5 - 100 for box in first_boxes]
        ys = [box[-1] - 10 for box in first_boxes]
        return xs, ys

    for epoch in range(epochs):
        if epoch % 10 == 0:
            x0, y1 = mask_offsets(Y_test)
            feed_dict = {
                inputs: X_test,
                x_min: x0,
                y_min: y1
            }

            detected_boxes, curr_inputs = batch_eval(
                sess, [boxes_tensor, masked_input], feed_dict)

            num_detect = []
            for j in range(len(X_test)):
                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                img = Image.fromarray(curr_inputs[j].astype(np.uint8))
                img.save("output/bbc_evade/img_{}_{}.png".format(epoch, j))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size))
                img.save("output/bbc_evade/img_boxes_{}_{}.png".format(
                    epoch, j))
                img.close()

                if len(filtered_boxes) > 0:
                    num_detect.append(len(filtered_boxes[0]))
                else:
                    num_detect.append(0)

            print('num_boxes={}'.format(num_detect))

        batch_idx = np.random.choice(len(X_train), batch_size, replace=False)

        X_batch = np.array([
            np.array(load_image(image_file), dtype=np.float32)
            for image_file in X_train[batch_idx]
        ])
        Y_batch = [Y_train[i] for i in batch_idx]

        # Augmentation: overwrite the first box region of each image with a
        # region taken from a randomly chosen batch image, anchored at the
        # target box's top-left corner and keeping the source size.
        ad_idx = np.random.choice(len(X_batch), batch_size, replace=True)
        for i in range(batch_size):
            x0, y0, x1, y1 = Y_batch[ad_idx[i]][0][0][0]
            ad = X_batch[ad_idx[i], y0:y1, x0:x1, :]
            x0b, y0b, x1b, y1b = Y_batch[i][0][0][0]

            x1b = x0b + (x1 - x0)
            y1b = y0b + (y1 - y0)

            X_batch[i, y0b:y1b, x0b:x1b, :] = ad

        x0, y1 = mask_offsets(Y_batch)

        # Neither the batch nor the offsets change during the inner steps.
        feed_dict = {
            inputs: X_batch,
            x_min: x0,
            y_min: y1
        }

        for i in range(1, max_steps + 1):
            curr_grad, curr_loss, detected_boxes = \
                sess.run([grad, loss, boxes_tensor], feed_dict=feed_dict)

            num_detect = 0

            for j in range(batch_size):

                filtered_boxes = \
                    non_max_suppression(detected_boxes[j:j+1],
                                        confidence_threshold=FLAGS.conf_threshold,
                                        iou_threshold=FLAGS.iou_threshold)

                if len(filtered_boxes) != 0:
                    num_detect += len(filtered_boxes[0])

            print(epoch, i, 'loss={:.3f}'.format(curr_loss / batch_size),
                  'num_boxes={}/{}'.format(num_detect, batch_size))

            # The update is still applied on the step that triggers the
            # stop, so the break comes after it.
            sess.run(assign_op,
                     feed_dict={
                         full_grad:
                         curr_grad / (np.linalg.norm(curr_grad) + 1e-8)
                     })
            sess.run(clip_mask_op)

            if num_detect == 0:
                break