Example 1
def run():
    pl_images = tf.placeholder(
        shape=[cfg.batch_size, cfg.image_size[0], cfg.image_size[1], 3],
        dtype=tf.float32)
    pl_gt_boxs = tf.placeholder(shape=[cfg.batch_size, 50, 4],
                                dtype=tf.float32)
    pl_label = tf.placeholder(shape=[cfg.batch_size, 50], dtype=tf.int32)
    pl_input_rpn_match = tf.placeholder(
        shape=[cfg.batch_size, cfg.total_anchors, 1], dtype=tf.int32)
    pl_input_rpn_bbox = tf.placeholder(
        shape=[cfg.batch_size, cfg.RPN_TRAIN_ANCHORS_PER_IMAGE, 4],
        dtype=tf.float32)

    train_tensors, sum_op, vbs = loss(pl_gt_boxs, pl_images, pl_input_rpn_bbox,
                                      pl_input_rpn_match, pl_label)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)

    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(
            sess,
            '/home/dsl/all_check/face_detect/nn_faster_rcnn/model.ckpt-86737')

    sv = tf.train.Supervisor(
        logdir='/home/dsl/all_check/face_detect/nn_faster_rcnn_sec',
        summary_op=None,
        init_fn=restore)

    with sv.managed_session() as sess:
        for step in range(1000000000):

            images, boxs, label, input_rpn_match, input_rpn_bbox = q.get()
            gt_boxs = utils.norm_boxes(boxs, shape=cfg.image_size)

            feed_dict = {
                pl_images: images,
                pl_gt_boxs: gt_boxs,
                pl_label: label,
                pl_input_rpn_bbox: input_rpn_bbox,
                pl_input_rpn_match: input_rpn_match
            }
            t = time.time()
            ls = sess.run(train_op, feed_dict=feed_dict)
            if step % 10 == 0:
                print(time.time() - t)
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
                print(ls)
Example 2
    def get_anchors(self, image_shape):
        """Returns anchor pyramid for the given image size."""
        feature_map_size = image_shape[0] // config.RPN_DOWNSCALE
        # Cache anchors and reuse if image shape is the same
        if tuple(image_shape) not in self._anchor_cache:
            # Generate Anchors
            a = utils.generate_anchors(config.RPN_ANCHOR_HEIGHTS,
                                       config.RPN_ANCHOR_WIDTHS,
                                       feature_map_size, config.RPN_DOWNSCALE,
                                       config.RPN_ANCHOR_STRIDE)
            # Normalize coordinates
            self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
                a, image_shape[:2])
        return self._anchor_cache[tuple(image_shape)]
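The examples in this list all hand pixel-space boxes to utils.norm_boxes. For reference, a minimal sketch of that helper following the matterport Mask R-CNN convention (an assumption; each project ships its own utils module, so the exact implementation may differ):

import numpy as np

def norm_boxes(boxes, shape):
    """Converts boxes from pixel to normalized coordinates (sketch of a
    matterport-style utils.norm_boxes; the project's own helper may differ).
    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
    shape: (height, width) of the image in pixels
    """
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.divide(boxes - shift, scale).astype(np.float32)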
Example 3
    def get_anchors(self, image_shape):
        backbone_shapes = utils.compute_backbone_shapes(
            self.backbone, self.backbone_strides, image_shape)
        if not hasattr(self, "_anchor_cache"):
            self._anchor_cache = {}
        if tuple(image_shape) not in self._anchor_cache:
            a = utils.generate_pyramid_anchors(self.rpn_anchor_scales,
                                               self.rpn_anchor_ratios,
                                               backbone_shapes,
                                               self.backbone_strides,
                                               self.rpn_anchor_stride)
            self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
                a, image_shape[:2])
        return self._anchor_cache[tuple(image_shape)]
Example 4
def generate_all_anchors(fpn_shapes, image_shape, config):
    '''
    Generate anchors for pyramid feature maps.
    '''
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             fpn_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)
    # normalize coordinates
    # numpy array [N, 4]
    norm_anchors = utils.norm_boxes(anchors, image_shape)
    anchors_tensor = tf.convert_to_tensor(norm_anchors)
    # Duplicate across the batch dimension
    batch_anchors = tf.broadcast_to(
        anchors_tensor,
        [config.IMAGES_PER_GPU,
         tf.shape(anchors_tensor)[0],
         tf.shape(anchors_tensor)[1]])
    return batch_anchors
Example 5
    def __call__(self, ipt):
        rois = ipt[0]
        mrcnn_class = ipt[1]
        mrcnn_bbox = ipt[2]
        image_meta = ipt[3]

        m = utils.parse_image_meta_graph(image_meta)
        image_shape = m['image_shape'][0]
        window = utils.norm_boxes(m['window'], image_shape[:2])

        detections_batch = utils.batch_slice(
            [rois, mrcnn_class, mrcnn_bbox, window],
            lambda w, x, y, z: layer.refine_detections(w, x, y, z),
            self.image_per_gpu)

        return tf.reshape(
            detections_batch,
            [self.image_per_gpu, self.detection_max_instances, 6])
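This detection layer leans on utils.batch_slice to run refine_detections once per image in the batch. A sketch of that helper, following the matterport Mask R-CNN implementation (an assumption; the utils module bundled with this project may differ):

import tensorflow as tf

def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Sketch of a matterport-style utils.batch_slice (assumed, not this
    project's exact code): slices each input along the batch dimension,
    feeds every slice to graph_fn, and stacks the per-slice outputs."""
    if not isinstance(inputs, list):
        inputs = [inputs]

    outputs = []
    for i in range(batch_size):
        inputs_slice = [x[i] for x in inputs]
        output_slice = graph_fn(*inputs_slice)
        if not isinstance(output_slice, (tuple, list)):
            output_slice = [output_slice]
        outputs.append(output_slice)
    # Regroup from a list of per-slice outputs to a list of per-output slices
    outputs = list(zip(*outputs))

    if names is None:
        names = [None] * len(outputs)

    result = [tf.stack(o, axis=0, name=n) for o, n in zip(outputs, names)]
    if len(result) == 1:
        result = result[0]
    return result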
Example 6
def generate_image(train):
    """
    Return:
        image: Image as np.ndarray
        gt_cls: Array of classes of crops in [N,]
        gt_boxes: Array of normalized bounding boxes for each crop in [N, (y1, x1, y2, x2)]
    """
    image = np.zeros([RPN.h, RPN.w], dtype=dtype)

    n_crops = np.random.randint(1, max_crops + 1)
    gt_cls, gt_boxes = map(
        np.array, zip(*[add_crop(image, train) for i in range(n_crops)]))

    padding_boxes = -np.ones([max_crops - n_crops, 4], np.float64)
    gt_boxes = np.concatenate([gt_boxes, padding_boxes], axis=0)
    gt_boxes = utils.norm_boxes(gt_boxes, [RPN.h, RPN.w])

    image = cv2.merge([image] * 3)
    return image, gt_cls, gt_boxes
Example 7
def eager_run():

    tf.enable_eager_execution()
    for s in range(10):
        images, boxs, label, input_rpn_match, input_rpn_bbox = q.get()
        print(input_rpn_bbox.shape)
        gt_boxs = utils.norm_boxes(boxes=boxs, shape=cfg.image_size)
        c1, c2, c3, v = model(images)
        fp = [c1, c2, c3]
        rpn_c_l = []
        r_p = []
        r_b = []
        for f in fp:
            rpn_class_logits, rpn_probs, rpn_bbox = rpn_graph(f)
            rpn_c_l.append(rpn_class_logits)
            r_p.append(rpn_probs)
            r_b.append(rpn_bbox)
        rpn_class_logits = tf.concat(rpn_c_l, axis=1)
        rpn_probs = tf.concat(r_p, axis=1)
        rpn_bbox = tf.concat(r_b, axis=1)

        rpn_rois = propsal(rpn_probs, rpn_bbox)

        rois, target_class_ids, target_bbox = detection_target(
            rpn_rois, label, gt_boxs)

        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifier_graph(
            rois, fp)

        mrcnn_class_logits = tf.squeeze(mrcnn_class_logits, axis=[1, 2])

        rpn_class_loss = losses.rpn_class_loss_graph(input_rpn_match,
                                                     rpn_class_logits)

        rpn_bbox_loss = losses.rpn_bbox_loss_graph(input_rpn_bbox,
                                                   input_rpn_match, rpn_bbox,
                                                   cfg)

        class_loss = losses.mrcnn_class_loss_graph(target_class_ids,
                                                   mrcnn_class_logits)

        bbox_loss = losses.mrcnn_bbox_loss_graph(target_bbox, target_class_ids,
                                                 mrcnn_bbox)
Example 8
    def __init__(self, is_train):
        self.is_train = is_train
        #self.anchors_scals = [128, 256, 512]
        self.anchors_scals = [(16, 32, 64), (96, 156, 244), (294, 349, 420)]
        self.anchors_radios = [0.5, 1, 2]
        self.feature_stride = [8, 16, 32]
        self.image_size = [512, 512]
        self.num_class = 21
        self.batch_size = 8
        self.RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
        self.RPN_TRAIN_ANCHORS_PER_IMAGE = 256
        self.BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
        self.RPN_NMS_THRESHOLD = 0.7
        self.feature_shape = [(np.ceil(self.image_size[0] / x),
                               np.ceil(self.image_size[0] / x))
                              for x in self.feature_stride]
        self.total_anchors = sum(f_shape[0] * f_shape[1]
                                 for f_shape in self.feature_shape) * 9
        self.anchors = gen_anchor.gen_multi_anchors(
            scales=self.anchors_scals,
            ratios=self.anchors_radios,
            shape=self.feature_shape,
            feature_stride=self.feature_stride)

        self.norm_anchors = utils.norm_boxes(self.anchors, self.image_size)
        self.VOC_CLASSES = ('back', 'aeroplane', 'bicycle', 'bird', 'boat',
                            'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
                            'diningtable', 'dog', 'horse', 'motorbike',
                            'person', 'pottedplant', 'sheep', 'sofa', 'train',
                            'tvmonitor')

        self.TRAIN_ROIS_PER_IMAGE = 200
        self.DETECTION_MIN_CONFIDENCE = 0.6
        self.DETECTION_MAX_INSTANCES = 100
        self.DETECTION_NMS_THRESHOLD = 0.3
        self.pool_shape = 7
        self.ROI_POSITIVE_RATIO = 0.33
        if is_train:
            self.NMS_ROIS_TRAINING = 2000
        else:
            self.NMS_ROIS_TRAINING = 1000
            self.batch_size = 1
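A quick, hypothetical check of the anchor bookkeeping above, re-deriving feature_shape and total_anchors for image_size = [512, 512] and feature_stride = [8, 16, 32]:

import numpy as np

# Re-derivation of the config's numbers (illustration only, not project code)
image_size = [512, 512]
feature_stride = [8, 16, 32]
feature_shape = [(np.ceil(image_size[0] / s), np.ceil(image_size[0] / s))
                 for s in feature_stride]      # (64, 64), (32, 32), (16, 16)
total_anchors = int(sum(h * w for h, w in feature_shape) * 9)
print(total_anchors)                           # (4096 + 1024 + 256) * 9 = 48384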
Example 9
# Module-level anchor cache so anchors are reused across calls with the same
# image shape (a dict created inside the function would be rebuilt every call).
_anchor_cache = {}


def get_anchors(image_shape, config):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = compute_backbone_shapes(config, image_shape)
    # Cache anchors and reuse if image shape is the same
    if tuple(image_shape) not in _anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        anchors = a
        # Normalize coordinates
        _anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return _anchor_cache[tuple(image_shape)]
Example 10
def unmold_detections(detections, mrcnn_mask, original_image_shape,
                      image_shape, window):
    """Reformats the detections of one image from the format of the neural
        network output to a format suitable for use in the rest of the
        application.

        detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
        mrcnn_mask: [N, height, width, num_classes]
        original_image_shape: [H, W, C] Original image shape before resizing
        image_shape: [H, W, C] Shape of the image after resizing and padding
        window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real
                image is excluding the padding.

        Returns:
        boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
        class_ids: [N] Integer class IDs for each bounding box
        scores: [N] Float probability scores of the class_id
        masks: [height, width, num_instances] Instance masks
        """
    # How many detections do we have?
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    for i in range(N):
        # Convert neural network mask to full size mask
        full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape)
        full_masks.append(full_mask)
    full_masks = (np.stack(full_masks, axis=-1)
                  if full_masks else np.empty(masks.shape[1:3] + (0,)))

    return boxes, class_ids, scores, full_masks
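unmold_detections finishes by calling utils.denorm_boxes to map the window-normalized boxes back to pixel coordinates on the original image. A minimal sketch of that inverse helper, again assuming the matterport convention rather than this project's exact code:

import numpy as np

def denorm_boxes(boxes, shape):
    """Converts boxes from normalized to pixel coordinates (sketch of a
    matterport-style utils.denorm_boxes; the project's helper may differ).
    boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
    shape: (height, width) of the image in pixels
    """
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)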
Example 11
    molded_image = molded_image[np.newaxis, :]
    #print("Backbone shape is : ", backbone_shapes)
    anchors = utils.generate_pyramid_anchors(inferconfig.RPN_ANCHOR_SCALES,
                                             inferconfig.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             inferconfig.BACKBONE_STRIDES,
                                             inferconfig.RPN_ANCHOR_STRIDE)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_SCALES)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_RATIOS)
    #print("Anchor generate paramenter :",backbone_shapes)
    #print("Anchor generate parameter : ",inferconfig.BACKBONE_STRIDES)
    #print("Anchor generate parameter : ",inferconfig.RPN_ANCHOR_STRIDE)
    #print("Original anchor shape is :", anchors.shape)
    anchors = np.broadcast_to(anchors,
                              (inferconfig.BATCH_SIZE, ) + anchors.shape)
    anchors = utils.norm_boxes(anchors, imageshapeinfer[:2])
    print("The input anchors shape is : ", anchors.shape)
    print('The input anchors are : \n', anchors)
    #print(image.shape)
    test_list = []
    for count, op in enumerate(graph.get_operations()):
        if "detection" in op.name:
            print(op.name)
            test_list.append(op.name)
    #print(graph.get_operation_by_name('prefix/input_image'))
    # prefix/Placeholder/inputs_placeholder
    # ...
    # prefix/Accuracy/predictions

    # We access the input and output nodes
    # x = graph.get_tensor_by_name('prefix/*/inputs_placeholder:0')