def run():
    pl_images = tf.placeholder(
        shape=[cfg.batch_size, cfg.image_size[0], cfg.image_size[1], 3],
        dtype=tf.float32)
    pl_gt_boxs = tf.placeholder(shape=[cfg.batch_size, 50, 4], dtype=tf.float32)
    pl_label = tf.placeholder(shape=[cfg.batch_size, 50], dtype=tf.int32)
    pl_input_rpn_match = tf.placeholder(
        shape=[cfg.batch_size, cfg.total_anchors, 1], dtype=tf.int32)
    pl_input_rpn_bbox = tf.placeholder(
        shape=[cfg.batch_size, cfg.RPN_TRAIN_ANCHORS_PER_IMAGE, 4],
        dtype=tf.float32)

    train_tensors, sum_op, vbs = loss(pl_gt_boxs, pl_images, pl_input_rpn_bbox,
                                      pl_input_rpn_match, pl_label)
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
    train_op = slim.learning.create_train_op(train_tensors, optimizer)
    saver = tf.train.Saver(vbs)

    def restore(sess):
        saver.restore(
            sess,
            '/home/dsl/all_check/face_detect/nn_faster_rcnn/model.ckpt-86737')

    sv = tf.train.Supervisor(
        logdir='/home/dsl/all_check/face_detect/nn_faster_rcnn_sec',
        summary_op=None,
        init_fn=restore)
    with sv.managed_session() as sess:
        for step in range(1000000000):
            images, boxs, label, input_rpn_match, input_rpn_bbox = q.get()
            gt_boxs = utils.norm_boxes(boxs, shape=cfg.image_size)
            feed_dict = {
                pl_images: images,
                pl_gt_boxs: gt_boxs,
                pl_label: label,
                pl_input_rpn_bbox: input_rpn_bbox,
                pl_input_rpn_match: input_rpn_match
            }
            t = time.time()
            ls = sess.run(train_op, feed_dict=feed_dict)
            if step % 10 == 0:
                print(time.time() - t)
                summaries = sess.run(sum_op, feed_dict=feed_dict)
                sv.summary_computed(sess, summaries)
                print(ls)
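All of the examples here convert pixel-space boxes with `utils.norm_boxes` before feeding them to the network. For reference, a minimal sketch of such a helper, assuming the Matterport Mask R-CNN convention of mapping `(y1, x1, y2, x2)` pixel boxes into [0, 1] with an `(h - 1, w - 1)` denominator; the exact implementation in these projects may differ:

import numpy as np

def norm_boxes(boxes, shape):
    """Convert boxes from pixel to normalized coordinates.
    boxes: [N, (y1, x1, y2, x2)] in pixels; shape: (height, width).
    Assumption: follows the Matterport-style (h - 1, w - 1) scaling."""
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.divide(boxes - shift, scale).astype(np.float32)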
def get_anchors(self, image_shape): """Returns anchor pyramid for the given image size.""" feature_map_size = image_shape[0] // config.RPN_DOWNSCALE # Cache anchors and reuse if image shape is the same if tuple(image_shape) not in self._anchor_cache: # Generate Anchors a = utils.generate_anchors(config.RPN_ANCHOR_HEIGHTS, config.RPN_ANCHOR_WIDTHS, feature_map_size, config.RPN_DOWNSCALE, config.RPN_ANCHOR_STRIDE) # Normalize coordinates self._anchor_cache[tuple(image_shape)] = utils.norm_boxes( a, image_shape[:2]) return self._anchor_cache[tuple(image_shape)]
def get_anchors(self, image_shape):
    backbone_shapes = utils.compute_backbone_shapes(
        self.backbone, self.backbone_strides, image_shape)
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}
    if tuple(image_shape) not in self._anchor_cache:
        a = utils.generate_pyramid_anchors(self.rpn_anchor_scales,
                                           self.rpn_anchor_ratios,
                                           backbone_shapes,
                                           self.backbone_strides,
                                           self.rpn_anchor_stride)
        self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return self._anchor_cache[tuple(image_shape)]
def generate_all_anchors(fpn_shapes, image_shape, config):
    '''Generate anchors for the pyramid feature maps.'''
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             fpn_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)
    # Normalize coordinates; numpy array [N, 4]
    norm_anchors = utils.norm_boxes(anchors, image_shape)
    anchors_tensor = tf.convert_to_tensor(norm_anchors)
    # Duplicate across the batch dimension
    batch_anchors = tf.broadcast_to(
        anchors_tensor,
        [config.IMAGES_PER_GPU,
         tf.shape(anchors_tensor)[0],
         tf.shape(anchors_tensor)[1]])
    return batch_anchors
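A hedged usage sketch for the helper above; `compute_backbone_shapes` and its `(config, image_shape)` signature are borrowed from the other examples on this page and may not match this project exactly:

fpn_shapes = compute_backbone_shapes(config, [1024, 1024, 3])
# batch_anchors: [config.IMAGES_PER_GPU, num_anchors, 4] in normalized
# coordinates, ready to feed to the proposal layer
batch_anchors = generate_all_anchors(fpn_shapes, [1024, 1024], config)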
def __call__(self, ipt):
    rois = ipt[0]
    mrcnn_class = ipt[1]
    mrcnn_bbox = ipt[2]
    image_meta = ipt[3]
    m = utils.parse_image_meta_graph(image_meta)
    image_shape = m['image_shape'][0]
    window = utils.norm_boxes(m['window'], image_shape[:2])
    detections_batch = utils.batch_slice(
        [rois, mrcnn_class, mrcnn_bbox, window],
        lambda w, x, y, z: layer.refine_detections(w, x, y, z),
        self.image_per_gpu)
    return tf.reshape(
        detections_batch,
        [self.image_per_gpu, self.detection_max_instances, 6])
def generate_image(train):
    """
    Return:
        image: Image as np.ndarray
        gt_cls: Array of classes of crops in [N,]
        gt_boxes: Array of normalized bounding boxes for each crop in
            [N, (y1, x1, y2, x2)]
    """
    image = np.zeros([RPN.h, RPN.w], dtype=dtype)
    n_crops = np.random.randint(1, max_crops + 1)
    gt_cls, gt_boxes = map(
        np.array, zip(*[add_crop(image, train) for i in range(n_crops)]))
    padding_boxes = -np.ones([max_crops - n_crops, 4], np.float64)
    gt_boxes = np.concatenate([gt_boxes, padding_boxes], axis=0)
    gt_boxes = utils.norm_boxes(gt_boxes, [RPN.h, RPN.w])
    image = cv2.merge([image] * 3)
    return image, gt_cls, gt_boxes
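A hedged usage sketch for `generate_image`; `RPN.h`, `RPN.w`, `max_crops`, `dtype` and `add_crop` come from the surrounding module and are assumed here:

image, gt_cls, gt_boxes = generate_image(train=True)
print(image.shape)     # (RPN.h, RPN.w, 3) after cv2.merge
print(gt_cls.shape)    # (n_crops,) class ids of the generated crops
print(gt_boxes.shape)  # (max_crops, 4) normalized boxes, padded rows at the end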
def eager_run():
    tf.enable_eager_execution()
    for s in range(10):
        images, boxs, label, input_rpn_match, input_rpn_bbox = q.get()
        print(input_rpn_bbox.shape)
        gt_boxs = utils.norm_boxes(boxes=boxs, shape=cfg.image_size)
        c1, c2, c3, v = model(images)
        fp = [c1, c2, c3]
        rpn_c_l = []
        r_p = []
        r_b = []
        for f in fp:
            rpn_class_logits, rpn_probs, rpn_bbox = rpn_graph(f)
            rpn_c_l.append(rpn_class_logits)
            r_p.append(rpn_probs)
            r_b.append(rpn_bbox)
        rpn_class_logits = tf.concat(rpn_c_l, axis=1)
        rpn_probs = tf.concat(r_p, axis=1)
        rpn_bbox = tf.concat(r_b, axis=1)
        rpn_rois = propsal(rpn_probs, rpn_bbox)
        rois, target_class_ids, target_bbox = detection_target(
            rpn_rois, label, gt_boxs)
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = fpn_classifier_graph(
            rois, fp)
        mrcnn_class_logits = tf.squeeze(mrcnn_class_logits, axis=[1, 2])
        rpn_class_loss = losses.rpn_class_loss_graph(input_rpn_match,
                                                     rpn_class_logits)
        rpn_bbox_loss = losses.rpn_bbox_loss_graph(input_rpn_bbox,
                                                   input_rpn_match,
                                                   rpn_bbox, cfg)
        class_loss = losses.mrcnn_class_loss_graph(target_class_ids,
                                                   mrcnn_class_logits)
        bbox_loss = losses.mrcnn_bbox_loss_graph(target_bbox, target_class_ids,
                                                 mrcnn_bbox)
def __init__(self, is_train):
    self.is_train = is_train
    # self.anchors_scals = [128, 256, 512]
    self.anchors_scals = [(16, 32, 64), (96, 156, 244), (294, 349, 420)]
    self.anchors_radios = [0.5, 1, 2]
    self.feature_stride = [8, 16, 32]
    self.image_size = [512, 512]
    self.num_class = 21
    self.batch_size = 8
    self.RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    self.RPN_TRAIN_ANCHORS_PER_IMAGE = 256
    self.BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    self.RPN_NMS_THRESHOLD = 0.7
    self.feature_shape = [(np.ceil(self.image_size[0] / x),
                           np.ceil(self.image_size[0] / x))
                          for x in self.feature_stride]
    # Cast to int so the count can be used directly as a placeholder dimension.
    self.total_anchors = int(sum(f_shape[0] * f_shape[1]
                                 for f_shape in self.feature_shape) * 9)
    self.anchors = gen_anchor.gen_multi_anchors(
        scales=self.anchors_scals,
        ratios=self.anchors_radios,
        shape=self.feature_shape,
        feature_stride=self.feature_stride)
    self.norm_anchors = utils.norm_boxes(self.anchors, self.image_size)
    self.VOC_CLASSES = ('back', 'aeroplane', 'bicycle', 'bird', 'boat',
                        'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
                        'diningtable', 'dog', 'horse', 'motorbike', 'person',
                        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
    self.TRAIN_ROIS_PER_IMAGE = 200
    self.DETECTION_MIN_CONFIDENCE = 0.6
    self.DETECTION_MAX_INSTANCES = 100
    self.DETECTION_NMS_THRESHOLD = 0.3
    self.pool_shape = 7
    self.ROI_POSITIVE_RATIO = 0.33
    if is_train:
        self.NMS_ROIS_TRAINING = 2000
    else:
        self.NMS_ROIS_TRAINING = 1000
        self.batch_size = 1
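A hedged usage sketch for the config above; the class name `Config` is an assumption, since the snippet only shows its `__init__`:

cfg = Config(is_train=True)
print(cfg.norm_anchors.shape)  # (total_anchors, 4), coordinates in [0, 1]
print(cfg.batch_size)          # 8 when training; the inference branch sets 1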
def get_anchors(image_shape, config):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = compute_backbone_shapes(config, image_shape)
    # Cache anchors and reuse if image shape is the same
    _anchor_cache = {}
    if tuple(image_shape) not in _anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in the inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        anchors = a
        # Normalize coordinates
        _anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return _anchor_cache[tuple(image_shape)]
def unmold_detections(detections, mrcnn_mask, original_image_shape,
                      image_shape, window):
    """Reformats the detections of one image from the format of the neural
    network output to a format suitable for use in the rest of the
    application.

    detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
    mrcnn_mask: [N, height, width, num_classes]
    original_image_shape: [H, W, C] Original image shape before resizing
    image_shape: [H, W, C] Shape of the image after resizing and padding
    window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the
        real image is, excluding the padding.

    Returns:
        boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
        class_ids: [N] Integer class IDs for each bounding box
        scores: [N] Float probability scores of the class_id
        masks: [height, width, num_instances] Instance masks
    """
    # How many detections do we have?
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where(
        (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    for i in range(N):
        # Convert neural network mask to full size mask
        full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape)
        full_masks.append(full_mask)
    full_masks = np.stack(full_masks, axis=-1) \
        if full_masks else np.empty(masks.shape[1:3] + (0,))

    return boxes, class_ids, scores, full_masks
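A hedged usage sketch showing how `unmold_detections` is typically driven once per image after inference; the names `images`, `detections`, `mrcnn_mask`, `molded_images` and `windows` follow the Matterport-style `detect()` outputs and are assumptions here:

for i, image in enumerate(images):
    final_rois, final_class_ids, final_scores, final_masks = unmold_detections(
        detections[i], mrcnn_mask[i],
        image.shape, molded_images[i].shape, windows[i])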
molded_image = molded_image[np.newaxis, :]
# print("Backbone shape is : ", backbone_shapes)
anchors = utils.generate_pyramid_anchors(inferconfig.RPN_ANCHOR_SCALES,
                                         inferconfig.RPN_ANCHOR_RATIOS,
                                         backbone_shapes,
                                         inferconfig.BACKBONE_STRIDES,
                                         inferconfig.RPN_ANCHOR_STRIDE)
# print("Anchor generate parameter : ", inferconfig.RPN_ANCHOR_SCALES)
# print("Anchor generate parameter : ", inferconfig.RPN_ANCHOR_RATIOS)
# print("Anchor generate parameter : ", backbone_shapes)
# print("Anchor generate parameter : ", inferconfig.BACKBONE_STRIDES)
# print("Anchor generate parameter : ", inferconfig.RPN_ANCHOR_STRIDE)
# print("Original anchor shape is :", anchors.shape)
anchors = np.broadcast_to(anchors, (inferconfig.BATCH_SIZE,) + anchors.shape)
anchors = utils.norm_boxes(anchors, imageshapeinfer[:2])
print("The input anchors shape is : ", anchors.shape)
print('The input anchors are : \n', anchors)
# print(image.shape)

# Collect the detection-related op names from the loaded graph.
test_list = []
for count, op in enumerate(graph.get_operations()):
    if "detection" in op.name:
        print(op.name)
        test_list.append(op.name)
# print(graph.get_operation_by_name('prefix/input_image'))
# prefix/Placeholder/inputs_placeholder
# ...
# prefix/Accuracy/predictions
# We access the input and output nodes
# x = graph.get_tensor_by_name('prefix/*/inputs_placeholder:0')