def __init__(self, txt_path, image_shape, rgb_means, min_sizes, steps):
    # txt_path, image_shape, rgb_means, min_sizes, steps were free (undefined)
    # names in the original signature; they are taken as parameters here.
    self.img_paths = []
    self.labels = []
    self.output_shape = image_shape
    self.rgb_means = rgb_means

    with open(txt_path, 'r') as f:
        lines = f.readlines()

    isFirst = True
    img_label = np.zeros((0, 15))
    for line in lines:
        line = line.rstrip()
        if line.startswith('#'):
            # a '#' line starts a new image; flush the previous image's labels
            if isFirst is True:
                isFirst = False
            else:
                img_label_copy = img_label.copy()
                self.labels.append(img_label_copy)
                img_label = np.zeros((0, 15))
            path = line[2:]
            path = txt_path.replace('label.txt', 'images/') + path
            self.img_paths.append(path)
        else:
            line = line.split(' ')
            line = [float(x) for x in line]
            label = np.zeros((1, 15))
            label[0, 0] = line[0]            # x1
            label[0, 1] = line[1]            # y1
            label[0, 2] = line[0] + line[2]  # x2 = x1 + w
            label[0, 3] = line[1] + line[3]  # y2 = y1 + h
            # landmarks: (x, y) pairs; every third value is a per-landmark flag and is skipped
            label[0, 4] = line[4]    # l0_x
            label[0, 5] = line[5]    # l0_y
            label[0, 6] = line[7]    # l1_x
            label[0, 7] = line[8]    # l1_y
            label[0, 8] = line[10]   # l2_x
            label[0, 9] = line[11]   # l2_y
            label[0, 10] = line[13]  # l3_x
            label[0, 11] = line[14]  # l3_y
            label[0, 12] = line[16]  # l4_x
            label[0, 13] = line[17]  # l4_y
            # a negative first landmark coordinate marks a face without landmark annotations
            if label[0, 4] < 0:
                label[0, 14] = -1
            else:
                label[0, 14] = 1
            img_label = np.append(img_label, label, axis=0)
    # flush the labels collected for the last image
    self.labels.append(img_label)

    self.anchors = generate_anchors(min_sizes, steps, image_shape)
    self.total = len(self.img_paths)
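# The loader above expects a generate_anchors(min_sizes, steps, image_shape)
# helper. A minimal sketch in the style of RetinaFace-like face detectors, not
# necessarily this repo's actual implementation: one square anchor per
# min_size, tiled over each stride's feature map in image coordinates.
import math
import numpy as np

def generate_anchors(min_sizes, steps, image_shape):
    """min_sizes: per-level size lists, e.g. [[16, 32], [64, 128], [256, 512]];
    steps: per-level strides, e.g. [8, 16, 32]; image_shape: (H, W[, C])."""
    h, w = image_shape[:2]
    anchors = []
    for sizes, step in zip(min_sizes, steps):
        fm_h, fm_w = math.ceil(h / step), math.ceil(w / step)
        for i in range(fm_h):
            for j in range(fm_w):
                for size in sizes:
                    cx = (j + 0.5) * step
                    cy = (i + 0.5) * step
                    # stored as (x1, y1, x2, y2) in image coordinates
                    anchors.append([cx - size / 2, cy - size / 2,
                                    cx + size / 2, cy + size / 2])
    return np.array(anchors, dtype=np.float32)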
def test_generate_anchors(self):
    DEBUG = False
    if DEBUG:
        image = cv2.imread(os.path.join('images', '1.jpg'))
        anchors = generate_anchors(image.shape,
                                   scales=[1 / 2, 2],
                                   base_size=32,
                                   stride=32)
        print(anchors.shape)
        anchors = anchors.reshape(-1, 4)
        for anchor in anchors:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
        cv2.imshow('anchors', image)
        cv2.waitKey(0)
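# The tests in this file call generate_anchors(image_shape, scales=...,
# base_size=..., stride=...). A minimal sketch of such a helper, assuming
# square base anchors scaled per grid cell and returned as a
# (H/stride, W/stride, num_scales, 4) grid; the defaults are chosen so the
# later tests' bare generate_anchors(image.shape) calls also resolve. This is
# an illustration, not necessarily the tested function's real implementation.
import numpy as np

def generate_anchors(image_shape, scales=(0.5, 1, 2), base_size=32, stride=32):
    h, w = image_shape[0] // stride, image_shape[1] // stride
    anchors = np.zeros((h, w, len(scales), 4), dtype=np.float32)
    for i in range(h):
        for j in range(w):
            cy, cx = (i + 0.5) * stride, (j + 0.5) * stride
            for k, s in enumerate(scales):
                half = base_size * s / 2
                # (x1, y1, x2, y2) centered on the cell
                anchors[i, j, k] = [cx - half, cy - half, cx + half, cy + half]
    return anchors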
def build(self):
    ##############
    # Set Inputs
    ##############
    if self.mode == 'inference_init':
        # The template input's batch size is fixed to 1.
        inp_template = Input(batch_shape=(1, ) + self.config.template_size,
                             name='inp_template')
    elif self.mode == 'inference':
        # When evaluating, the batch size must be 1.
        assert self.config.batch_size == 1
        inp_img = Input(shape=self.config.instance_size, name='inp_img')
        # Generate anchors once and broadcast them over the batch.
        anchors = generate_anchors(self.config.total_stride,
                                   self.config.scales,
                                   self.config.ratios,
                                   self.config.score_size)
        anchors = np.broadcast_to(anchors,
                                  (self.config.batch_size, ) + anchors.shape)
        # shape = (1, 19, 19, 5, 4)

    ###########################
    # Set Backbone
    ###########################
    self.encoder = build_encoder()
    if self.mode == 'inference':
        encoded_img = self.encoder(inp_img)
        model = Model([inp_img], outputs=encoded_img, name='bb_alex_large')
        return model
    elif self.mode == 'inference_init':
        cls_filters = 2 * self.config.num_anchors * self.config.encoder_out_filter   # 5120
        bbox_filters = 4 * self.config.num_anchors * self.config.encoder_out_filter  # 10240
        encoded_template = self.encoder(inp_template)
        model = Model([inp_template], encoded_template, name='bb_alex_small')
        return model
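# Both build() variants in this file call generate_anchors(total_stride,
# scales, ratios, score_size). A minimal SiamRPN-style sketch, assuming
# (cx, cy, w, h) anchors centered on a score_size x score_size grid around the
# search-region center; with 5 ratios and 1 scale this yields the
# (19, 19, 5, 4) shape noted above. Illustrative only, not the repo's code.
import numpy as np

def generate_anchors(total_stride, scales, ratios, score_size):
    # base (w, h) pairs, one per ratio/scale combination, area ~ stride^2 * scale^2
    base = []
    size = total_stride * total_stride
    for ratio in ratios:
        w = int(np.sqrt(size / ratio))
        h = int(w * ratio)
        for scale in scales:
            base.append((w * scale, h * scale))
    anchor_num = len(base)
    anchors = np.zeros((score_size, score_size, anchor_num, 4), dtype=np.float32)
    # offset so the grid is centered on the search region
    ori = -(score_size // 2) * total_stride
    for i in range(score_size):
        for j in range(score_size):
            for k, (w, h) in enumerate(base):
                # (cx, cy, w, h) relative to the search-region center
                anchors[i, j, k] = [ori + j * total_stride,
                                    ori + i * total_stride, w, h]
    return anchors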
def __init__(self,
             feature_architecture='resnet',
             anchor_scales=(128, 256, 512),
             feat_stride=16,
             negative_overlap=0.3,
             positive_overlap=0.7,
             fg_fraction=0.5,
             batch_size=128,
             nms_thresh=0.7,
             pre_nms_limit=6000,
             post_nms_limit=2000):
    super(RegionProposalNetwork, self).__init__()

    # Setup
    if feature_architecture == 'vgg16':
        input_dims = 512
    else:
        input_dims = 256
    self.test = False

    self.anchors = generate_anchors(feat_stride=feat_stride, scales=anchor_scales)
    self.num_anchors = self.anchors.shape[0]
    self.feat_stride = feat_stride  # how much smaller the feature map is than the original image
    self.negative_overlap = negative_overlap
    self.positive_overlap = positive_overlap
    self.fg_fraction = fg_fraction
    self.batch_size = batch_size  # used for both train and test
    self.nms_thresh = nms_thresh
    self.pre_nms_limit = pre_nms_limit
    self.post_nms_limit = post_nms_limit

    # for calculating targets
    self.all_anchor_boxes = None
    self.feature_map_dim = None  # (N, C, H, W)

    # layers (9 = 3 scales x 3 ratios; 2 objectness scores and 4 box deltas per anchor)
    self.rpn_conv1 = nn.Conv2d(input_dims, 512, kernel_size=3, padding=1)
    self.conv_classify = nn.Conv2d(512, 2 * 9, kernel_size=1)
    self.conv_bbox_regr = nn.Conv2d(512, 4 * 9, kernel_size=1)
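# The RPN above expects generate_anchors(feat_stride, scales) to return one
# (x1, y1, x2, y2) row per ratio/scale combination, centered on a single
# feature-map cell. A minimal sketch in the classic py-faster-rcnn style
# (3 ratios x 3 scales = 9 anchors, matching the hardcoded 2*9 and 4*9
# filters); the ratio set and the reading of scales as anchor side lengths in
# pixels are assumptions.
import numpy as np

def generate_anchors(feat_stride=16, scales=(128, 256, 512), ratios=(0.5, 1, 2)):
    # center the anchors on the first feature-map cell
    cx = cy = (feat_stride - 1) / 2.0
    anchors = []
    for scale in scales:
        area = float(scale * scale)
        for ratio in ratios:
            # pick (w, h) with the requested area and aspect ratio h/w = ratio
            w = np.sqrt(area / ratio)
            h = w * ratio
            anchors.append([cx - 0.5 * (w - 1), cy - 0.5 * (h - 1),
                            cx + 0.5 * (w - 1), cy + 0.5 * (h - 1)])
    return np.array(anchors, dtype=np.float32)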
def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
    """Initializer for an Anchors layer.

    Args
        size:   The base size of the anchors to generate.
        stride: The stride of the anchors to generate.
        ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
        scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
    """
    self.size = size
    self.stride = stride
    self.ratios = ratios
    self.scales = scales

    if ratios is None:
        self.ratios = utils_anchors.AnchorParameters.default.ratios
    elif isinstance(ratios, list):
        self.ratios = np.array(ratios)
    if scales is None:
        self.scales = utils_anchors.AnchorParameters.default.scales
    elif isinstance(scales, list):
        self.scales = np.array(scales)

    self.num_anchors = len(self.ratios) * len(self.scales)
    self.anchors = keras.backend.variable(
        utils_anchors.generate_anchors(
            base_size=self.size,
            ratios=self.ratios,
            scales=self.scales,
        ))
    super(Anchors, self).__init__(*args, **kwargs)
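# Hypothetical usage of the Anchors layer above, following keras-retinanet
# conventions: one layer per feature-pyramid level, each with its own base
# size and stride. The specific size/stride/ratio/scale values are
# assumptions for illustration, not taken from this file.
import keras

feature = keras.layers.Input(shape=(None, None, 256))
anchors = Anchors(size=32, stride=8, ratios=[0.5, 1, 2],
                  scales=[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)])(feature)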
def test_generate_minibatch(self):
    DEBUG = True
    if DEBUG:
        image = np.ones((500, 500, 3))
        box_size = 60
        bounding_boxes = np.array(
            [[100, 100, 100 + box_size, 100 + box_size],
             [300, 300, 300 + box_size, 300 + box_size]])
        for box in bounding_boxes:
            image[box[1]:box[3], box[0]:box[2]] = 0

        anchors = generate_anchors(image.shape)
        anchors_batch_indices, _, _ = generate_minibatch_mask(
            anchors, bounding_boxes, batch_size=64)
        anchors = anchors.reshape(-1, 4)
        anchors_indices = anchors_batch_indices.reshape(-1, )

        # sampled negatives in red, sampled positives in green
        for anchor in anchors[anchors_indices == -1, :]:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
        for anchor in anchors[anchors_indices == 1, :]:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (0, 255, 0), 1)
        cv2.imshow('anchors', image)
        cv2.waitKey(0)
def test_classify_anchors(self):
    DEBUG = False
    if DEBUG:
        image = np.ones((500, 500, 3))
        box_size = 60
        bounding_boxes = np.array(
            [[100, 100, 100 + box_size, 100 + box_size],
             [300, 300, 300 + box_size, 300 + box_size]])
        for box in bounding_boxes:
            image[box[1]:box[3], box[0]:box[2]] = 0

        anchors = generate_anchors(image.shape).reshape(-1, 4)
        anchors_classes = classify_anchors(bounding_boxes, anchors)

        # negatives in red, ignored anchors in blue, positives in green
        for anchor in anchors[anchors_classes == -1]:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
        for anchor in anchors[anchors_classes == 0]:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (255, 0, 0), 1)
        for anchor in anchors[anchors_classes == 1]:
            cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                          (int(anchor[2]), int(anchor[3])), (0, 255, 0), 1)
        cv2.imshow('anchors', image)
        cv2.waitKey(0)
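# classify_anchors and generate_minibatch_mask are exercised by the two tests
# above but defined elsewhere. Minimal IoU-threshold sketches under the usual
# RPN convention (1 = positive, 0 = ignore, -1 = negative); the 0.7/0.3
# thresholds are assumptions borrowed from Faster R-CNN, and the return layout
# of generate_minibatch_mask is inferred from its call site.
import numpy as np

def classify_anchors(bounding_boxes, anchors, pos_thresh=0.7, neg_thresh=0.3):
    # pairwise IoU between anchors (N, 4) and ground-truth boxes (M, 4)
    x1 = np.maximum(anchors[:, None, 0], bounding_boxes[None, :, 0])
    y1 = np.maximum(anchors[:, None, 1], bounding_boxes[None, :, 1])
    x2 = np.minimum(anchors[:, None, 2], bounding_boxes[None, :, 2])
    y2 = np.minimum(anchors[:, None, 3], bounding_boxes[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    area_b = (bounding_boxes[:, 2] - bounding_boxes[:, 0]) * \
             (bounding_boxes[:, 3] - bounding_boxes[:, 1])
    iou = inter / (area_a[:, None] + area_b[None, :] - inter)
    best = iou.max(axis=1)  # best overlap of each anchor with any ground truth
    classes = np.zeros(len(anchors), dtype=np.int64)  # 0 = ignore
    classes[best < neg_thresh] = -1
    classes[best >= pos_thresh] = 1
    return classes

def generate_minibatch_mask(anchors, bounding_boxes, batch_size=64):
    grid_shape = anchors.shape[:-1]
    flat = anchors.reshape(-1, 4)
    classes = classify_anchors(bounding_boxes, flat)
    labels = np.zeros_like(classes)  # anchors not sampled stay 0
    pos = np.flatnonzero(classes == 1)
    neg = np.flatnonzero(classes == -1)
    # sample at most half the batch as positives, fill the rest with negatives
    pos = np.random.choice(pos, min(len(pos), batch_size // 2), replace=False)
    neg = np.random.choice(neg, min(len(neg), batch_size - len(pos)), replace=False)
    labels[pos], labels[neg] = 1, -1
    return labels.reshape(grid_shape), pos, neg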
def build(self):
    ##############
    # Inputs
    ##############
    if self.mode == 'inference':
        # When evaluating, the batch size must be 1.
        assert self.config.batch_size == 1
        inp_img = KL.Input(shape=self.config.instance_size, name='inp_img')

        # Generate anchors once and broadcast them over the batch.
        anchors = generate_anchors(self.config.total_stride,
                                   self.config.scales,
                                   self.config.ratios,
                                   self.config.score_size)
        anchors = np.broadcast_to(anchors,
                                  (self.config.batch_size, ) + anchors.shape)
        anchors = KL.Lambda(lambda x: K.variable(anchors),
                            name='inp_anchors')(inp_img)
        # inp_template = KL.Input(batch_shape=(1,) + self.config.template_size, name='inp_template')
        cls_template = KL.Lambda(
            lambda x: K.variable(self.config.cls_template),
            name='cls_template')(inp_img)
        bbox_template = KL.Lambda(
            lambda x: K.variable(self.config.bbox_template),
            name='bbox_template')(inp_img)
    elif self.mode == 'inference_init':
        # The template input's batch size is fixed to 1.
        inp_template = KL.Input(batch_shape=(1, ) + self.config.template_size,
                                name='inp_template')

    ###########################
    # Encoder
    ###########################
    self.encoder = build_encoder()
    if self.mode == 'inference_init':
        ###########
        # Init
        ###########
        cls_filters = 2 * self.config.num_anchors * self.config.encoder_out_filter
        bbox_filters = 4 * self.config.num_anchors * self.config.encoder_out_filter
        encoded_template = self.encoder(inp_template)
        cls_template = KL.Conv2D(cls_filters, (3, 3),
                                 name='conv_cls1')(encoded_template)
        bbox_template = KL.Conv2D(bbox_filters, (3, 3),
                                  name='conv_r1')(encoded_template)
        outputs = [cls_template, bbox_template]
        return KM.Model([inp_template], outputs, name='Siamese_init')
    elif self.mode == 'inference':
        ###################
        # Inference
        ###################
        encoded_img = self.encoder(inp_img)
        cls_img = KL.Conv2D(self.config.encoder_out_filter, (3, 3),
                            name='conv_cls2')(encoded_img)
        bbox_img = KL.Conv2D(self.config.encoder_out_filter, (3, 3),
                             name='conv_r2')(encoded_img)
        cls_out = CONV(self.config, name='cls_nn_conv')([cls_img, cls_template])
        bbox_out = CONV(self.config, name='box_nn_conv')([bbox_img, bbox_template])
        bbox_out = KL.Conv2D(4 * self.config.num_anchors, 1,
                             name='regress_adjust')(bbox_out)
        outputs = KL.Lambda(lambda x: eval_graph(*x, config=self.config),
                            name='Eval')([bbox_out, cls_out, anchors])
        return KM.Model([inp_img], outputs, name='Siamese_inference')
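# Hypothetical two-phase driver for the builder above (SiamRPN-style
# tracking): run the 'inference_init' model once on the template crop to
# produce the correlation kernels, stash them in the config, then run the
# 'inference' model per frame. The class name SiameseRPN, the config object,
# and the placeholder inputs are assumptions for illustration.
import numpy as np

init_model = SiameseRPN(config, mode='inference_init').build()
template_crop = np.zeros((1, ) + config.template_size, dtype=np.float32)  # placeholder frame
config.cls_template, config.bbox_template = init_model.predict(template_crop)

track_model = SiameseRPN(config, mode='inference').build()
search_crop = np.zeros((1, ) + config.instance_size, dtype=np.float32)  # placeholder frame
outputs = track_model.predict(search_crop)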
def __init__(self, inputs):
    """Region proposal net.

    inputs should be a list of [convolution model, tuple(image_h, image_w, image_scale)].
    """
    # inputs is a convolutional net (i.e. VGG or ZFNet) before the fully-connected layers.
    self.conv_in, self.im_info = inputs
    super(RPN, self).__init__(inputs)
    in_filters = self.conv_in.output_size[1]  # 512

    # RPN conv layers
    classes = 2
    n_anchors = 9
    min_size = 16
    anchor_size = 16
    nms_thresh = 0.7
    topN = 2000

    self.conv = Conv2D(inputs=self.conv_in, n_filters=in_filters,
                       filter_size=(3, 3), stride=(1, 1),
                       activation='relu', border_mode='full')
    self.cls_score = Conv2D(inputs=self.conv, n_filters=classes * n_anchors,
                            filter_size=(1, 1), stride=(1, 1),
                            activation='linear', border_mode='valid')

    # dimshuffle/flatten to get the softmax class probabilities for each of `classes`
    cls_shape = self.cls_score.get_outputs().shape
    cls_score = self.cls_score.get_outputs().reshape(
        (cls_shape[0], classes, -1, cls_shape[3]))
    # shuffle to (classes, batch, row, col)
    cls_shuffle = cls_score.dimshuffle((1, 0, 2, 3))
    # flatten to (classes, batch*row*col)
    cls_flat = cls_shuffle.flatten(2)
    # shuffle to (batch*row*col, classes)
    cls_flat = cls_flat.dimshuffle((1, 0))
    # softmax for probability
    cls_probs_flat = T.nnet.softmax(cls_flat)
    # shuffle back up to the 4D output of cls_score (undo what we did)
    cls_probs = cls_probs_flat.dimshuffle((1, 0)).reshape(cls_shuffle.shape)
    cls_probs = cls_probs.dimshuffle((1, 0, 2, 3))
    self.cls_probs = cls_probs.reshape(cls_shape)

    self.bbox_pred = Conv2D(inputs=self.conv, n_filters=4 * n_anchors,
                            filter_size=(1, 1), stride=(1, 1),
                            activation='linear', border_mode='valid')

    ###############
    # 1. Generate proposals from bbox deltas and shifted anchors (ROIs)
    ###############
    anchors = theano.shared(generate_anchors(anchor_size))
    object_probs = self.cls_probs[:, n_anchors:, :, :]  # foreground scores
    bbox_deltas = self.bbox_pred.get_outputs()

    # height and width of the convolution features
    H, W = object_probs.shape[-2:]
    # essentially numpy's meshgrid: tile anchors across the height and width of the features
    shift_x = (T.arange(0, W) * anchor_size).reshape((1, W))
    shift_y = (T.arange(0, H) * anchor_size).reshape((1, H))
    shift_x = T.tile(shift_x, (H, 1))
    shift_y = T.tile(shift_y.T, (1, W))
    shifts = T.stack([shift_x.ravel(), shift_y.ravel(),
                      shift_x.ravel(), shift_y.ravel()]).T

    # Enumerate all shifted anchors:
    #   add A anchors (1, A, 4) to
    #   cell K shifts (K, 1, 4) to get
    #   shifted anchors (K, A, 4)
    #   reshape to (K*A, 4)
    A = n_anchors
    K = shifts.shape[0]
    anchors = anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   bbox deltas are in (1, 4 * A, H, W) format
    #   transpose to (1, H, W, 4 * A)
    #   reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    #   in slowest-to-fastest order
    bbox_deltas = bbox_deltas.dimshuffle((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the object scores:
    #   scores are in (1, A, H, W) format
    #   transpose to (1, H, W, A)
    #   reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = object_probs.dimshuffle((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to the image
    proposals = clip_boxes(proposals, self.im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to the input image scale stored in im_info[2])
    keep = filter_boxes(proposals, min_size * self.im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]

    # 5. apply nms (e.g. threshold = 0.7)
    # 6. take the top topN proposals after nms (e.g. 2000)
    # 7. return the top proposals (-> RoIs top)
    keep, self.updates = nms(T.concatenate([proposals, scores], axis=1),
                             nms_thresh)
    keep = keep[:topN]
    self.proposals = proposals[keep, :]
    self.scores = scores[keep]

    self.outputs = [self.proposals, self.scores]
    # self.output_size = [self.cls_score.output_size, self.bbox_pred.output_size]
    self.params = {}
    self.params.update(p_dict("rpn_conv/3x3_", self.conv))
    self.params.update(p_dict("rpn_cls_score_", self.cls_score))
    self.params.update(p_dict("rpn_bbox_pred_", self.bbox_pred))
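# The proposal path above relies on bbox_transform_inv and clip_boxes. The RPN
# uses symbolic Theano equivalents; this is a minimal NumPy sketch of the
# standard Faster R-CNN arithmetic, for reference only.
import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) anchor boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    # deltas are offsets relative to the anchor size, log-space for w/h
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights
    out = np.zeros_like(deltas)
    out[:, 0] = pred_ctr_x - 0.5 * pred_w
    out[:, 1] = pred_ctr_y - 0.5 * pred_h
    out[:, 2] = pred_ctr_x + 0.5 * pred_w
    out[:, 3] = pred_ctr_y + 0.5 * pred_h
    return out

def clip_boxes(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to lie inside an (H, W) image."""
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
    boxes[:, 2] = np.clip(boxes[:, 2], 0, im_shape[1] - 1)
    boxes[:, 3] = np.clip(boxes[:, 3], 0, im_shape[0] - 1)
    return boxes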