def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image."""
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] >= 5

    # add float conversion
    return bbox_transform(torch.from_numpy(ex_rois),
                          torch.from_numpy(gt_rois[:, :4])).numpy()
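# NOTE: bbox_transform is called throughout this file but never defined in it.
# Below is a minimal numpy sketch of the standard R-CNN delta encoding it
# appears to implement, assuming (x1, y1, x2, y2) box layout. This is an
# illustration, not the project's actual helper.
import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    # Widths, heights, and centers of the example (anchor/proposal) boxes.
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    # Same for the ground-truth boxes.
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    # (dx, dy) are center offsets scaled by anchor size; (dw, dh) are
    # log-scale changes.
    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()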
def produce_batch(image_file, true_boxes):
    image = Image.open(image_file).resize((image_size, image_size), Image.NEAREST)
    data = asarray(image) / 255.0
    del image
    proposals, anchor_probs = generate_proposals(data)
    del data

    # Non-maximum suppression.
    keep = py_cpu_nms(np.hstack((proposals, anchor_probs)), NSM_THRESHOLD)
    if post_nms_N > 0:
        keep = keep[:post_nms_N]
    proposals = proposals[keep, :]
    anchor_probs = anchor_probs[keep]

    # RCNN proposals
    # proposals = np.vstack((proposals, true_boxes))
    overlaps = bbox_overlaps(proposals, enlarged_bboxes)
    which_box = overlaps.argmax(axis=1)
    proposal_max_overlaps = overlaps.max(axis=1)

    # Subsample foreground and background proposals.
    fg_inds = np.where(proposal_max_overlaps >= FG_THRESHOLD_RCNN)[0]
    fg_rois_in_image = min(int(BATCH_SIZE / (1 + BG_FG_FRAC_RCNN)), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_in_image, replace=False)

    bg_inds = np.where((proposal_max_overlaps < BG_THRESH_HI) &
                       (proposal_max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_in_image = min(fg_rois_in_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_in_image, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    np.random.shuffle(keep_inds)

    # Select sampled values from the various arrays:
    rois = proposals[keep_inds]  # the chosen RoIs

    # Scores of the chosen RoIs (fg=1, bg=0).
    new_scores = np.zeros(len(proposals))
    new_scores[fg_inds] = 1
    roi_scores = new_scores[keep_inds].reshape(-1, 1)

    # Regression targets (nonzero only for foreground RoIs).
    targets = np.zeros((len(proposals), 4))
    targets[fg_inds] = bbox_transform(proposals[fg_inds], true_boxes[which_box[fg_inds]])
    targets = targets[keep_inds]

    return rois, targets, roi_scores
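# NOTE: py_cpu_nms is used above but not defined in this file. A minimal
# numpy sketch of the pure-Python NMS it presumably implements: dets rows
# are (x1, y1, x2, y2, score); the returned list holds indices to keep.
def py_cpu_nms_sketch(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with the remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        # Drop boxes whose IoU with the kept box exceeds the threshold.
        order = order[1:][iou <= thresh]
    return keep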
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if False:  # cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev.
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) /
                   np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _compute_targets(self, ex_rois, gt_rois, label):
    """Compute bounding-box regression targets for an image.

    Inputs are tensors.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if self.config['train_bbox_normalize_targets_precomputed']:
        # Optionally normalize targets by a precomputed mean and stdev.
        means = self.config['train_bbox_normalize_means']
        stds = self.config['train_bbox_normalize_stds']
        targets = (targets - targets.new(means)) / targets.new(stds)
    return torch.cat([label.unsqueeze(1), targets], 1)
def define_bbox(pred_bbox_delta, ANCHOR_BOX):
    delta_x, delta_y, delta_w, delta_h = torch.unbind(pred_bbox_delta, dim=2)

    # set_anchors(mc, scale)
    anchor_x = ANCHOR_BOX[:, 0]
    anchor_y = ANCHOR_BOX[:, 1]
    anchor_w = ANCHOR_BOX[:, 2]
    anchor_h = ANCHOR_BOX[:, 3]

    box_center_x = anchor_x + delta_x * anchor_w
    box_center_y = anchor_y + delta_y * anchor_h
    # box_width = anchor_w * util.safe_exp(delta_w, EXP_THRESH)
    # box_height = anchor_h * util.safe_exp(delta_h, EXP_THRESH)
    box_width = anchor_w * torch.exp(delta_w)
    box_height = anchor_h * torch.exp(delta_h)

    # This needs to be done on the CPU side.
    xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
        [box_center_x, box_center_y, box_width, box_height])
    xmins = xmins.cpu().detach().numpy()
    ymins = ymins.cpu().detach().numpy()
    xmaxs = xmaxs.cpu().detach().numpy()
    ymaxs = ymaxs.cpu().detach().numpy()

    # The max x position is IMAGE_WIDTH - 1 since we use zero-based pixels;
    # same for y.
    xmins = np.minimum(np.maximum(0.0, xmins), IMAGE_WIDTH - 1.0)
    ymins = np.minimum(np.maximum(0.0, ymins), IMAGE_HEIGHT - 1.0)
    xmaxs = np.maximum(np.minimum(IMAGE_WIDTH - 1.0, xmaxs), 0.0)
    ymaxs = np.maximum(np.minimum(IMAGE_HEIGHT - 1.0, ymaxs), 0.0)

    # This is not needed for a hardware implementation.
    det_boxes = torch.transpose(
        torch.stack(util.bbox_transform_inv(
            torch.FloatTensor([xmins, ymins, xmaxs, ymaxs]))), 1, 2)
    return det_boxes
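# NOTE: in this squeezeDet-style code, util.bbox_transform converts
# center-format [cx, cy, w, h] boxes to corner format and
# util.bbox_transform_inv converts back -- a different convention from the
# Faster R-CNN delta helpers elsewhere in this file. A hedged sketch of the
# assumed pair (names are illustrative):
def bbox_center_to_corners(box):
    # [cx, cy, w, h] -> [xmin, ymin, xmax, ymax]
    cx, cy, w, h = box
    return [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]

def bbox_corners_to_center(box):
    # [xmin, ymin, xmax, ymax] -> [cx, cy, w, h]
    xmin, ymin, xmax, ymax = box
    return [(xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin]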
def get_rpn_targets(self, targets):
    """
    :param targets: (N, x1, y1, x2, y2, C) targets
    :return: rpn_labels (batch_size, 1), rpn_bbox_targets (batch_size, 4),
             keep (batch_size,) indices at which the batch was sampled
    """
    all_anchor_boxes = self.all_anchor_boxes
    # anchor_boxes = self.filter_anchor_boxes(all_anchor_boxes)
    overlaps = get_overlaps(all_anchor_boxes, targets)
    labels = self.get_anchor_box_labels(overlaps)
    batch_labels, batch_anchor_boxes, batch_overlaps, keep = self.sample_batch(
        labels, all_anchor_boxes, overlaps)

    # Assign each sampled anchor to the target it overlaps most.
    anchor_assignments = np.argmax(batch_overlaps, axis=1)

    # Compute target bbox deltas for the RPN regressor head, e.g. (256, 4).
    bbox_targets = bbox_transform(batch_anchor_boxes, targets[anchor_assignments])
    bbox_targets = torch.from_numpy(bbox_targets)
    rpn_labels = torch.from_numpy(batch_labels).long()
    batch_indices = torch.from_numpy(keep).long()

    return rpn_labels, bbox_targets, batch_indices  # RPN indices to keep for the loss
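# NOTE: get_overlaps / bbox_overlaps (used throughout this file) is assumed
# to return the pairwise IoU matrix between two box sets. A minimal numpy
# sketch, again as an illustration rather than the project's actual helper:
def bbox_overlaps_sketch(boxes, gt_boxes):
    """Return an IoU matrix of shape (len(boxes), len(gt_boxes))."""
    overlaps = np.zeros((boxes.shape[0], gt_boxes.shape[0]), dtype=np.float32)
    box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    for j, gt in enumerate(gt_boxes):
        gt_area = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
        # Clamped intersection widths/heights against this gt box.
        iw = np.maximum(
            np.minimum(boxes[:, 2], gt[2]) - np.maximum(boxes[:, 0], gt[0]) + 1, 0)
        ih = np.maximum(
            np.minimum(boxes[:, 3], gt[3]) - np.maximum(boxes[:, 1], gt[1]) + 1, 0)
        inter = iw * ih
        overlaps[:, j] = inter / (box_areas + gt_area - inter)
    return overlaps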
def get_targets(self, proposal_boxes, targets):
    """
    Arguments:
        proposal_boxes (Tensor): (# proposal boxes, 4)
        targets: (N, 5)
    Return:
        labels (ndarray): (256,)
        bbox_deltas[:, :-1]: (256, 4)
        batch_indices for the targets that were sampled
    """
    if not self.test:
        height, width = self.feature_map_dim[2:]
        indices = filter_cross_boundary_boxes(proposal_boxes, (height * 16, width * 16))
        proposal_boxes = proposal_boxes[indices]

    targets_batch, proposals_batch, batch_indices = self.foreground_sample(
        proposal_boxes, targets)
    bbox_deltas = bbox_transform(proposals_batch, targets_batch)
    labels_batch = targets_batch[:, -1]

    labels_batch = torch.from_numpy(labels_batch).long()
    bbox_deltas = torch.from_numpy(bbox_deltas).float()
    batch_indices = torch.from_numpy(batch_indices).long()

    return labels_batch, bbox_deltas, batch_indices
def produce_batch(filepath, gt_boxes, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # Feed the image to the pretrained model and get the feature map.
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Calculate the output width/height stride.
    w_stride = img_width / width
    h_stride = img_height / height

    # Generate base anchors according to the output stride.
    # Base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1).
    base_anchors = generate_anchors(w_stride, h_stride)

    # Slice tiles according to image size and stride.
    # Each 1x1x1532 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Apply base anchors to all tiles, to get num_feature_map * 9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Only keep anchors inside image + border.
    border = 0
    inds_inside = np.where((all_anchors[:, 0] >= -border) &
                           (all_anchors[:, 1] >= -border) &
                           (all_anchors[:, 2] < img_width + border) &    # width
                           (all_anchors[:, 3] < img_height + border))[0]  # height
    anchors = all_anchors[inds_inside]

    # Calculate overlaps of each anchor with each gt box:
    # a matrix of shape [len(anchors) x len(gt_boxes)].
    overlaps = bbox_overlaps(anchors, gt_boxes)

    # For each anchor, find the gt box with the biggest overlap and the
    # overlap ratio; result (len(anchors),).
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For each gt box, find the anchor with the biggest overlap and the
    # overlap ratio; result (len(gt_boxes),).
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Labels: 1 = fg, 0 = bg, -1 = ignore.
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # Set positive labels, as defined in the paper (Sec. 3.1.2):
    # we assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-over-Union (IoU)
    # overlap with a ground-truth box, or (ii) an anchor that has an
    # IoU overlap higher than 0.7 with any gt box.
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # Set negative labels.
    labels[max_overlaps <= .3] = 0

    # Subsample positive labels if we have too many.
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1

    # Subsample negative labels if we have too many.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Anchors are laid out k per feature-map cell, so integer-dividing the
    # anchor index by k recovers the cell index.
    batch_inds = inds_inside[labels != -1]
    batch_inds = (batch_inds / k).astype(int)

    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors,
                                  gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets, total_anchors,
                         inds_inside[labels == 1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]

    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)

    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
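# NOTE: unmap is used above (and in several functions below) but not defined
# here. A minimal numpy sketch of the usual Faster R-CNN helper, which
# scatters a subset of values back to the full anchor set:
def unmap_sketch(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items
    (of size count), writing `fill` everywhere else."""
    if len(data.shape) == 1:
        ret = np.empty((count,), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret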
def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info, feat_stride, anchor, A):
    allowed_border = 0
    total_anchors = anchor.shape[0]
    height, width = rpn_cls_score.shape[1:3]

    inds_inside = np.where((anchor[:, 0] >= allowed_border) &
                           (anchor[:, 1] >= allowed_border) &
                           (anchor[:, 2] < im_info[1] + allowed_border) &
                           (anchor[:, 3] < im_info[0] + allowed_border))[0]
    anchors = anchor[inds_inside, :]

    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    overlaps = bbox_overlaps(anchors, gt_boxes)

    # Anchor-side assignment: best gt box for each anchor.
    argmax_overlap = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlap]

    # Gt-side assignment: best anchor for each gt box.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    labels[max_overlaps < self._threshold_for_label_zero] = 0
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps > self._threshold_for_label_one] = 1

    fg_index = np.where(labels == 1)[0]
    bg_index = np.where(labels == 0)[0]
    fg_index_len = len(fg_index)
    bg_index_len = len(bg_index)

    # Always keep the same fg:bg ratio (1:3) by disabling the surplus side.
    if 3 * fg_index_len > bg_index_len:
        disable_inds = np.random.choice(
            fg_index, size=(3 * fg_index_len - bg_index_len) // 3, replace=False)
    else:
        disable_inds = np.random.choice(
            bg_index, size=(bg_index_len - 3 * fg_index_len), replace=False)
    labels[disable_inds] = -1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array([1.0, 1.0, 1.0, 1.0])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_example = np.sum(labels >= 0)
    positive_weight = np.ones((1, 4)) * 1.0 / num_example
    negative_weight = np.ones((1, 4)) * 1.0 / num_example
    bbox_outside_weights[labels == 1, :] = positive_weight
    bbox_outside_weights[labels == 0, :] = negative_weight

    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    # bbox_targets
    bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # Base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1).
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Predict scores and deltas with the trained RPN.
    res = rpn_model.query_cnn(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # Convert the (dx, dy, dw, dh) deltas into concrete box coordinates and
    # drop the parts outside the image.
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # Remove small boxes; here the threshold is 40 pixels.
    keep = filter_boxes(proposals, small_box_threshold)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Sort by score and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Apply NMS to the top 6000, then keep the top 300.
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Add the ground-truth boxes to the proposals as well.
    proposals = np.vstack((proposals, gt_boxes))

    # Calculate overlaps of proposals and gt_boxes.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  # ?

    # Subsample foreground and background proposals.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement.
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement.
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from the various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # Input RoIs: scatter the targets into the per-category layout.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))

    # Gt category one-hot.
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))

    return rois, batch_box, batch_categories
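# NOTE: bbox_transform_inv and clip_boxes are assumed to be the standard
# Faster R-CNN decoding helpers (the inverse of the delta encoding sketched
# near the top of this file). A minimal numpy sketch:
def bbox_transform_inv_sketch(anchors, deltas):
    # Decode (dx, dy, dw, dh) deltas back into (x1, y1, x2, y2) boxes.
    w = anchors[:, 2] - anchors[:, 0] + 1.0
    h = anchors[:, 3] - anchors[:, 1] + 1.0
    cx = anchors[:, 0] + 0.5 * w
    cy = anchors[:, 1] + 0.5 * h
    pred_cx = deltas[:, 0] * w + cx
    pred_cy = deltas[:, 1] * h + cy
    pred_w = np.exp(deltas[:, 2]) * w
    pred_h = np.exp(deltas[:, 3]) * h
    boxes = np.zeros_like(deltas)
    boxes[:, 0] = pred_cx - 0.5 * pred_w
    boxes[:, 1] = pred_cy - 0.5 * pred_h
    boxes[:, 2] = pred_cx + 0.5 * pred_w
    boxes[:, 3] = pred_cy + 0.5 * pred_h
    return boxes

def clip_boxes_sketch(boxes, im_shape):
    # Clip boxes to the image boundary; im_shape is (height, width).
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
    boxes[:, 2] = np.clip(boxes[:, 2], 0, im_shape[1] - 1)
    boxes[:, 3] = np.clip(boxes[:, 3], 0, im_shape[0] - 1)
    return boxes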
def produce_batch(filepath, gt_boxes, h_w, category, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # Feed the image to the pretrained model and get the feature map.
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Calculate the output width/height stride.
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # Generate base anchors according to the output stride.
    # Base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1).
    base_anchors = generate_anchors(w_stride, h_stride)

    # Slice tiles according to image size and stride.
    # Each 1x1x1532 feature-map cell maps to a tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Apply base anchors to all tiles, to get num_feature_map * 9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Feed the feature map to the pretrained RPN model; get proposal scores
    # and bbox deltas.
    res = rpn_model.predict(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # Transform proposals to bbox values (x1, y1, x2, y2).
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # Remove small boxes; here the threshold is 40 pixels.
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Apply NMS to the top 6000, then keep the top 300.
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Add gt_boxes to the proposals.
    proposals = np.vstack((proposals, gt_boxes))

    # Calculate overlaps of proposals and gt_boxes.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  # ?

    # Subsample foreground and background proposals.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement.
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement.
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from the various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # Input RoIs: scatter the targets into the per-category layout.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))

    # Gt category one-hot.
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))

    return rois, batch_box, batch_categories
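# NOTE: filter_boxes is assumed to drop proposals whose width or height
# falls below a minimum size, as in the py-faster-rcnn proposal layer.
# A minimal numpy sketch:
def filter_boxes_sketch(boxes, min_size):
    """Return indices of boxes at least min_size pixels wide and tall."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep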
def conf_loss(self, y_true, y_pred):
    """
    squeezeDet loss function for object detection and classification
    :param y_true: ground truth with shape [batchsize, #anchors, classes+8+labels]
    :param y_pred: network output
    :return: a tensor of the conf loss
    """
    # Handle for config.
    mc = self.config

    # Calculate non-padded entries.
    n_outputs = mc.CLASSES + 1 + 4

    # Slice and reshape network output.
    y_pred = y_pred[:, :, 0:n_outputs]
    y_pred = K.reshape(y_pred, (mc.BATCH_SIZE, mc.N_ANCHORS_HEIGHT, mc.N_ANCHORS_WIDTH, -1))

    # Slice y_true.
    input_mask = y_true[:, :, 0]
    input_mask = K.expand_dims(input_mask, axis=-1)
    box_input = y_true[:, :, 1:5]

    # Number of objects; used to normalize bbox and classification loss.
    num_objects = K.sum(input_mask)

    # Before computing the losses we need to slice the network outputs.
    # Number of class probabilities: n classes for each anchor.
    num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
    # Number of confidence scores: one for each anchor + class probs.
    num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs

    # Slice the confidence scores and put them through a sigmoid for probabilities.
    pred_conf = K.sigmoid(
        K.reshape(
            y_pred[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS]
        )
    )

    # Slice the remaining bounding-box deltas.
    pred_box_delta = K.reshape(
        y_pred[:, :, :, num_confidence_scores:],
        [mc.BATCH_SIZE, mc.ANCHORS, 4]
    )

    # Compute boxes.
    det_boxes = utils.boxes_from_deltas(pred_box_delta, mc)

    # Again, unstack is not available in the pure Keras backend.
    unstacked_boxes_pred = []
    unstacked_boxes_input = []
    for i in range(4):
        unstacked_boxes_pred.append(det_boxes[:, :, i])
        unstacked_boxes_input.append(box_input[:, :, i])

    # Compute the IoUs.
    ious = utils.tensor_iou(utils.bbox_transform(unstacked_boxes_pred),
                            utils.bbox_transform(unstacked_boxes_input),
                            input_mask,
                            mc)

    # Reshape input for correct broadcasting.
    input_mask = K.reshape(input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

    # Confidence score loss.
    conf_loss = K.mean(
        K.sum(
            K.square(ious - pred_conf) *
            (input_mask * mc.LOSS_COEF_CONF_POS / num_objects +
             (1 - input_mask) * mc.LOSS_COEF_CONF_NEG / (mc.ANCHORS - num_objects)),
            axis=[1]
        ),
    )

    return conf_loss
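# NOTE: utils.tensor_iou is assumed to compute the elementwise IoU between
# the predicted and ground-truth boxes (both already in corner format after
# utils.bbox_transform), masked to anchors with an assigned object. A
# hedged numpy sketch of the same computation, for clarity rather than as
# the library's actual backend code:
def tensor_iou_sketch(boxes_a, boxes_b, mask, epsilon=1e-8):
    """boxes_a, boxes_b: lists [xmin, ymin, xmax, ymax] of (batch, anchors)
    arrays; mask: (batch, anchors, 1), 1 for anchors with an object."""
    ixmin = np.maximum(boxes_a[0], boxes_b[0])
    iymin = np.maximum(boxes_a[1], boxes_b[1])
    ixmax = np.minimum(boxes_a[2], boxes_b[2])
    iymax = np.minimum(boxes_a[3], boxes_b[3])
    # Clamp so disjoint boxes contribute zero intersection.
    inter = np.maximum(ixmax - ixmin, 0) * np.maximum(iymax - iymin, 0)
    area_a = (boxes_a[2] - boxes_a[0]) * (boxes_a[3] - boxes_a[1])
    area_b = (boxes_b[2] - boxes_b[0]) * (boxes_b[3] - boxes_b[1])
    union = area_a + area_b - inter
    return inter / (union + epsilon) * np.squeeze(mask, axis=-1)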
def produce_batch(image_file, true_boxes):
    image_name = image_file.replace('.jpg', '').replace(trainDIR, '')
    image = Image.open(image_file).resize((image_size, image_size), Image.NEAREST)
    data = asarray(image) / 255.0
    del image
    feature_map = pretrained_model.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    del data

    feature_size = feature_map.shape[1]
    feature_stride = int(image_size / feature_size)
    number_feature_points = feature_size * feature_size

    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    base_anchors = generate_anchors(feature_stride, feature_stride,
                                    ratios=ANCHOR_RATIOS, scales=ANCHOR_SCALES)
    all_anchors = (base_anchors.reshape((1, anchor_number, 4)) +
                   shifts.reshape((1, number_feature_points, 4)).transpose((1, 0, 2)))
    total_anchor_number = anchor_number * number_feature_points
    all_anchors = all_anchors.reshape((total_anchor_number, 4))

    # Only keep anchors inside image + border.
    border = 0  # could also be FILTER_SIZE x feature_stride
    inds_inside = np.where((all_anchors[:, 0] >= -border) &
                           (all_anchors[:, 1] >= -border) &
                           (all_anchors[:, 2] < image_size + border) &
                           (all_anchors[:, 3] < image_size + border))[0]
    anchors = all_anchors[inds_inside]
    useful_anchor_number = len(inds_inside)

    overlaps = bbox_overlaps(anchors, true_boxes)

    # Which true box has the most overlap with each anchor?
    which_box = overlaps.argmax(axis=1)
    anchor_max_overlaps = overlaps[np.arange(overlaps.shape[0]), which_box]

    # Which anchor has the most overlap for each true box?
    which_anchor = overlaps.argmax(axis=0)
    box_max_overlaps = overlaps[which_anchor, np.arange(overlaps.shape[1])]
    which_anchor_v2 = np.where(overlaps == box_max_overlaps)[0]

    labels = np.empty((useful_anchor_number,), dtype=np.float32)
    labels.fill(-1)
    labels[which_anchor_v2] = 1
    labels[anchor_max_overlaps >= FG_THRESHOLD] = 1
    labels[anchor_max_overlaps <= BG_THRESHOLD] = 0

    # Subsample foreground anchors if we have too many.
    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]
    num_fg = int(BATCH_SIZE / (1 + BG_FG_FRAC))
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # Subsample background anchors to the configured bg:fg ratio.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1
    bg_inds = np.where(labels == 0)[0]

    anchor_batch_inds = inds_inside[labels != -1]
    np.random.shuffle(anchor_batch_inds)
    # Anchors are laid out anchor_number per feature point, so integer
    # division recovers the feature-map cell index.
    feature_batch_inds = (anchor_batch_inds / anchor_number).astype(int)

    pad_size = int((FILTER_SIZE - 1) / 2)
    padded_fcmap = np.pad(feature_map,
                          ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in feature_batch_inds:
        # (x, y) is the feature-map point addressed by this batch index.
        x = ind % feature_size
        y = int(ind / feature_size)
        fc_snip = padded_fcmap[y:y + FILTER_SIZE, x:x + FILTER_SIZE, :]
        batch_tiles.append(fc_snip)

    # unmap creates another array of labels that includes a -1 for the
    # anchors originally deleted for being out of bounds.
    full_labels = unmap(labels, total_anchor_number, inds_inside, fill=-1)
    batch_labels = full_labels.reshape(-1, 1, 1, 1 * anchor_number)[feature_batch_inds]

    targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    pos_anchors = all_anchors[inds_inside[labels == 1]]  # positive anchors
    targets = bbox_transform(pos_anchors, true_boxes[which_box, :][labels == 1])
    targets = unmap(targets, total_anchor_number, inds_inside[labels == 1], fill=0)
    batch_targets = targets.reshape(-1, 1, 1, 4 * anchor_number)[feature_batch_inds]

    return np.asarray(batch_tiles), batch_labels.tolist(), batch_targets.tolist()
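# NOTE: generate_anchors is used throughout but not defined here. A minimal
# numpy sketch that builds ratio/scale variants of a base
# (0, 0, w_stride-1, h_stride-1) tile, in the spirit of the original
# Faster R-CNN helper (simplified, not the reference implementation):
def generate_anchors_sketch(base_w, base_h, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    """Return (len(ratios) * len(scales), 4) anchors centred on the base tile."""
    cx, cy = (base_w - 1) / 2.0, (base_h - 1) / 2.0
    base_area = base_w * base_h
    anchors = []
    for r in ratios:
        # Keep the area fixed while changing the aspect ratio (h/w ~= r).
        w = np.round(np.sqrt(base_area / r))
        h = np.round(w * r)
        for s in scales:
            ws, hs = w * s, h * s
            anchors.append([cx - (ws - 1) / 2, cy - (hs - 1) / 2,
                            cx + (ws - 1) / 2, cy + (hs - 1) / 2])
    return np.array(anchors)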
def rpn_targets(self, all_anchors, im, gt):
    total_anchors = all_anchors.shape[0]
    gt_boxes = gt['boxes']
    height, width = im.size()[-2:]

    # Only keep anchors inside the image.
    _allowed_border = 0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < width + _allowed_border) &   # width
        (all_anchors[:, 3] < height + _allowed_border)    # height
    )[0]
    anchors = all_anchors[inds_inside, :]
    # assert anchors.shape[0] > 0, '{0}x{1} -> {2}'.format(height, width, total_anchors)
    if anchors.shape[0] == 0:
        print('{0}x{1} -> {2}'.format(height, width, total_anchors))
        return None, None

    # Label: 1 is positive, 0 is negative, -1 is don't care.
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # Overlaps between the anchors and the gt boxes: overlaps (ex, gt).
    overlaps = bbox_overlaps(torch.from_numpy(anchors), gt_boxes).numpy()
    gt_boxes = gt_boxes.numpy()
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Assign bg labels first so that positive labels can clobber them.
    labels[max_overlaps < self.negative_overlap] = 0
    # fg label: for each gt, the anchor with the highest overlap.
    labels[gt_argmax_overlaps] = 1
    # fg label: above-threshold IoU.
    labels[max_overlaps >= self.positive_overlap] = 1

    # Subsample positive labels if we have too many.
    num_fg = int(self.fg_fraction * self.batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many.
    num_bg = self.batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])

    # Map up to the original set of anchors.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    return labels, bbox_targets
def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info, feat_stride, anchor, A):
    allowed_border = 0
    total_anchors = anchor.shape[0]
    height, width = rpn_cls_score.shape[1:3]

    inds_inside = np.where((anchor[:, 0] >= allowed_border) &
                           (anchor[:, 1] >= allowed_border) &
                           (anchor[:, 2] < im_info[1] + allowed_border) &
                           (anchor[:, 3] < im_info[0] + allowed_border))[0]
    anchors = anchor[inds_inside, :]

    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    overlaps = bbox_overlaps(anchors, gt_boxes)
    argmax_overlap = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlap]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    labels[max_overlaps < 0.3] = 0
    labels[gt_argmax_overlaps] = 1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array([1.0, 1.0, 1.0, 1.0])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_example = np.sum(labels >= 0)
    positive_weight = np.ones((1, 4)) * 1.0 / num_example
    negative_weight = np.ones((1, 4)) * 1.0 / num_example
    bbox_outside_weights[labels == 1, :] = positive_weight
    bbox_outside_weights[labels == 0, :] = negative_weight

    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    # bbox_targets
    bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))

    return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def produce_batch(filepath, gt_boxes, w_h):
    # Load the feature map.
    feature_map = np.load(filepath)["fc"]

    # Width and height of the feature map, i.e. the number of feature points.
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Divide the image size by the feature-map size to get the stride.
    img_width = w_h[0]
    img_height = w_h[1]
    w_stride = img_width / width
    h_stride = img_height / height

    # Compute anchors from the stride.
    # Base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1).
    # base_anchors = generate_anchors(w_stride, h_stride, scales=np.asarray([1, 2, 4]))
    base_anchors = generate_anchors(16, 16, ratios=[0.5, 1],
                                    scales=np.asarray([1, 2, 8, 16]))

    # Slice tiles according to image size and stride; each 1x1x1532
    # feature-map cell maps to a tile. shift_x/shift_y hold all x and y
    # coordinates of the tile origins.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # Each shift is (x, y, x, y): the coordinates repeat because base_anchors
    # are in (x1, y1, x2, y2) form, so both corners must shift together.
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # Broadcasting a (1, k, 4) matrix against a (num_feature_map, 1, 4)
    # matrix yields shape (num_feature_map, k, 4).
    all_anchors = (base_anchors.reshape((1, k, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * k
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Only keep anchors inside image + border.
    border = 0
    inds_inside = np.where((all_anchors[:, 0] >= -border) &
                           (all_anchors[:, 1] >= -border) &
                           (all_anchors[:, 2] < img_width + border) &    # width
                           (all_anchors[:, 3] < img_height + border))[0]  # height
    anchors = all_anchors[inds_inside]
    if len(anchors) == 0:
        return None, None, None

    # Calculate overlaps of each anchor with each gt box:
    # a matrix of shape [len(anchors) x len(gt_boxes)].
    overlaps = bbox_overlaps(anchors, gt_boxes)

    # For each anchor, the index of the gt box with the biggest overlap,
    # and that overlap; result (len(anchors),).
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For each gt box, the anchor with the biggest overlap, and that
    # overlap; result (len(gt_boxes),).
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Labels for anchors inside the image: 1 = fg, 0 = bg, -1 = ignore.
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # Following the paper, positive labels go to only two kinds of anchors:
    # (1) for each gt, the anchor with the highest IoU;
    # (2) any anchor whose best IoU over all gts is at least 0.7.
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # Set negative labels.
    labels[max_overlaps <= .3] = 0

    # Subsample positive labels if we have too many.
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1

    # Subsample negative labels if we have too many: there is far more
    # background, so randomly mark the surplus as ignored.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Build the batch: batch_inds are all anchors that are not ignored.
    batch_inds = inds_inside[labels != -1]
    # Anchors are laid out k per feature-map cell, so dividing the anchor
    # index by k recovers the cell the anchor belongs to.
    batch_inds = (batch_inds / k).astype(int)

    # Labels mapped back to the full anchor set.
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    # batch_label_targets is n tiles of 1x1xk.
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # Anchors labelled as foreground.
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels == 1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]

    # Pad the spatial axes of the feature map by one on each side, then
    # drop the singleton batch axis.
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)), mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)

    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
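# A hedged usage sketch of how this variant might feed a training loop.
# The feature_files list, annotations dict, and generator name are
# assumptions for illustration, not from the source.
def rpn_batch_generator(feature_files, annotations, w_h):
    # Yield (tiles, [labels, deltas]) batches for an RPN head; skips
    # samples whose anchors all fall outside the image.
    while True:
        for path in feature_files:
            out = produce_batch(path, annotations[path], w_h)
            if out[0] is None:
                continue
            tiles, label_targets, bbox_targets = out
            yield tiles, [np.asarray(label_targets), np.asarray(bbox_targets)]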
def loss_without_regularization(self, y_true, y_pred):
    """
    squeezeDet loss function for object detection and classification
    :param y_true: ground truth with shape [batchsize, #anchors, classes+8+labels]
    :param y_pred: network output
    :return: a tensor of the total loss
    """
    # Handle for config.
    mc = self.config

    # Slice y_true.
    input_mask = y_true[:, :, 0]
    input_mask = K.expand_dims(input_mask, axis=-1)
    box_input = y_true[:, :, 1:5]
    box_delta_input = y_true[:, :, 5:9]
    labels = y_true[:, :, 9:]

    # Number of objects; used to normalize bbox and classification loss.
    num_objects = K.sum(input_mask)

    # Before computing the losses we need to slice the network outputs.
    pred_class_probs, pred_conf, pred_box_delta = utils.slice_predictions(y_pred, mc)

    # Compute boxes.
    det_boxes = utils.boxes_from_deltas(pred_box_delta, mc)

    # Again, unstack is not available in the pure Keras backend.
    unstacked_boxes_pred = []
    unstacked_boxes_input = []
    for i in range(4):
        unstacked_boxes_pred.append(det_boxes[:, :, i])
        unstacked_boxes_input.append(box_input[:, :, i])

    # Compute the IoUs.
    ious = utils.tensor_iou(utils.bbox_transform(unstacked_boxes_pred),
                            utils.bbox_transform(unstacked_boxes_input),
                            input_mask,
                            mc)

    # Cross-entropy: q * -log(p) + (1-q) * -log(1-p);
    # a small value is added inside the log to prevent it from blowing up.
    # Compute the class loss.
    class_loss = K.sum(labels * (-K.log(pred_class_probs + mc.EPSILON)) +
                       (1 - labels) * (-K.log(1 - pred_class_probs + mc.EPSILON)) *
                       input_mask * mc.LOSS_COEF_CLASS) / num_objects

    # Bounding-box loss.
    bbox_loss = (K.sum(mc.LOSS_COEF_BBOX *
                       K.square(input_mask * (pred_box_delta - box_delta_input))) /
                 num_objects)

    # Reshape input for correct broadcasting.
    input_mask = K.reshape(input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

    # Confidence score loss.
    conf_loss = K.mean(
        K.sum(
            K.square(ious - pred_conf) *
            (input_mask * mc.LOSS_COEF_CONF_POS / num_objects +
             (1 - input_mask) * mc.LOSS_COEF_CONF_NEG / (mc.ANCHORS - num_objects)),
            axis=[1]
        ),
    )

    # Add the above losses.
    total_loss = class_loss + conf_loss + bbox_loss

    return total_loss