def produce_batch(image_file, true_boxes):
    image = Image.open(image_file).resize((image_size, image_size), Image.NEAREST)
    data = asarray(image) / 255.0
    del image
    proposals, anchor_probs = generate_proposals(data)
    del data

    # Non-maximal suppression
    keep = py_cpu_nms(np.hstack((proposals, anchor_probs)), NMS_THRESHOLD)
    if post_nms_N > 0:
        keep = keep[:post_nms_N]
    proposals = proposals[keep, :]
    anchor_probs = anchor_probs[keep]

    # RCNN proposals
    # proposals = np.vstack((proposals, true_boxes))
    overlaps = bbox_overlaps(proposals, enlarged_bboxes)
    which_box = overlaps.argmax(axis=1)
    proposal_max_overlaps = overlaps.max(axis=1)

    # Sub-sample foreground and background proposals
    fg_inds = np.where(proposal_max_overlaps >= FG_THRESHOLD_RCNN)[0]
    fg_rois_in_image = min(int(BATCH_SIZE / (1 + BG_FG_FRAC_RCNN)), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_in_image, replace=False)

    bg_inds = np.where((proposal_max_overlaps < BG_THRESH_HI) &
                       (proposal_max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_in_image = min(fg_rois_in_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_in_image, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    np.random.shuffle(keep_inds)

    # Select sampled values from the various arrays:
    rois = proposals[keep_inds]  # the chosen RoIs

    # Scores of the chosen RoIs (fg=1, bg=0)
    new_scores = np.zeros(len(proposals))
    new_scores[fg_inds] = 1
    roi_scores = new_scores[keep_inds].reshape(-1, 1)

    # Regression targets (non-zero only for foreground RoIs)
    targets = np.zeros((len(proposals), 4)).reshape(-1, 4)
    targets[fg_inds] = bbox_transform(proposals[fg_inds], true_boxes[which_box[fg_inds]])
    targets = targets[keep_inds]
    return rois, targets, roi_scores
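# A minimal sketch of the greedy IoU-based NMS that every snippet here calls.
# It follows the widely circulated Fast R-CNN "py_cpu_nms" reference (an
# assumption -- the surrounding project may ship its own variant). It expects
# dets to be an (N, 5+) float array laid out as [x1, y1, x2, y2, score, ...].
import numpy as np

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose IoU with the kept box is below the threshold
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep

# Usage: py_cpu_nms(np.array([[10, 10, 50, 50, 0.9],
#                             [12, 12, 52, 52, 0.8]], np.float32), 0.5) -> [0]
# (the lower-scoring, heavily overlapping box is suppressed)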
def transform_boxes_v2(self, num_classes, im):
    batch = self.obj_blob.shape[0]
    height = self.obj_blob.shape[1]
    width = self.obj_blob.shape[2]
    num_anchors = self.anchors.shape[0]

    self.obj_blob = self.obj_blob.reshape(-1)
    self.cls_blob = self.cls_blob.reshape(-1)
    self.loc_blob = self.loc_blob.reshape(-1)
    self.ori_blob = self.ori_blob.reshape(-1)
    self.dim_blob = self.dim_blob.reshape(-1)
    self.lof_blob = self.lof_blob.reshape(-1)
    self.lor_blob = self.lor_blob.reshape(-1)

    ret_list = []
    self.t1 = time.time()
    for i in range(height * width):
        row = i // width
        col = i % width
        for n in range(num_anchors):
            index = i * num_anchors + n
            scale = self.obj_blob[index]  # objectness score for this anchor
            class_index = index * num_classes
            for k in range(num_classes):
                prob = scale * self.cls_blob[class_index + k]
                if prob > self.det_thresh:
                    ret_list.append(self.obtain_boxes(index, row, col, n,
                                                      width, height, prob, k))
    self.t2 = time.time()
    print("t1-2: {}".format(self.t2 - self.t1))

    if len(ret_list) != 0:
        ret_list = np.array(ret_list, dtype=np.float32)
        keep = py_cpu_nms(ret_list, self.nms_thresh)
        ret_list = ret_list[keep]
    self.t3 = time.time()
    print("t2-3: {}".format(self.t3 - self.t2))
    return self.change2objs(ret_list)
def nms_box():
    pass


def nms_3d():
    pass


if __name__ == "__main__":
    objects = read_label(file_name)
    labels, objs_list, prefix = split_predict_result(objects)
    # print(labels)
    # print(objs_list)
    # print(prefix)
    image = cv2.imread("/Volumes/Will 1/thesis/cus_ktti_vis/data_proc/data/000015.png")
    new_objects = []
    for idx, objs in enumerate(objs_list):
        # print(objs)
        for obj in objs:
            cv2.rectangle(image, (int(obj[0]), int(obj[1])),
                          (int(obj[2]), int(obj[3])), (0, 255, 0), 2)
        # Run NMS per label group; prefix[idx] maps local indices back into `objects`
        keep = py_cpu_nms(np.asarray(objs), 0.1)
        for k in keep:
            new_objects.append(objects[k + prefix[idx]])
    print(new_objects)
    objects_to_label(new_objects, "000015_pred_mod.txt")
    cv2.imwrite("./test.jpg", image)
landms = landms.cpu().numpy()

inds = np.where(scores > confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]

# keep top-K before NMS
order = scores.argsort()[::-1][:top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]

# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, nms_threshold)
# keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
dets = dets[keep, :]
landms = landms[keep]

# keep top-K after NMS
dets = dets[:keep_top_k, :]
landms = landms[:keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)

# show image
for b in dets:
    if b[4] < vis_thres:
        continue
    b = list(map(int, b))
    x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
    img_box = srcimg[y1:y2 + 1, x1:x2 + 1, :]
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    # base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Run the trained RPN to get scores and deltas
    res = rpn_model.query_cnn(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # Convert the dx/dy deltas into concrete xy coordinates,
    # then drop anchors falling outside the image
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # remove small boxes
    keep = filter_boxes(proposals, small_box_threshold)  # here the threshold is 40 pixels
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # add the ground-truth boxes to the proposals as well
    proposals = np.vstack((proposals, gt_boxes))

    # calculate overlaps of proposals and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  #?

    # sub-sample foreground regions
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = min(BATCH - fg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # input rois: scatter each target into the slot of its category (200 classes assumed)
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))

    # one-hot gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
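# A minimal sketch of the pairwise-IoU helper the sampling code above relies on
# (an assumption -- most Faster R-CNN ports ship a Cython version of this).
# Returns an (N, K) matrix where entry (n, k) is IoU(boxes[n], query_boxes[k]).
def bbox_overlaps(boxes, query_boxes):
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K))
    for k in range(K):
        q_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                  (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) -
                  max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw <= 0:
                continue
            ih = (min(boxes[n, 3], query_boxes[k, 3]) -
                  max(boxes[n, 1], query_boxes[k, 1]) + 1)
            if ih <= 0:
                continue
            b_area = ((boxes[n, 2] - boxes[n, 0] + 1) *
                      (boxes[n, 3] - boxes[n, 1] + 1))
            # IoU = intersection / (area_a + area_b - intersection)
            overlaps[n, k] = iw * ih / (b_area + q_area - iw * ih)
    return overlaps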
def det_rec(self, srcimg):
    img = np.float32(srcimg)
    im_height, im_width, _ = img.shape
    img -= (104, 117, 123)
    with torch.no_grad():
        scale = torch.Tensor([img.shape[1], img.shape[0],
                              img.shape[1], img.shape[0]]).to(device)
        # note: the input image is NOT resized to a fixed size
        img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).to(device)
        loc, conf, landms = self.net(img)  # forward pass
        prior_data = self.priorbox((im_height, im_width)).to(device)
        boxes = decode(loc.data.squeeze(0), prior_data, cfg_mnet['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg_mnet['variance'])
        landms = landms * scale.repeat(2) / self.resize
        landms = landms.cpu().numpy()

    inds = np.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, self.nms_threshold)
    # keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:self.keep_top_k, :]
    landms = landms[:self.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # show image
    for b in dets:
        if b[4] < self.vis_thres:
            continue
        # text = "{:.4f}".format(b[4])
        # print(text)
        b = list(map(int, b))
        x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
        img_box = srcimg[y1:y2 + 1, x1:x2 + 1, :]
        new_x1, new_y1 = b[9] - x1, b[10] - y1
        new_x2, new_y2 = b[11] - x1, b[12] - y1
        new_x3, new_y3 = b[7] - x1, b[8] - y1
        new_x4, new_y4 = b[5] - x1, b[6] - y1
        # define the corresponding source points
        points1 = np.float32([[new_x1, new_y1], [new_x2, new_y2],
                              [new_x3, new_y3], [new_x4, new_y4]])
        # compute the perspective transform matrix
        M = cv2.getPerspectiveTransform(points1, self.points_ref)
        # apply the perspective warp to rectify the plate crop
        processed = cv2.warpPerspective(img_box, M, (94, 24))
        result = self.lprnet.rec(processed)
        cv2.rectangle(srcimg, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        # landms
        cv2.circle(srcimg, (b[5], b[6]), 2, (255, 0, 0), thickness=5)
        cv2.circle(srcimg, (b[7], b[8]), 2, (255, 0, 0), thickness=5)
        cv2.circle(srcimg, (b[9], b[10]), 2, (255, 0, 0), thickness=5)
        cv2.circle(srcimg, (b[11], b[12]), 2, (255, 0, 0), thickness=5)
        # cv2.putText(srcimg, result, (b[0], b[1] - 10),
        #             cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), thickness=3)
        srcimg = puttext_chinese(srcimg, result, (b[0], b[1] - 30), (0, 255, 0))
    return srcimg
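# A standalone sketch of the 4-point rectification step used above: warp four
# landmark points onto the fixed 94x24 plate canvas before recognition. The
# corner ordering of `corners` and the reference layout are illustrative
# assumptions; the class's self.points_ref may order them differently.
import cv2
import numpy as np

def rectify_plate(img_box, corners, out_w=94, out_h=24):
    # corners: (4, 2) float32 array, ordered to match points_ref below
    points_ref = np.float32([[0, 0], [out_w - 1, 0],
                             [0, out_h - 1], [out_w - 1, out_h - 1]])
    M = cv2.getPerspectiveTransform(np.float32(corners), points_ref)
    return cv2.warpPerspective(img_box, M, (out_w, out_h))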
def transform_boxes(self, num_classes, im):
    batch = self.obj_blob.shape[0]
    height = self.obj_blob.shape[1]
    width = self.obj_blob.shape[2]
    num_anchors = self.anchors.shape[0]

    obj_pred = self.obj_blob.reshape(-1)
    cls_pred = self.cls_blob.reshape(-1)
    loc_pred = self.loc_blob.reshape(-1)
    ori_pred = self.ori_blob.reshape(-1)
    dim_pred = self.dim_blob.reshape(-1)
    lof_pred = self.lof_blob.reshape(-1)
    lor_pred = self.lor_blob.reshape(-1)

    ret_list = []
    self.t1 = time.time()
    for i in range(height * width):
        row = i // width
        col = i % width
        for n in range(num_anchors):
            index = i * num_anchors + n
            scale = obj_pred[index]  # objectness

            ori_index = index * 2  # two orientation values (sin, cos) per anchor
            orientation = math.atan2(ori_pred[ori_index + 1], ori_pred[ori_index])

            dim_index = index * 3
            d3_h = dim_pred[dim_index + 0]
            d3_w = dim_pred[dim_index + 1]
            d3_l = dim_pred[dim_index + 2]

            # YOLO-style 2D box decode: sigmoid center offsets, exp-scaled anchors
            box_index = index * 4
            cx = (col + sigmoid(loc_pred[box_index + 0])) / (width * 1.0)
            cy = (row + sigmoid(loc_pred[box_index + 1])) / (height * 1.0)
            w = math.exp(loc_pred[box_index + 2]) * self.anchors[n, 0] / (width * 1.0) * 0.5
            h = math.exp(loc_pred[box_index + 3]) * self.anchors[n, 1] / (height * 1.0) * 0.5

            # front (lof) and rear (lor) face boxes, decoded relative to the 2D box
            lof_index = index * 4
            lof_x = lof_pred[lof_index + 0] * w * 2 + cx
            lof_y = lof_pred[lof_index + 1] * h * 2 + cy
            lof_w = math.exp(lof_pred[lof_index + 2]) * w
            lof_h = math.exp(lof_pred[lof_index + 3]) * h

            lor_index = index * 4
            lor_x = lor_pred[lor_index + 0] * w * 2 + cx
            lor_y = lor_pred[lor_index + 1] * h * 2 + cy
            lor_w = math.exp(lor_pred[lor_index + 2]) * w
            lor_h = math.exp(lor_pred[lor_index + 3]) * h

            # scale normalized coordinates back to crop-image pixels
            cx, cy = self.crop_img_width * cx, self.crop_img_height * cy
            w, h = self.crop_img_width * w, self.crop_img_height * h
            lof_x, lof_y = self.crop_img_width * lof_x, self.crop_img_height * lof_y
            lof_w, lof_h = self.crop_img_width * lof_w, self.crop_img_height * lof_h
            lor_x, lor_y = self.crop_img_width * lor_x, self.crop_img_height * lor_y
            lor_w, lor_h = self.crop_img_width * lor_w, self.crop_img_height * lor_h

            class_index = index * num_classes
            for k in range(num_classes):
                prob = scale * cls_pred[class_index + k]
                if prob > self.det_thresh:
                    # allocate a fresh array per detection, so earlier entries are
                    # not overwritten when several classes pass the threshold
                    obj_np = np.zeros(18)
                    obj_np[0] = cx - w
                    obj_np[1] = cy - h + self.offset_y
                    obj_np[2] = cx + w
                    obj_np[3] = cy + h + self.offset_y
                    obj_np[4] = prob
                    obj_np[5] = k
                    obj_np[6] = orientation
                    obj_np[7] = d3_h
                    obj_np[8] = d3_w
                    obj_np[9] = d3_l
                    obj_np[10] = lof_x - lof_w
                    obj_np[11] = lof_y - lof_h + self.offset_y
                    obj_np[12] = lof_x + lof_w
                    obj_np[13] = lof_y + lof_h + self.offset_y
                    obj_np[14] = lor_x - lor_w
                    obj_np[15] = lor_y - lor_h + self.offset_y
                    obj_np[16] = lor_x + lor_w
                    obj_np[17] = lor_y + lor_h + self.offset_y
                    ret_list.append(obj_np)
    self.t2 = time.time()
    print("t1-2: {}".format(self.t2 - self.t1))

    if len(ret_list) != 0:
        ret_list = np.array(ret_list, dtype=np.float32)
        keep = py_cpu_nms(ret_list, self.nms_thresh)
        ret_list = ret_list[keep]
    self.t3 = time.time()
    print("t2-3: {}".format(self.t3 - self.t2))
    return self.change2objs(ret_list)
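# The decoder above assumes a scalar sigmoid helper; a minimal version
# (an assumption -- the project may define its own):
import math

def sigmoid(x):
    # numerically stable logistic function
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    z = math.exp(x)
    return z / (1.0 + z)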
def produce_batch(filepath, gt_boxes, h_w, category):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # feed the image to the pretrained model and get the feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # calculate the output w, h stride
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # generate base anchors according to the output stride;
    # base anchors are 9 anchors wrt a tile (0, 0, w_stride-1, h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)

    # slice tiles according to image size and stride;
    # each 1x1x1532 feature-map cell maps to one tile
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # apply base anchors to all tiles to get num_feature_map * 9 anchors
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # feed the feature map to the pretrained RPN model; get proposal labels and bboxes
    res = rpn_model.predict(feature_map)
    scores = res[0].reshape(-1, 1)
    deltas = np.reshape(res[1], (-1, 4))

    # transform proposals to bbox values (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # remove small boxes; here the threshold is 40 pixels
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # add gt_boxes to the proposals
    proposals = np.vstack((proposals, gt_boxes))

    # calculate overlaps of proposals and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment]  #?
    # sub-sample foreground regions
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    bg_inds = np.where((max_overlaps < BG_THRESH_HI) &
                       (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = min(BATCH - fg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # input rois: scatter each target into the slot of its category (200 classes assumed)
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))

    # one-hot gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
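# A minimal sketch of the standard Faster R-CNN box-regression target encoding
# used by produce_batch above (an assumption -- ports sometimes drop the "+ 1"
# pixel convention). Given sampled RoIs and their matched ground-truth boxes,
# it returns one (dx, dy, dw, dh) target per row.
def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    # center offsets normalized by RoI size; log-space width/height ratios
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()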