def rnet_boxes(img, rnet, bounding_boxes, thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS, show_boxes=True):
    rnet.eval()
    img_boxes = get_image_boxes(bounding_boxes, img, size=24)
    img_boxes = torch.FloatTensor(img_boxes)
    img_boxes = img_boxes.to(
        torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    output = rnet(img_boxes)
    probs = output[0].data.cpu().numpy()    # shape [n_boxes, 1]
    offsets = output[1].data.cpu().numpy()  # shape [n_boxes, 4]

    keep = np.where(probs[:, 0] > thresholds[1])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 0].reshape((-1,))
    offsets = offsets[keep]

    keep = nms(bounding_boxes, nms_thresholds[1])
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    if show_boxes:
        show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
def run_first_stage(image, net, scale, threshold):
    """Run P-Net, generate bounding boxes, and do NMS."""
    width, height = image.size
    sw, sh = math.ceil(width * scale), math.ceil(height * scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    # Preprocess: ToTensor normalizes the image to [0, 1].
    img = transforms.ToTensor()(img).unsqueeze(0)
    img = img.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

    output = net(img)
    # Only one image in the batch, so index [0, ...]; this channel holds
    # the probability of face=True.
    probs = output[0].data.cpu().numpy()[0, 0, :, :]
    # offsets shape: [1, 4, o_h, o_w]
    offsets = output[1].data.cpu().numpy()

    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    if len(boxes) == 0:
        return None

    # boxes: [[x1, y1, x2, y2, score, offsets], ...]
    # NMS uses only the 4 coordinates plus the confidence score.
    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]
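# run_first_stage relies on a _generate_bboxes helper that is not shown in
# this section. The sketch below is a typical implementation in the style of
# the common mtcnn-pytorch code (assumptions: a 12x12 P-Net window with
# stride 2, probs of shape [h, w], offsets of shape [1, 4, h, w]). It is a
# reference sketch, not necessarily this project's exact helper.
import numpy as np

def _generate_bboxes(probs, offsets, scale, threshold):
    """Map P-Net output cells above `threshold` back to boxes in the
    original image, assuming a 12x12 receptive field with stride 2."""
    stride, cell_size = 2, 12
    inds = np.where(probs > threshold)  # cells classified as face
    if inds[0].size == 0:
        return np.array([])
    # Regression offsets for the selected cells, each of shape [n].
    tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
    score = probs[inds[0], inds[1]]
    # Undo the scaling so the boxes live in original-image coordinates.
    bounding_boxes = np.vstack([
        np.round((stride * inds[1] + 1.0) / scale),
        np.round((stride * inds[0] + 1.0) / scale),
        np.round((stride * inds[1] + 1.0 + cell_size) / scale),
        np.round((stride * inds[0] + 1.0 + cell_size) / scale),
        score,
        np.array([tx1, ty1, tx2, ty2]),
    ])
    return bounding_boxes.T  # [n, 9]: box coords, score, raw offsets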
def predict(self, image, n, overlap_thresh):
    # EdgeBoxes returns proposals together with their scores.
    boxes, scores = self.edgebox.getproposals(image)
    boxes = util.cv2_to_numpy(boxes)
    boxes, scores, _ = util.topn(boxes, scores, n)
    boxes, scores, _ = util.nms(boxes, scores, overlap_thresh)
    scores = self.infer(image, boxes)
    return boxes, scores
def pnet_boxes(img, pnet, min_face_size=MIN_FACE_SIZE, thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS, show_boxes=True):
    pnet.eval()
    width, height = img.size
    min_length = min(height, width)
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)
    scales = []

    # Scale the image so that the smallest detectable face becomes 12 px.
    m = min_detection_size / min_face_size
    min_length *= m
    # Build the image pyramid: keep scaling by `factor` (all factors < 1)
    # until the shorter side shrinks to the 12 px detection window.
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1: run P-Net on every scale.
    bounding_boxes = []
    for s in scales:
        boxes = run_first_stage(img, pnet, scale=s, threshold=thresholds[0])
        bounding_boxes.append(boxes)

    # bounding_boxes shape: [scales, boxes_per_scale, 5];
    # drop empty scales and stack everything into [total_boxes, 5+].
    bounding_boxes = [i for i in bounding_boxes if i is not None]
    try:
        _ = bounding_boxes[0]
    except Exception:
        print(bounding_boxes)
        img.show()
    if len(bounding_boxes) == 0:
        return None
    bounding_boxes = np.vstack(bounding_boxes)

    keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
    bounding_boxes = bounding_boxes[keep]
    # Fine-tune x1, y1, x2, y2 with the regression offsets (scaled by w, h).
    bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                   bounding_boxes[:, 5:])
    # Convert the detected boxes to squares.
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    if show_boxes:
        show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
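# A small worked example of the scale-pyramid loop above. The numbers are
# purely illustrative (they assume MIN_FACE_SIZE = 20 and a hypothetical
# 500x400 image), not values taken from the code.
min_face_size = 20      # hypothetical value of MIN_FACE_SIZE
min_length = 400        # shorter side of a hypothetical 500x400 image
m = 12 / min_face_size  # 0.6: maps a 20 px face onto the 12 px window
min_length *= m         # 240.0

scales, factor, k = [], 0.707, 0
while min_length > 12:
    scales.append(m * factor ** k)
    min_length *= factor
    k += 1
print(scales)  # [0.6, 0.4242, 0.29995, ...] -- each level ~sqrt(0.5) smaller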
def _worker_match(img, template_name, channel):
    global _worker_templates
    if template_name not in _worker_templates:
        return []

    # Store bounding boxes.
    boxes = []

    # Get the template.
    template_images, threshold, center = _worker_templates[template_name]
    for scale, (template_color, template_gray) in template_images.items():
        # Select a channel.
        template = template_gray
        if channel is not None:
            template = template_color[:, :, channel]

        # Find the center.
        w, h = template.shape[::-1]
        scaled_center = None
        if center is None:
            scaled_center = (w // 2, h // 2)
        else:
            scaled_center = (center[0] * scale, center[1] * scale)

        # Make sure the template is smaller than the image.
        if w >= img.shape[1] or h >= img.shape[0]:
            continue

        # Search for the template.
        res = cv2.matchTemplate(img, template, cv2.TM_SQDIFF_NORMED)

        # Threshold the result.
        match_locations = np.where(res <= threshold)

        # Find all locations.
        for (x, y) in zip(match_locations[1], match_locations[0]):
            x = int(x + scaled_center[0])
            y = int(y + scaled_center[1])
            boxes.append((x - w // 2, y - h // 2, x + w // 2, y + h // 2))

    # Run non-maximum suppression.
    boxes = util.nms(np.asarray(boxes), 0.0)
    if len(boxes) == 0:
        return []

    # Return the centers of the boxes.
    centers_x = (boxes[:, 2] + boxes[:, 0]) / 2
    centers_y = (boxes[:, 3] + boxes[:, 1]) / 2
    centers = list(zip(centers_x, centers_y))
    return centers
def onet_boxes(img, onet, bounding_boxes, thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS, show_boxes=True):
    onet.eval()
    img_boxes = get_image_boxes(bounding_boxes, img, size=48)
    if len(img_boxes) == 0:
        return [], []
    img_boxes = torch.FloatTensor(img_boxes)
    img_boxes = img_boxes.to(
        torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    output = onet(img_boxes)
    probs = output[0].data.cpu().numpy()      # shape [n_boxes, 1]
    offsets = output[1].data.cpu().numpy()    # shape [n_boxes, 4]
    landmarks = output[2].data.cpu().numpy()  # shape [n_boxes, 10]

    keep = np.where(probs[:, 0] > thresholds[2])[0]
    bounding_boxes = bounding_boxes[keep]
    # Replace the old confidence with the larger model's score.
    bounding_boxes[:, 4] = probs[keep, 0].reshape((-1,))
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # Compute landmark points. The landmark output stores (x, y) values
    # relative to the box (even indices are x, odd indices are y); anchor
    # them at the top-left corner and scale by the box width/height.
    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
    landmarks_pixel = np.zeros(landmarks.shape)
    landmarks_pixel[:, 0:5] = (
        np.expand_dims(xmin, 1) +
        np.expand_dims(width, 1) * landmarks[:, 0::2]).copy()
    landmarks_pixel[:, 5:10] = (
        np.expand_dims(ymin, 1) +
        np.expand_dims(height, 1) * landmarks[:, 1::2]).copy()

    bounding_boxes = calibrate_box(bounding_boxes, offsets)
    keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
    bounding_boxes = bounding_boxes[keep]
    landmarks_pixel = landmarks_pixel[keep]
    if show_boxes:
        show_bboxes(img, bounding_boxes, landmarks_pixel).show()
    return bounding_boxes, landmarks_pixel
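# Taken together, pnet_boxes, rnet_boxes, and onet_boxes form the usual
# three-stage MTCNN cascade. A minimal sketch of how they chain; detect_faces
# is a hypothetical wrapper name, pnet/rnet/onet are assumed to be
# already-loaded models, and img a PIL image.
def detect_faces(img, pnet, rnet, onet):
    """Hypothetical wrapper chaining the three stages defined above."""
    boxes = pnet_boxes(img, pnet, show_boxes=False)  # stage 1: proposals
    if boxes is None:
        return [], []
    boxes = rnet_boxes(img, rnet, boxes, show_boxes=False)  # stage 2: refine
    if len(boxes) == 0:
        return [], []
    # Stage 3: final boxes plus 5-point facial landmarks.
    return onet_boxes(img, onet, boxes, show_boxes=False)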
def detect(split="val", root_path="sandbox", year=2012, gpu=True): m = MatroidModel("matroid/Everyday-Objects.matroid", gpu) voc_train = VOCDetection("~/data/voc/", image_set=split, download=True, year=str(year)) # voc_train = VOCDetection("/deep/group/haosheng/voc/", image_set=split)i GROUNDTRUTH_PATH = os.path.join("Object-Detection-Metrics", "groundtruths") PREDICTION_PATH = os.path.join("Object-Detection-Metrics", "detections") os.makedirs(GROUNDTRUTH_PATH, exist_ok=True) os.makedirs(PREDICTION_PATH, exist_ok=True) for img, target in tqdm(voc_train): # Ground Truth file_name = target['annotation']['filename'].replace("jpg", "txt") with open(os.path.join(GROUNDTRUTH_PATH, file_name), "w") as f: objs = target['annotation']['object'] if not isinstance(objs, list): objs = [objs] for obj in objs: name = obj['name'] bbox = obj['bndbox'] xmin, ymin, xmax, ymax = bbox['xmin'], bbox['ymin'], bbox[ 'xmax'], bbox['ymax'] f.write(f"{name} {xmin} {ymin} {xmax} {ymax}\n") # Prediction with open(os.path.join(PREDICTION_PATH, file_name), "w") as f: boxes, probs = m.predict(img) preds = nms(boxes, probs) h, w = img.size for label, confidence, bbox in preds: xmin, ymin, xmax, ymax = bbox[0] * \ h, bbox[1]*w, bbox[2]*h, bbox[3]*w name = VOC_LABEL2NAME[label] f.write( f"{name} {confidence} {xmin:.0f} {ymin:.0f} {xmax:.0f} {ymax:.0f}\n" )
def forward(self, loc_data, conf_data, prior_data):
    """
    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch, num_priors*4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors, num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1, num_priors, 4]
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms.
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    flt = output.contiguous().view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
    return output
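# The nms helper used above (returning kept indices and their count) is not
# defined in this section. Below is a minimal PyTorch sketch with the same
# calling convention -- a plain greedy IoU loop, shown for reference rather
# than as the author's actual implementation.
import torch

def nms(boxes, scores, overlap=0.5, top_k=200):
    """Greedy NMS sketch: keep the highest-scoring box, drop boxes whose
    IoU with it exceeds `overlap`, repeat. boxes: [n, 4] as x1,y1,x2,y2."""
    keep = scores.new_zeros(scores.size(0), dtype=torch.long)
    if boxes.numel() == 0:
        return keep, 0
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)
    _, idx = scores.sort(0, descending=True)
    idx = idx[:top_k]
    count = 0
    while idx.numel() > 0:
        i = idx[0]
        keep[count] = i
        count += 1
        if idx.numel() == 1:
            break
        idx = idx[1:]
        # Intersection of the remaining boxes with box i.
        xx1 = x1[idx].clamp(min=x1[i].item())
        yy1 = y1[idx].clamp(min=y1[i].item())
        xx2 = x2[idx].clamp(max=x2[i].item())
        yy2 = y2[idx].clamp(max=y2[i].item())
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        iou = inter / (area[idx] + area[i] - inter)
        idx = idx[iou <= overlap]  # drop boxes that overlap too much
    return keep, count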
def hard_negative_mine(self, folder, epochs):
    """Train the SVM by hard negative mining."""
    # Initialize the SVM with default weights from the positive sample means.
    self.initialize_svm(folder)
    util.printProgressBar(0, epochs, prefix='Starting', suffix='complete')
    for epoch in range(epochs):
        # STEP 1: Mine for negative samples.
        for i, img in enumerate(self.imgs):
            boxes, scores = self.edgebox.getproposals(img)
            if not isinstance(boxes, tuple):  # if boxes is a tuple, it's empty
                boxes = util.cv2_to_numpy(boxes)
                boxes, scores = self.filter_negsamples(boxes, scores, i)
                if boxes.shape[0] > self.n:  # only if more than n samples
                    boxes, scores, _ = util.topn(boxes, scores, self.n)
                if boxes.shape[0] > 0:  # only if some boxes survived
                    boxes, scores, _ = util.nms(boxes, scores,
                                                self.overlap_thresh)
                    self.neg_rects.append(boxes.tolist())

        # STEP 2: Add those samples into the dataset.
        self.populate_data(True)
        self.populate_data(False)

        # STEP 3: Prepare the data.
        X, y = self.prepare_data()

        # STEP 4: Train the SVM.
        self.train_svm(X, y)
        util.printProgressBar(epoch + 1, epochs,
                              prefix='Epoch %d' % (epoch + 1),
                              suffix='complete')
    print('Training successfully finished after %d epochs' % epochs)
def test_img(img, model):
    pts_all = []
    scores_all = []

    # Original size.
    pts, scores, _, _, _ = testutil.run_pipeline(img, model)
    pts_all += pts
    scores_all += scores

    # Crop ratio 2.
    imgs2, metas2 = util.crop_images(img, 2)
    for i, m in zip(imgs2, metas2):
        pts, scores, _, _, _ = testutil.run_pipeline(i, model)
        pts = util.restore_pts(pts, m)
        pts_all += pts
        scores_all += scores

    # Crop ratio 4.
    imgs2, metas2 = util.crop_images(img, 4)
    for i, m in zip(imgs2, metas2):
        pts, scores, _, _, _ = testutil.run_pipeline(i, model)
        pts = util.restore_pts(pts, m)
        pts_all += pts
        scores_all += scores

    pts, scores = util.nms(pts_all, scores_all)
    return pts, scores
def get_bboxes(outputs, proposals, num_proposals, num_classes, im_shape,
               im_scale, max_per_image=100, thresh=0.001, nms_thresh=0.4):
    """
    Returns bounding boxes for detected objects, organized by class.

    Transforms the proposals from the region proposal network into bounding
    box predictions using the bounding box regressions from the
    classification network by:
    (1) applying the bounding box regressions to the region proposals,
    (2) for each class, keeping proposed boxes whose objectness score is
        greater than THRESH,
    (3) applying per-class non-maximum suppression using NMS_THRESH,
    (4) limiting the total number of detections over all classes to
        MAX_PER_IMAGE.

    Arguments:
        outputs (list of tensors): Faster-RCNN model outputs
        proposals (Tensor): Proposed boxes from the model's proposalLayer
        num_proposals (int): Number of proposals
        num_classes (int): Number of classes
        im_shape (tuple): Shape of image
        im_scale (float): Scaling factor of image
        max_per_image (int): Maximum number of allowed detections per image.
            Default is 100. None indicates no enforced maximum.
        thresh (float): Threshold for objectness score. Default is 0.001.
        nms_thresh (float): Threshold for non-maximum suppression.
            Default is 0.4.

    Returns:
        detections (array): N x 6 array of bounding box detections, where
            each row is [xmin, ymin, xmax, ymax, score, class].
    """
    proposals = proposals.get()[:num_proposals, :]  # remove padded proposals
    boxes = proposals[:, 1:5] / im_scale  # scale back to real image space

    # Obtain bounding box corrections from the frcn layers.
    scores = outputs[2][0].get()[:, :num_proposals].T
    bbox_deltas = outputs[2][1].get()[:, :num_proposals].T

    # Apply bounding box corrections to the region proposals.
    pred_boxes = util.bbox_transform_inv(boxes, bbox_deltas)
    pred_boxes = util.clip_boxes(pred_boxes, im_shape)

    detections = []
    # Skip the background class; start processing from class 1.
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > thresh)[0]

        # Obtain class-specific boxes and scores.
        cls_labels = j * np.ones((len(inds), 1))
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis], cls_labels)).astype(
                np.float32, copy=False)

        # Apply non-maximum suppression.
        keep = util.nms(cls_dets, nms_thresh)
        cls_dets = cls_dets[keep, :]

        # Store results.
        if cls_dets.size != 0:
            detections.append(cls_dets)

    # Guard against no detections.
    if len(detections) != 0:
        detections = np.vstack(detections)

        # Limit to max_per_image detections *over all classes*.
        if max_per_image is not None and len(detections) > max_per_image:
            # Compute the threshold needed to keep the top max_per_image.
            image_thresh = np.sort(detections[:, -2])[-max_per_image]
            keep = np.where(detections[:, -2] >= image_thresh)[0]
            detections = detections[keep, :]

    # Each bounding box: [xmin, ymin, xmax, ymax, score, class]
    return detections
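# util.nms here consumes rows of [x1, y1, x2, y2, score, class] and returns
# the indices to keep. A minimal NumPy sketch of that interface (the standard
# greedy IoU loop; shown for reference, not the library's actual code):
import numpy as np

def nms(dets, thresh):
    """Greedy NMS over rows of [x1, y1, x2, y2, score, ...]."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # process the highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top box with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes that overlap less than the threshold.
        order = order[1:][iou <= thresh]
    return keep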
def main():
    args = parse_cmdline()
    img_fn = os.path.abspath(args.img_fn)
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = util.load_target_image(img_fn)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Setup computation graph
    graph_params = setup_graph()

    # Model initialization: build the init op inside the model's graph so it
    # can actually be run in this session.
    sess = tf.Session(graph=graph_params['graph'])
    with graph_params['graph'].as_default():
        sess.run(tf.global_variables_initializer())
    if os.path.exists('models'):
        save_path = os.path.join('models', 'deep_logo_model')
        graph_params['saver'].restore(sess, save_path)
        print('Model restored')
    else:
        print('Initialized')

    # Logo recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            logo_recognition(sess, crop_image, obj_proposal, graph_params))

    # Drop proposals classified as background.
    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)

    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.999999, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(target_image)
    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x, y, result['pred_class'], fontsize=13,
                bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y), w, h, fill=False,
                                  edgecolor='red', linewidth=1)
        ax.add_patch(rect)
    plt.show()
def evaluate(model, image_path, target_path, iou_thres, conf_thres, nms_thres,
             image_size, batch_size, num_workers, device, output=False):
    model.eval()
    dataSet = utilData.ListDataset(image_path, target_path, augment=False,
                                   img_size=image_size)
    dataLoader = torch.utils.data.DataLoader(dataSet, batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataSet.collate_fn)
    labels = []
    correct = 0
    error = 0
    entire_time = 0
    if output and not os.path.isdir('./dog_dataset/eval/result_image'):
        os.mkdir('./dog_dataset/eval/result_image')

    for paths, images, targets in tqdm.tqdm(dataLoader,
                                            desc='Evaluate method',
                                            leave=False):
        if targets is None:
            continue
        labels.extend(targets[:, 1].tolist())
        targets[:, 1:] = util.get_xxyy_from_xywh(targets[:, 1:])
        targets[:, 1:] *= image_size

        start_time = time.time()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = util.nms(outputs, conf_thres, nms_thres)
        entire_time += time.time() - start_time

        if output:
            for i, path in enumerate(paths):
                # Letterbox the source image to match the network input.
                img = cv2.imread(path)
                h, w, _ = img.shape
                if h > w:
                    pad = [0, 0, (h - w) // 2, (h - w) - ((h - w) // 2)]
                else:
                    pad = [(w - h) // 2, (w - h) - ((w - h) // 2), 0, 0]
                img = cv2.copyMakeBorder(img, pad[0], pad[1], pad[2], pad[3],
                                         cv2.BORDER_CONSTANT, value=[0, 0, 0])
                img = cv2.resize(img, (image_size, image_size),
                                 interpolation=cv2.INTER_AREA)
                splited_path = os.path.split(path)
                if outputs[i] is None:
                    if targets[i, 1:].sum() == 0:
                        correct += 1
                    else:
                        error += 1
                        print('outputnone error', path)
                    cv2.imwrite('./dog_dataset/eval/result_image/' +
                                splited_path[1], img)
                    continue
                for box in outputs[i]:
                    img = cv2.rectangle(img, (int(box[0]), int(box[1])),
                                        (int(box[2]), int(box[3])),
                                        (0, 255, 0), 3)
                if targets[i, 1:].sum() == 0:
                    error += 1
                    print('targetnone error', path)
                    cv2.imwrite('./dog_dataset/eval/result_image/' +
                                splited_path[1], img)
                    continue
                ious = util.get_bbox_iou(outputs[i], targets[i:i + 1, 1:],
                                         True)
                for iou in ious:
                    if iou >= iou_thres:
                        correct += 1 / len(ious)
                    else:
                        error += 1 / len(ious)
                        print(iou, path)
                cv2.imwrite('./dog_dataset/eval/result_image/' +
                            splited_path[1], img)
        else:
            for i, path in enumerate(paths):
                if outputs[i] is None:
                    if targets[i, 1:].sum() == 0:
                        correct += 1
                    else:
                        error += 1
                    continue
                ious = util.get_bbox_iou(outputs[i], targets[i:i + 1, 1:],
                                         True)
                for iou in ious:
                    if iou >= iou_thres:
                        correct += 1 / len(ious)
                    else:
                        error += 1 / len(ious)

    return correct, error, correct / (correct + error) * 100
test_outputs = []
start = time.time()
for i, fname in enumerate(os.listdir(test_jpg_dir)):
    print(f"Predicting boxes for image # {i+1}\r", end="")
    fpath = os.path.join(test_jpg_dir, fname)
    fid = fname[:-4]

    # Run both models on the image.
    boxes_pred1, scores1 = util.get_detection_from_file(fpath, model1, sz)
    boxes_pred2, scores2 = util.get_detection_from_file(fpath, model2, sz)

    # Threshold and NMS each model's detections separately.
    indices1 = np.where(scores1 > score_threshold1)[0]
    scores1 = scores1[indices1]
    boxes_pred1 = boxes_pred1[indices1]
    boxes_pred1, scores1 = util.nms(boxes_pred1, scores1, nms_threshold)

    indices2 = np.where(scores2 > score_threshold2)[0]
    scores2 = scores2[indices2]
    boxes_pred2 = boxes_pred2[indices2]
    boxes_pred2, scores2 = util.nms(boxes_pred2, scores2, nms_threshold)

    # Ensemble: merge the two sets of detections by weighted box averaging.
    boxes_pred = np.concatenate((boxes_pred1, boxes_pred2))
    scores = np.concatenate((scores1, scores2))
    boxes_pred, scores = util.averages(boxes_pred, scores, wt_overlap,
                                       solo_min)
    util.shrink(boxes_pred, shrink_factor)

    output = ''
    for j, bb in enumerate(boxes_pred):
def main():
    args = parse_cmdline()
    img_fn = os.path.abspath(args.img_fn)
    save_img = args.save_img
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = cv2.imread(img_fn)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Setup computation graph
    graph_params = setup_graph()

    # Model initialization: build the init op inside the model's graph so it
    # can actually be run in this session.
    sess = tf.Session(graph=graph_params['graph'])
    with graph_params['graph'].as_default():
        sess.run(tf.global_variables_initializer())
    if os.path.exists('models'):
        save_path = os.path.join('models', 'deep_traffic_sign_model')
        graph_params['saver'].restore(sess, save_path)
        print('Model restored')
    else:
        print('Initialized')

    # Traffic sign recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            traffic_sign_recognition(sess, crop_image, obj_proposal,
                                     graph_params))
    """
    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)
    """

    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.999999, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB))
    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x, y, cls2name(result['pred_class']), fontsize=13,
                bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y), w, h, fill=False,
                                  edgecolor='red', linewidth=1)
        ax.add_patch(rect)
    plt.show()

    # Save the result image
    save_fname = os.path.splitext(os.path.basename(img_fn))[0] + '_result.jpg'
    if save_img:
        fig.savefig(save_fname, bbox_inches='tight', pad_inches=0.0)
def getFace(image):
    raw_img_bgr = np.asarray(image)
    raw_img = cv2.cvtColor(raw_img_bgr, cv2.COLOR_BGR2RGB)
    raw_img_f = raw_img.astype(np.float32)

    scales = calc_scales(model_face, raw_img, clusters)
    bboxes = np.empty(shape=(0, 5))  # initialize output

    # Process the input at different scales.
    for s in scales:
        img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s,
                         interpolation=cv2.INTER_LINEAR)
        img = img - average_image
        img = img[np.newaxis, :]

        # We don't run every template on every scale:
        # collect the ids of templates to ignore.
        tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
        ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

        # Run through the net.
        score_final_tf = sess.run(score_final, feed_dict={x: img})

        # Collect scores.
        score_cls_tf = score_final_tf[:, :, :, :25]
        score_reg_tf = score_final_tf[:, :, :, 25:125]
        prob_cls_tf = expit(score_cls_tf)
        prob_cls_tf[0, :, :, ignoredTids] = 0.0

        tmp_bboxes = calc_bounding_boxes(prob_cls_tf, score_reg_tf,
                                         score_cls_tf, s)
        bboxes = np.vstack((bboxes, tmp_bboxes))

    refined_idx = nms(bboxes, nms_thresh)
    refined_bboxes = bboxes[refined_idx]

    # Convert PIL Image to OpenCV Image.
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    origin_img = image_cv.copy()
    if len(bboxes) == 0:
        return False

    for refined_bbox in refined_bboxes:
        bbox = refined_bbox.astype(np.int64)
        cv2.rectangle(origin_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (255, 0, 0), 2)
        orig_h = image_cv.shape[0]  # shape is (height, width, channels)
        orig_w = image_cv.shape[1]

        # Expand the detected box by 40% and shift so the face stays centered.
        face_h = int((bbox[3] - bbox[1]) * 1.4)
        face_w = int((bbox[2] - bbox[0]) * 1.4)
        face_x = bbox[0] - int(face_w * 0.2)
        face_y = bbox[1] - int(face_h * 0.2)
        if face_x < 0:
            face_x = 0
        if face_y < 0:
            face_y = 0
        if face_w > orig_w:
            face_w = orig_w - 2
        if face_h > orig_h:
            face_h = orig_h - 2

        crop_face = image_cv[face_y:face_y + face_h,
                             face_x:face_x + face_w].copy()
        cv2.imwrite('cropped.jpg', crop_face)

        new_x = bbox[0] if bbox[0] > 0 else 0
        new_y = bbox[1] if bbox[1] > 0 else 0
        new_w = bbox[2] - new_x
        new_h = bbox[3] - new_y

        age, gender, face_cv2 = ageGender(crop_face)
        print(age, gender)
        if age is not False:
            font = cv2.FONT_HERSHEY_SIMPLEX
            bottomLeftCornerOfText = (bbox[0], bbox[1] - 15)
            fontScale = 1
            fontColor = (0, 0, 255)
            lineType = 2
            cv2.putText(origin_img, age + gender, bottomLeftCornerOfText,
                        font, fontScale, fontColor, lineType)

    cv2.imwrite('output.jpg', origin_img)
    cv2.namedWindow('output', cv2.WINDOW_NORMAL)
    cv2.imshow('output', origin_img)
    cv2.resizeWindow('output', 600, 600)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return len(refined_bboxes)
for i in range(4998):
    png_name = 'test{:04d}.png'.format(i)
    fpath = os.path.join(test_jpg_dir, png_name)
    print(f"\rPredicting boxes for image : {fpath}", end="", flush=True)

    # Run every model in the ensemble on the image.
    boxes_pred_list = []
    scores_list = []
    for model in models:
        boxes_pred, scores = util.get_detection_from_file(fpath, model, sz)
        indices = np.where(scores > score_threshold)[0]
        scores = scores[indices]
        boxes_pred = boxes_pred[indices]
        boxes_pred, scores = util.nms(boxes_pred, scores, nms_threshold)
        boxes_pred_list.append(boxes_pred)
        scores_list.append(scores)

    # Merge all models' detections by weighted box averaging.
    boxes_pred_np = np.concatenate(boxes_pred_list, axis=0)
    scores_np = np.concatenate(scores_list, axis=0)
    boxes_pred_np, scores_np = util.averages(boxes_pred_np, scores_np,
                                             wt_overlap, solo_min)
    util.shrink(boxes_pred_np, shrink_factor)

    hasBbox = False
    for j, bb in enumerate(boxes_pred_np):
        x1 = int(bb[0])
        y1 = int(bb[1])
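# Both ensembling snippets above lean on util.averages, whose exact semantics
# are not shown. Below is a minimal sketch under the assumption that boxes
# with IoU above wt_overlap are merged by score-weighted averaging and that
# unmatched ("solo") boxes must score at least solo_min to survive;
# average_boxes and _iou are illustrative names, not the project's helpers.
import numpy as np

def _iou(a, b):
    """IoU of two boxes given as [x1, y1, x2, y2]."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def average_boxes(boxes, scores, overlap_thresh, solo_min):
    """Greedily cluster boxes by IoU, then average each cluster."""
    order = np.argsort(scores)[::-1]
    used = np.zeros(len(boxes), dtype=bool)
    out_boxes, out_scores = [], []
    for i in order:
        if used[i]:
            continue
        cluster = [i]
        used[i] = True
        for j in order:
            if not used[j] and _iou(boxes[i], boxes[j]) > overlap_thresh:
                cluster.append(j)
                used[j] = True
        w = scores[cluster]
        merged = np.average(boxes[cluster], axis=0, weights=w)
        score = w.max()
        if len(cluster) == 1 and score < solo_min:
            continue  # drop low-confidence solo detections
        out_boxes.append(merged)
        out_scores.append(score)
    return np.array(out_boxes), np.array(out_scores)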
def main(file_name, graph_params, sess):
    img_fn = os.path.join("images", file_name)
    if not os.path.exists(img_fn):
        print('Not found: {}'.format(img_fn))
        sys.exit(-1)
    else:
        print('Target image: {}'.format(img_fn))

    # Load target image
    target_image = util.load_target_image(img_fn)
    # cv.normalize(target_image, target_image, 0, 255, cv.NORM_MINMAX)
    # limg = np.arcsinh(target_image)
    # limg /= limg.max()
    # low = np.percentile(limg, 0.25)
    # high = np.percentile(limg, 99.5)
    # opt_img = skie.exposure.rescale_intensity(limg, in_range=(low, high))
    # target_image = opt_img
    # target_image = target_image.astype(np.float64)

    # Get object proposals
    object_proposals = util.get_object_proposals(target_image)

    # Logo recognition
    results = []
    for obj_proposal in object_proposals:
        x, y, w, h = obj_proposal
        crop_image = target_image[y:y + h, x:x + w]
        results.append(
            logo_recognition(sess, crop_image, obj_proposal, graph_params))

    # Drop proposals classified as background.
    del_idx = []
    for i, result in enumerate(results):
        if result['pred_class'] == common.CLASS_NAME[-1]:
            del_idx.append(i)
    results = np.delete(results, del_idx)

    # Non-max suppression
    nms_results = util.nms(results, pred_prob_th=0.9, iou_th=0.4)

    # Draw rectangles on the target image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(target_image)
    for result in nms_results:
        print(result)
        (x, y, w, h) = result['obj_proposal']
        ax.text(x, y,
                "{} {:.2f}".format(result['pred_class'],
                                   result['pred_prob']),
                fontsize=13, bbox=dict(facecolor='red', alpha=0.7))
        rect = mpatches.Rectangle((x, y), w, h, fill=False,
                                  edgecolor='red', linewidth=1)
        ax.add_patch(rect)

    plt.tight_layout()
    plt.savefig(os.path.join("results", file_name), bbox_inches='tight',
                pad_inches=0)
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #     generate A anchor boxes centered on cell i
    #     apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # The first set of _num_anchors channels are bg probs;
    # the second set are the fg probs, which we want.
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = [float(x.get()) for x in bottom[2]]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]

    # Generate anchors in the same order as we do in neon for unit testing
    # anchors = self._anchors.reshape((1, A, 4)) + \
    #     shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = self._anchors.reshape((1, A, 4)).transpose((1, 0, 2)) + \
        shifts.reshape((1, K, 4))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    # bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Re-order proposals to match neon for unit testing
    # bbox_deltas = bbox_deltas.reshape((38, 50, 9, 4)).transpose(
    #     (2, 0, 1, 3)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Also re-order scores
    # scores = scores.reshape((38, 50, 9, 1)).transpose(
    #     (2, 0, 1, 3)).reshape((-1, 1))
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, self.min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    if DEBUG:
        print("(CAFFE) len(keep) before nms: {}".format(len(keep)))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if self.pre_nms_topN > 0:
        order = order[:self.pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    if DEBUG:
        print("(CAFFE) len(proposals) after get_top_N: {}".format(
            len(proposals)))

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), self.nms_thresh)

    if DEBUG:
        print("(CAFFE) len(keep) before clipping: {}".format(len(keep)))

    if self.post_nms_topN > 0:
        keep = keep[:self.post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    if DEBUG:
        print("(CAFFE) len(keep) after nms: {}".format(len(keep)))

    # Output rois blob. Our RPN implementation only supports a single
    # input image, so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0] = blob
    top[1] = scores
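# bbox_transform_inv applies the predicted (dx, dy, dw, dh) deltas to the
# anchors. A minimal NumPy sketch of the standard Faster R-CNN
# parameterization (for reference; the project's own version may differ in
# details such as clipping):
import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) regression deltas to anchor boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    # dx, dy shift the center (scaled by the anchor size);
    # dw, dh scale the width/height in log space.
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes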
def infer(self, imagePath, confidenceThreshold, minHeight, maxHeight):
    # Prepare the input: subtract the channel means and pad so both sides
    # are a multiple of the network's pad size.
    im = cv2.imread(imagePath).astype(self.net.blobs["data"].data.dtype)
    im -= list(map(int, self.config["channel_shift"]))
    pad = int(self.config["pad"][0])
    h, w = im.shape[0:2]
    padH = (pad - (h % pad)) % pad
    padW = (pad - (w % pad)) % pad
    padded = np.zeros(dtype=self.net.blobs["data"].data.dtype,
                      shape=(h + padH, w + padW, im.shape[2]))
    padded[:h, :w, :] = im[...]
    im = padded
    h += padH
    w += padW
    im = im.transpose(2, 0, 1)  # hwc -> chw
    im_input = im[np.newaxis, ...]
    self.net.blobs["data"].reshape(*im_input.shape)

    imSrc = None
    # Optional debug visualization of the preprocessed input:
    # imSrc = im_input[0, ...].copy().transpose(1, 2, 0)
    # imSrc += list(map(int, self.config["channel_shift"]))
    # swap bgr -> rgb, clip negatives, then:
    # imSrc = Image.fromarray(imSrc.astype(np.uint8))

    # Run the net (a contiguous copy of the input is forced first).
    im_input2 = np.empty(shape=im_input.shape, dtype=im_input.dtype)
    im_input2[...] = im_input[...]
    im_input = im_input2
    forwardKwargs = {"data": im_input.astype(np.float32, copy=False)}
    self.net.forward(**forwardKwargs)

    # Process the output. (A leftover debug `return outScores, outBoxes,
    # im_input` used to short-circuit here, making the rest unreachable;
    # it has been removed.)
    outScores = self.net.blobs[self.config["score_blob"][0]].data
    outBoxes = self.net.blobs[self.config["bb_reg_blob"][0]].data

    scales = list(map(int, self.config["scales"]))
    stride = int(self.config["stride"][0])
    lenScales = len(scales)
    boxes, scores = [], []
    for i in range(lenScales):
        for y in range(outScores.shape[2]):
            for x in range(outScores.shape[3]):
                # Anchors & bbox regression: decode one cell at one scale.
                currentScore = outScores[0, i + lenScales, y, x]
                if currentScore > confidenceThreshold:
                    size = scales[i] * stride
                    xCorr = outBoxes[0, 4 * i, y, x] * size
                    yCorr = outBoxes[0, 4 * i + 1, y, x] * size
                    wCorr = np.exp(outBoxes[0, 4 * i + 2, y, x]) * size
                    hCorr = np.exp(outBoxes[0, 4 * i + 3, y, x]) * size
                    xCenter = x * stride + xCorr + stride / 2
                    yCenter = y * stride + yCorr + stride / 2
                    x1 = xCenter - (wCorr / 2)
                    x2 = xCenter + (wCorr / 2)
                    y1 = yCenter - (hCorr / 2)
                    y2 = yCenter + (hCorr / 2)
                    if (x1 >= 0 and y1 >= 0 and x2 <= w and y2 <= h
                            and minHeight <= y2 - y1 + 1 <= maxHeight):
                        boxes.append([x1, y1, x2, y2])
                        scores.append(currentScore)

    if len(scores) == 0:
        return [], [], imSrc

    # Group detections (nms).
    nmsIouThreshold = float(self.config["iou_threshold"][0])
    groupedScores, groupedBoxes = util.nms(scores, boxes, nmsIouThreshold)
    return groupedScores, groupedBoxes, imSrc