def interpret_out(self, out, img_w, img_h):
    num_classes = self._num_classes
    iou_threshold = self._iou_threshold
    threshold = self._threshold

    # Box coordinates: first 4 channels of each output head.
    pre_boxes = [boxes[:, :, :, :, :4] for boxes in out]
    pre_boxes = [np.reshape(boxes, boxes.shape[1:]) for boxes in pre_boxes]
    pre_boxes = self.coordinate_transfer(img_w, img_h, pre_boxes)

    # Class logits: remaining channels, flattened to (N, num_classes).
    pre_clses = [clses[:, :, :, :, 4:] for clses in out]
    pre_clses = [np.reshape(clses, (-1, clses.shape[-1])) for clses in pre_clses]
    pre_clses = np.vstack(pre_clses)

    res = {}
    assert len(pre_boxes) == len(pre_clses)

    # Drop boxes whose argmax class is background (index 0).
    max_inds = np.argmax(pre_clses, axis=1)
    keep_inds = np.where(max_inds != 0)
    pre_boxes = pre_boxes[keep_inds]
    pre_clses = pre_clses[keep_inds]

    # Softmax over the class logits.
    scores = np.exp(pre_clses) / np.sum(np.exp(pre_clses), axis=1).reshape([-1, 1])

    for i in range(1, num_classes):  # skip background class 0
        keep_inds = np.where(scores[:, i] >= threshold)[0]
        dets = np.hstack([
            pre_boxes[keep_inds],
            scores[:, i][keep_inds].reshape([-1, 1])
        ])
        keep_inds = nms(dets, iou_threshold)
        if len(keep_inds) > 0:
            res[str(i)] = dets[keep_inds]
    return res
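Most of the snippets in this collection assume an `nms` helper that takes an (N, 5) array of `[x1, y1, x2, y2, score]` rows plus an IoU threshold and returns the indices of the boxes to keep. The exact implementation varies per project; the following is a minimal greedy sketch of that assumed interface, not any one repository's version:

import numpy as np

def nms(dets, thresh):
    """Greedy IoU-based non-maximum suppression (sketch).

    dets: (N, 5) array of [x1, y1, x2, y2, score].
    Returns the indices of the kept boxes, highest score first.
    """
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # descending by score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current best box with the remaining ones.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop everything that overlaps the kept box too much.
        order = order[1:][iou <= thresh]
    return keep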
def detect(self, img):
    h, w = img.shape[:2]

    # Preprocess: BGR -> RGB, resize to the network input, scale to [-1, 1).
    inp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    inp = cv2.resize(inp, (self.input_size, self.input_size))
    inp = (inp - 127.5) / 128.0

    # shape of y_pred: (?, num_boxes, 4 + num_classes)
    outs = self.sess.run(self.net.prediction,
                         feed_dict={self.inputs: np.array([inp])})[0]
    boxes = outs[:, :4]
    preds = outs[:, 4:]
    decoded_boxes = self.decode_boxes(boxes)

    results = []
    for box, pred in zip(decoded_boxes, preds):
        xmin, ymin, xmax, ymax = box
        clsid = np.argmax(pred)
        if clsid == 0:  # in the case of background
            continue
        clsid -= 1  # decrement to skip the background class
        prob = np.max(pred)
        if prob < self.threshold:
            continue
        # Scale normalized coordinates back to the original image size.
        left = xmin * w
        top = ymin * h
        right = xmax * w
        bottom = ymax * h
        results.append([clsid, prob, left, top, right, bottom])

    if len(results) > 0:
        return nms(results)
    return {}
def detect(self, img):
    img_h, img_w = img.shape[:2]
    img = self.preprocess(img)
    outs = self.sess.run(self.net.prediction,
                         feed_dict={self.inputs: np.array([img])})[0]
    # shape of y_pred: (?, num_boxes, 4 + num_classes)
    boxes = outs[:, :4]
    preds = outs[:, 4:]
    decoded_boxes = self.decode_boxes(boxes)

    results = []
    for box, pred in zip(decoded_boxes, preds):
        xmin, ymin, xmax, ymax = box
        clsid = np.argmax(pred)
        if clsid == 0:  # in the case of background
            continue
        clsid -= 1  # decrement to skip the background class
        prob = np.max(pred)
        # Scale normalized coordinates back to the original image size.
        left = xmin * img_w
        top = ymin * img_h
        right = xmax * img_w
        bottom = ymax * img_h
        results.append([clsid, prob, left, top, right, bottom])

    if len(results) > 0:
        return nms(results, self.threshold)
    return {}
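Both `detect` variants above rely on a `decode_boxes` method that is not shown. In SSD-style detectors this usually converts the predicted offsets back to corner coordinates using prior boxes; the following is a minimal sketch under that assumption. `self.priors` as an (N, 4) array of `(cx, cy, w, h)` in normalized coordinates, and the 0.1/0.2 variances, are assumptions, not the original code:

def decode_boxes(self, locs, variances=(0.1, 0.2)):
    # Assumed: self.priors is (N, 4) of (cx, cy, w, h) in [0, 1].
    centers = self.priors[:, :2] + locs[:, :2] * variances[0] * self.priors[:, 2:]
    sizes = self.priors[:, 2:] * np.exp(locs[:, 2:] * variances[1])
    # Convert (cx, cy, w, h) -> (xmin, ymin, xmax, ymax).
    boxes = np.concatenate([centers - sizes / 2, centers + sizes / 2], axis=1)
    return np.clip(boxes, 0.0, 1.0)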
def get_crop_images(feature_map, im, pixel_threshold=0.9, quiet=True):
    shape = im.size
    d_width, d_height = resize_image(im, MAX_IMAGE_SIZE)
    scale_ratio_w = d_width / im.width
    scale_ratio_h = d_height / im.height

    y = feature_map
    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            # Rescale the quad geometry back to the original image size.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
            txt_item = list(map(int, rescaled_geo_list))
            poly = [[txt_item[0], txt_item[1]],
                    [txt_item[6], txt_item[7]],
                    [txt_item[4], txt_item[5]],
                    [txt_item[2], txt_item[3]]]
            txt_items.append(poly)
        elif not quiet:
            print('quad invalid with vertex num less than 4.')

    crop_images, polys = rotate.rotate_img(txt_items, np.array(im))
    return crop_images, polys, shape
def detect_from_image(self, session, full_image, visualize=False):
    # step 1: preprocess -- image resize, grayscale, equalizeHist
    image, _ = self.resize(full_image)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_gray = cv2.equalizeHist(image_gray)

    # face detection
    face_regions = self.face_detector.detectMultiScale(image_gray,
                                                       1.05,
                                                       4,
                                                       minSize=(60, 60))

    # eye detection based on each face region
    eyes_regions = []
    for (x, y, w, h) in face_regions:
        face_image = image_gray[y:y + h, x:x + w]
        # resize face, transform to high resolution
        face_image, scale = self.resize(face_image)

        # first eye detection
        eyes_roi = self.eyes_detector.detectMultiScale(face_image,
                                                       1.05,
                                                       2,
                                                       minSize=(60, 60),
                                                       maxSize=(120, 120))
        # second eye detection: keep only candidates the selector accepts
        for (ex, ey, ew, eh) in eyes_roi:
            eye = face_image[ey:ey + eh, ex:ex + ew]
            pred = self.eyes_selector.predict(session, eye)
            if pred == 1:
                eyes_regions.append([
                    x + int(ex / scale),
                    y + int(ey / scale),
                    int(ew / scale),
                    int(eh / scale)
                ])

    # apply nms to reduce overlapping bboxes
    eyes_regions = nms(eyes_regions, thres=0.5)

    # visualize
    if visualize:
        for (x, y, w, h) in face_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
        for (x, y, w, h) in eyes_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # show the bounding boxes
        cv2.imshow("eye detect", image)
        cv2.waitKey(5)

    # post process: generate left/right eye bounding boxes;
    # if the number of detected bboxes is not exactly 2, return None
    if len(eyes_regions) != 2:
        return None, None

    # convert to (left, right) pair format, sorted by bbox center x
    eyes_bndbox = sorted(eyes_regions, key=lambda bbox: bbox[0] + bbox[2] // 2)
    eyes_region = [
        cv2.resize(image_gray[y:y + h, x:x + w],
                   (self.image_size, self.image_size))
        for (x, y, w, h) in eyes_bndbox
    ]
    return eyes_bndbox, eyes_region
def interpret_output(self, output):
    # NOTE: duplicate code here
    class_prob = output[0:self._boundary1]
    class_prob = np.reshape(
        class_prob,
        [self._cell_size, self._cell_size, self._num_classes])
    scales = output[self._boundary1:self._boundary2]
    scales = np.reshape(
        scales,
        [self._cell_size, self._cell_size, self._boxes_per_cell])
    boxes = output[self._boundary2:]
    boxes = np.reshape(
        boxes,
        [self._cell_size, self._cell_size, self._boxes_per_cell, 4])

    # Grid-cell x offsets, one arange row per (box, row) pair;
    # a plain np.arange here would not have enough elements to reshape.
    offset = np.array([np.arange(self._cell_size)]
                      * (self._cell_size * self._boxes_per_cell))
    offset = np.reshape(
        offset,
        [self._boxes_per_cell, self._cell_size, self._cell_size])
    offset = np.transpose(offset, [1, 2, 0])
    # boxes[:, :, :, 0] += offset
    # boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
    boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, :2] / self._cell_size
    boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])

    # duplicate code here
    boxes *= self._image_size
    self.coordinate_transfer(boxes)

    probs = np.zeros((self._cell_size, self._cell_size,
                      self._boxes_per_cell, self._num_classes))
    for i in range(self._boxes_per_cell):
        for j in range(self._num_classes):
            tmp = scales[:, :, i] * class_prob[:, :, j]
            # Zero out scores below the threshold (elementwise; a scalar
            # comparison on the 2D array would raise a ValueError).
            probs[:, :, i, j] = tmp * (tmp >= self._thresh_hold)

    probs = np.transpose(probs, (3, 0, 1, 2))
    probs = np.reshape(
        probs,
        (self._num_classes,
         self._cell_size * self._cell_size * self._boxes_per_cell))
    boxes = np.reshape(
        boxes,
        (self._cell_size * self._cell_size * self._boxes_per_cell, 4))

    res = {}
    for i in range(len(self._classes)):
        prob = np.reshape(
            probs[i],
            [self._cell_size * self._cell_size * self._boxes_per_cell, 1])
        dets = np.hstack([boxes, prob])
        keep_inds = nms(dets, self._iou_thresh_hold)
        if len(keep_inds) > 0:
            res[str(i)] = dets[keep_inds]
    return res
def visualize_heatmaps(self, img, cls_map, reg_map, clusters,
                       prob_thresh=1, nms_thresh=1, iou=None):
    """Expect cls_map and reg_map to be of the form HxWxC."""
    fy, fx, fc = np.where(cls_map >= prob_thresh)
    # best_iou = iou.max(axis=3)
    # fy, fx, fc = np.where(best_iou >= 0.5)  # neg thresh

    # Map feature-map locations back to image coordinates.
    cy, cx = fy * self.sty + self.ofy, fx * self.stx + self.ofx
    cw = clusters[fc, 2] - clusters[fc, 0] + 1
    ch = clusters[fc, 3] - clusters[fc, 1] + 1
    # box_ovlp = best_iou[fc, fy, fx]

    num_clusters = clusters.shape[0]

    # Refine the bounding boxes with the regression maps.
    tx = reg_map[:, :, 0 * num_clusters:1 * num_clusters]
    ty = reg_map[:, :, 1 * num_clusters:2 * num_clusters]
    tw = reg_map[:, :, 2 * num_clusters:3 * num_clusters]
    th = reg_map[:, :, 3 * num_clusters:4 * num_clusters]

    dcx = cw * tx[fy, fx, fc]
    dcy = ch * ty[fy, fx, fc]
    rx = cx + dcx
    ry = cy + dcy
    rw = cw * np.exp(tw[fy, fx, fc])
    rh = ch * np.exp(th[fy, fx, fc])

    bboxes = np.array([np.abs(rx - rw / 2), np.abs(ry - rh / 2),
                       rx + rw / 2, ry + rh / 2]).T
    scores = cls_map[fy, fx, fc]
    dets = np.hstack((bboxes, scores[:, np.newaxis]))
    keep = nms(dets, nms_thresh)
    bboxes = dets[keep][:, 0:4]
    # bbox_iou = best_iou[fy, fx, fc]

    print("Number of bboxes ", bboxes.shape[0])
    for idx, bbox in enumerate(bboxes):
        bbox = np.round(np.array(bbox))
        # img = draw_bounding_box(img, bbox, {"name": "car {0}".format(
        #     np.around(bbox_iou[idx], decimals=2))})
        img = draw_bounding_box(img, bbox, {"name": "car {0}".format(idx)})
    img.show(title="Heatmap visualized")
def interpret_output(self, img_w, img_h, output):
    # NOTE: duplicate code here
    output = np.reshape(output, output.shape[-1])
    class_prob = output[0:self._boundary1]
    class_prob = np.reshape(
        class_prob,
        [self._cell_size, self._cell_size, self._num_classes])
    scales = output[self._boundary1:self._boundary2]
    scales = np.reshape(
        scales,
        [self._cell_size, self._cell_size, self._boxes_per_cell])
    boxes = output[self._boundary2:]
    boxes = np.reshape(
        boxes,
        [self._cell_size, self._cell_size, self._boxes_per_cell, 4])

    # Grid-cell offsets: offset[row, col, box] == col after the transpose.
    offset = [np.arange(self._cell_size)] \
        * self._cell_size * self._boxes_per_cell
    offset = np.reshape(
        offset,
        [self._boxes_per_cell, self._cell_size, self._cell_size])
    offset = np.transpose(offset, [1, 2, 0])
    boxes[:, :, :, 0] += offset
    boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
    boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, :2] / self._cell_size
    boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])

    # duplicate code here
    boxes *= self._image_size
    self.coordinate_transfer(img_w, img_h, boxes)

    probs = np.zeros((self._cell_size, self._cell_size,
                      self._boxes_per_cell, self._num_classes))
    for i in range(self._boxes_per_cell):
        for j in range(self._num_classes):
            tmp = scales[:, :, i] * class_prob[:, :, j]
            # Zero out scores below the threshold (elementwise).
            probs[:, :, i, j] = tmp * (tmp >= cfg.THRESHOLD)

    probs = np.transpose(probs, (3, 0, 1, 2))
    probs = np.reshape(
        probs,
        (self._num_classes,
         self._cell_size * self._cell_size * self._boxes_per_cell))
    boxes = np.reshape(
        boxes,
        (self._cell_size * self._cell_size * self._boxes_per_cell, 4))

    res = {}
    for i in range(len(self._classes)):
        prob = np.reshape(
            probs[i],
            [self._cell_size * self._cell_size * self._boxes_per_cell, 1])
        dets = np.hstack([boxes, prob])
        keep_inds = nms(dets, self._iou_thresh_hold)
        if len(keep_inds) > 0:
            res[str(i)] = dets[keep_inds]
    return res
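The list-replication trick in the offset construction above is easy to misread. A small standalone check (sizes chosen only for illustration) shows that `offset[row, col, box]` ends up equal to the column index for every box, which is what makes `boxes[..., 0] += offset` turn per-cell x predictions into grid coordinates, and its transpose the same for y:

import numpy as np

cell_size, boxes_per_cell = 3, 2
offset = np.array([np.arange(cell_size)] * (cell_size * boxes_per_cell))
offset = np.reshape(offset, [boxes_per_cell, cell_size, cell_size])
offset = np.transpose(offset, [1, 2, 0])
print(offset[:, :, 0])
# [[0 1 2]
#  [0 1 2]
#  [0 1 2]]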
def cones_detection(target_path, output_path, model, device, conf_thres, nms_thres):
    img = Image.open(target_path).convert('RGB')
    w, h = img.size
    new_width, new_height = model.img_size()
    pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
    img = torchvision.transforms.functional.pad(img,
                                                padding=(pad_w, pad_h, pad_w, pad_h),
                                                fill=(127, 127, 127),
                                                padding_mode="constant")
    img = torchvision.transforms.functional.resize(img, (new_height, new_width))

    bw = model.get_bw()
    if bw:
        img = torchvision.transforms.functional.to_grayscale(
            img, num_output_channels=1)

    img = torchvision.transforms.functional.to_tensor(img)
    img = img.unsqueeze(0)

    with torch.no_grad():
        model.eval()
        img = img.to(device, non_blocking=True)
        # output,first_layer,second_layer,third_layer = model(img)
        output = model(img)

        for detections in output:
            detections = detections[detections[:, 4] > conf_thres]
            box_corner = torch.zeros((detections.shape[0], 4),
                                     device=detections.device)
            xy = detections[:, 0:2]
            wh = detections[:, 2:4] / 2
            box_corner[:, 0:2] = xy - wh
            box_corner[:, 2:4] = xy + wh
            probabilities = detections[:, 4]
            nms_indices = nms(box_corner, probabilities, nms_thres)
            main_box_corner = box_corner[nms_indices]
            if nms_indices.shape[0] == 0:
                continue

            pred_boxes = []
            for i in range(len(main_box_corner)):
                # Undo the resize and padding to get original-image coords.
                x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                box = [x0, y0, x1, y1]
                pred_boxes.append(box)
            return pred_boxes
def get_detections(model, img, templates, rf, img_transforms,
                   prob_thresh=0.65, nms_thresh=0.3, device=None):
    model = model.to(device)
    model.eval()

    dets = np.empty((0, 6))  # store bbox (x1, y1, x2, y2), score and scale

    num_templates = templates.shape[0]

    # Evaluate over multiple scales
    scales_list = [2 ** x for x in [-1, 0, 1]]

    # convert tensor to PIL image so we can perform resizing
    image = transforms.functional.to_pil_image(img[0])

    min_side = np.min(image.size)

    for s, scale in enumerate(scales_list):
        # scale the images
        scaled_image = transforms.functional.resize(image,
                                                    int(min_side * scale))

        # normalize the images
        img = img_transforms(scaled_image)

        # add batch dimension
        img.unsqueeze_(0)

        # now run the model
        x = img.float().to(device)
        output = model(x)

        # first `num_templates` channels are class maps
        score_cls = torch.sigmoid(output[:, :num_templates, :, :])
        score_cls = score_cls.data.cpu().numpy().transpose((0, 2, 3, 1))

        score_reg = output[:, num_templates:, :, :]
        score_reg = score_reg.data.cpu().numpy().transpose((0, 2, 3, 1))

        t_bboxes, scores = get_bboxes(score_cls, score_reg, templates,
                                      prob_thresh, rf, scale)

        scales = np.ones((t_bboxes.shape[0], 1)) / scale
        # append scores at the end for NMS
        d = np.hstack((t_bboxes, scores, scales))
        dets = np.vstack((dets, d))

    # Apply NMS
    keep = nms(dets, nms_thresh)
    dets = dets[keep]

    return dets
def detect(self, cv_img):
    cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    img = img_pil.fromarray(cv_img)
    w, h = img.size
    new_width, new_height = self.model.img_size()
    pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
    img = torchvision.transforms.functional.pad(img,
                                                padding=(pad_w, pad_h, pad_w, pad_h),
                                                fill=(127, 127, 127),
                                                padding_mode="constant")
    img = torchvision.transforms.functional.resize(img, (new_height, new_width))

    bw = self.model.get_bw()
    if bw:
        img = torchvision.transforms.functional.to_grayscale(
            img, num_output_channels=1)

    img = torchvision.transforms.functional.to_tensor(img)
    img = img.unsqueeze(0)

    with torch.no_grad():
        self.model.eval()
        img = img.to(self.device, non_blocking=True)
        # output,first_layer,second_layer,third_layer = model(img)
        output = self.model(img)

        for detections in output:
            detections = detections[detections[:, 4] > self.conf_thres]
            box_corner = torch.zeros((detections.shape[0], 4),
                                     device=detections.device)
            xy = detections[:, 0:2]
            wh = detections[:, 2:4] / 2
            box_corner[:, 0:2] = xy - wh
            box_corner[:, 2:4] = xy + wh
            probabilities = detections[:, 4]
            nms_indices = nms(box_corner, probabilities, self.nms_thres)
            main_box_corner = box_corner[nms_indices]
            if nms_indices.shape[0] == 0:
                continue

            bboxes = []
            for i in range(len(main_box_corner)):
                # Undo the resize and padding to get original-image coords.
                x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                bboxes.append([x0, y0, x1, y1])
            return bboxes
def apply_nms(confidence_map, hmap, wmap, dotmap_pred_downscale=2, thresh=0.3):
    # Merge the four per-scale confidence/box maps pairwise.
    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[0], confidence_map[1]], [hmap[0], hmap[1]])
    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[2], nms_conf_map], [hmap[2], nms_conf_box])
    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[3], nms_conf_map], [hmap[3], nms_conf_box])

    confidence_map = nms_conf_map
    hmap = nms_conf_box
    wmap = nms_conf_box

    confidence_map = np.squeeze(confidence_map)
    hmap = np.squeeze(hmap)
    wmap = np.squeeze(wmap)

    dets_idx = np.where(confidence_map > 0)
    y, x = dets_idx[-2], dets_idx[-1]
    h, w = hmap[dets_idx], wmap[dets_idx]

    # Build (x1, y1, x2, y2, score) dets centered on each point.
    x1 = x - w / 2
    x2 = x + w / 2
    y1 = y - h / 2
    y2 = y + h / 2
    scores = confidence_map[dets_idx]
    dets = np.stack([
        np.array(x1),
        np.array(y1),
        np.array(x2),
        np.array(y2),
        np.array(scores)
    ], axis=1)

    # List of indices to keep
    keep = nms.nms(dets, thresh)
    x = x[keep]
    y = y[keep]
    h = h[keep]
    w = w[keep]
    scores = scores[keep]
    return x, y, h, w, scores
def main():
    args = parse.parse_args()
    model_path = args.model_path
    img_path = args.img_path
    if model_path.strip() == '':
        raise ValueError('model path should not be null')
    if img_path.strip() == '':
        raise ValueError('test img path should not be null')

    model = load_model(model_path)
    test_model = Model(model.input, [
        model.get_layer('cls_output').output,
        model.get_layer('bbox_output').output
    ])
    test_model.load_weights(model_path, by_name=True, skip_mismatch=True)

    # get the inputs
    inputs = get_inputs()
    # shapes: (1, 128, 21) and (1, 128, 80)
    cls_output, bbox_output = test_model(inputs)
    # shape (128, 21)
    cls_output = np.squeeze(cls_output, axis=0)
    # shape (128, 80)
    bbox_output = np.squeeze(bbox_output, axis=0)

    # apply softmax
    cls_output = softmax(cls_output)
    # argmax class for each of the 128 boxes, shape (128,)
    argmax_cls = np.argmax(cls_output, axis=1)

    # keep only non-background boxes (class 0 is background);
    # compute the mask once so all three arrays stay aligned
    keep = argmax_cls > 0
    cls_output = cls_output[keep]    # (n, 21), n <= 128
    argmax_cls = argmax_cls[keep]    # (n,)
    bbox_output = bbox_output[keep]  # (n, 80)
    scores = np.max(cls_output, axis=1)

    rects = []
    for i, bbox in enumerate(bbox_output):
        # subtract 1 to drop the background class
        cls = argmax_cls[i] - 1
        # each class owns 4 consecutive bbox regression values
        start = cls * 4
        end = start + 4
        bbox = bbox[start:end]
        rects.append(bbox)
    rects = np.asarray(rects)

    # non-maximum suppression
    keep_ind = nms(rects, scores, 0.5)
    rects = rects[keep_ind, :]
    show_rect(img_path, rects)
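The `softmax` helper used above is not shown; it is assumed to normalize each row of the (128, 21) class scores. A minimal, numerically stable sketch:

def softmax(x, axis=1):
    # Subtract the row max for numerical stability before exponentiating.
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)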
def query_posecnn_detection(self, classes):
    # detection information of the target object
    rois_est = np.zeros((0, 7), dtype=np.float32)

    # TODO look for multiple object instances
    max_objects = 5
    for i in range(len(classes)):
        for object_id in range(max_objects):
            # check posecnn frame
            cls = classes[i]
            suffix_frame = '_%02d_roi' % (object_id)
            source_frame = 'posecnn/' + cls + suffix_frame

            try:
                # print('look for posecnn detection ' + source_frame)
                trans, rot = self.listener.lookupTransform(
                    self.target_frame, source_frame, rospy.Time(0))
                n = trans[0]
                secs = trans[1]
                now = rospy.Time.now()
                if abs(now.secs - secs) > 1.0:
                    print('posecnn pose for %s time out %f %f' %
                          (source_frame, now.secs, secs))
                    continue
                roi = np.zeros((1, 7), dtype=np.float32)
                roi[0, 0] = 0
                roi[0, 1] = i
                roi[0, 2] = rot[0] * n
                roi[0, 3] = rot[1] * n
                roi[0, 4] = rot[2] * n
                roi[0, 5] = rot[3] * n
                roi[0, 6] = trans[2]
                rois_est = np.concatenate((rois_est, roi), axis=0)
                print('find posecnn detection ' + source_frame)
            except Exception:
                continue

    if rois_est.shape[0] > 0:
        # non-maximum suppression within class
        index = nms(rois_est, 0.2)
        rois_est = rois_est[index, :]
    return rois_est
def detect_face(self, img_raw):
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.cuda()
    scale = scale.cuda()

    loc, conf = self.model(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > self.args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.args.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    # keep = py_cpu_nms(dets, args.nms_threshold)
    keep = nms(torch.tensor(boxes), torch.tensor(scores),
               overlap=self.args.nms_threshold)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:self.args.keep_top_k, :]

    return dets
def generate_paths(self):
    for cls_ix in range(1, self.num_classes):  # skip background
        all_scores = np.ndarray(shape=(self.num_frame_pairs,), dtype=np.object)
        cls_boxes = np.ndarray(shape=(self.num_frame_pairs,), dtype=np.object)
        cls_scores = np.ndarray(shape=(self.num_frame_pairs,), dtype=np.object)

        print('Class: {}'.format(self.classes[cls_ix]))
        self._curr_class = self.classes[cls_ix]

        for pair_ix in range(self.num_frame_pairs):
            boxes_t0 = self.pred_boxes[pair_ix][0].clone()
            scores_t0 = self.scores[pair_ix][0][:, cls_ix].clone()

            pick = torch.nonzero(scores_t0 > 0.0).view(-1)
            # If no good scores for this frame/class, go to the next frame
            if pick.numel() == 0:
                all_scores[pair_ix] = torch.cuda.FloatTensor(0)  # empty tensor
                cls_boxes[pair_ix] = torch.cuda.FloatTensor(0)  # empty tensor
                cls_scores[pair_ix] = torch.cuda.FloatTensor(0)  # empty tensor
                continue

            # Get scores that passed the filter and sort highest-->lowest
            scores_t0 = scores_t0[pick]
            boxes_t0 = boxes_t0[pick, :]
            all_scores_t0 = self.scores[pair_ix][0][pick, :]

            _, pick = torch.sort(scores_t0, descending=True)
            # Take at most 10 per frame per class
            to_pick = min(10, pick.numel())
            pick = pick[:to_pick]
            scores_t0 = scores_t0[pick]
            boxes_t0 = boxes_t0[pick, :]
            all_scores_t0 = all_scores_t0[pick, :]

            cls_dets_t0 = torch.cat(
                [boxes_t0, scores_t0.contiguous().view(-1, 1)], dim=1)
            pick = torch.from_numpy(nms(cls_dets_t0.numpy(), 0.3))
            # TODO check pick is sorted in descending order

            # Take top 10 dets after nms
            pick = pick.view(-1).long()
            pick = pick[:min(10, pick.numel())]
            cls_boxes[pair_ix] = boxes_t0[pick, :].clone()
            cls_scores[pair_ix] = scores_t0[pick].clone()
            all_scores[pair_ix] = all_scores_t0[pick, :].clone()

        paths = self.incremental_linking(cls_boxes, cls_scores, all_scores)
        print("Finish incremental linking")
        self.all_paths[cls_ix] = paths
def detect(full_image, visualize=False):
    # step 1: preprocess -- image resize, grayscale, equalizeHist
    image, _ = resize(full_image)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_gray = cv2.equalizeHist(image_gray)

    # face detection
    face_regions = face_detector.detectMultiScale(image_gray, 1.05, 4,
                                                  cv2.cv.CV_HAAR_SCALE_IMAGE,
                                                  (60, 60))

    # eye detection based on each face region
    eyes_regions = []
    for (x, y, w, h) in face_regions:
        face_image = image_gray[y:y + h, x:x + w]
        # resize face, transform to high resolution
        face_image, scale = resize(face_image)

        # first eye detection
        eyes_roi = eyes_detector.detectMultiScale(face_image, 1.05, 2,
                                                  cv2.cv.CV_HAAR_SCALE_IMAGE,
                                                  (60, 60), (120, 120))
        # second eye detection: map candidates back to full-image coords
        for (ex, ey, ew, eh) in eyes_roi:
            eyes_regions.append([
                x + int(ex / scale),
                y + int(ey / scale),
                int(ew / scale),
                int(eh / scale)
            ])

    # apply nms to reduce overlapping bboxes
    eyes_regions = nms(eyes_regions, thres=0.5)

    # visualize
    if visualize:
        for (x, y, w, h) in face_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
        for (x, y, w, h) in eyes_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # show the bounding boxes
        cv2.imshow("eye detect", image)
        cv2.waitKey(5)

    eyes_region = [
        cv2.resize(image_gray[y:y + h, x:x + w], (eye_size, eye_size))
        for (x, y, w, h) in eyes_regions
    ]
    return eyes_region
def merge_outputs(self, detections):
    # detections: list of dets; each dets is a dict
    # {1: det_array, 2: det_array, ...} with det_array of shape [k, 5]
    # return: {1: det_array, 2: det_array, ...}, det_array of shape [k, 5]
    res_dets = {}
    for j in range(1, self.cfg.NUM_CLASS):
        res_dets[j] = np.concatenate([dets[j] for dets in detections],
                                     axis=0).astype(np.float32)
        if len(self.scales) > 1 or self.cfg.NMS:
            res_index = nms(res_dets[j], 0.5)
            res_dets[j] = res_dets[j][res_index]

    scores = np.hstack(
        [res_dets[j][:, 4] for j in range(1, self.cfg.NUM_CLASS)])
    if len(scores) > self.max_per_image:
        # keep only the max_per_image highest-scoring dets across classes
        kth = len(scores) - self.max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, self.cfg.NUM_CLASS):
            keep_inds = (res_dets[j][:, 4] >= thresh)
            res_dets[j] = res_dets[j][keep_inds]
    return res_dets
def forward(self, score, reg_param, anchors, im_info):
    # Apply regression deltas to the anchors
    rois = apply_reg(anchors, reg_param)
    rois[0::2].clamp_(0, im_info[0] - 1)
    rois[1::2].clamp_(0, im_info[1] - 1)

    # Pre-NMS top-K selection
    score_foreground = score[:, :, 0].squeeze(0)
    _, order = torch.sort(score_foreground, descending=True)
    if (cfg.pre_nms_topk > 0
            and cfg.pre_nms_topk < score_foreground.size(0)):
        order = order[:cfg.pre_nms_topk]
    rois = rois[order, :]
    score_foreground = score_foreground[order]

    # NMS
    nms_keep_index = nms(rois, score_foreground, cfg.rpn_nms_thr)
    rois = rois[nms_keep_index, :]

    # Post-NMS top-K selection
    if (cfg.aft_nms_topk > 0
            and cfg.aft_nms_topk < nms_keep_index.size(0)):
        rois = rois[:cfg.aft_nms_topk, :]
        score_foreground = score_foreground[:cfg.aft_nms_topk]

    return rois
def detect(image, model, priors):
    """Run single-image detection and return NMS-filtered dets."""
    h, w = image.shape[:2]
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    image = image.astype('float32')
    images = np.expand_dims(image, axis=0)

    confs, locs = model(images, training=False)

    boxes = decode(priors, tf.squeeze(locs, 0))
    boxes = boxes.numpy()
    # scale normalized boxes back to the original image size
    scale = np.array([w, h, w, h])
    boxes = boxes * scale

    confs = tf.squeeze(confs, 0)
    scores = confs.numpy()
    scores = scores[:, 1]

    # Ignore low scores
    inds = np.where(scores > FLAGS.conf_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep top-k before NMS
    order = scores.argsort()[::-1][:FLAGS.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # NMS
    dets = np.hstack(
        (boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    selected_idx = np.array([0, 1, 2, 3, 4])
    keep = nms(dets[:, selected_idx], FLAGS.nms_threshold)
    dets = dets[keep, :]

    # Keep top-k after NMS
    dets = dets[:FLAGS.keep_top_k, :]

    return dets
def _rpn_proposal(self, rpn_reg_locs, rpn_cls_score):
    """Decode the RPN output into the proposals fed to the RoI stage.

    NMS_pre_TopN -> apply nms -> NMS_post_TopN
    """
    anchors = tf.py_func(anchor_generate, [self._h, self._w], [tf.float32])
    anchors = to_box_ctr(anchors)
    boxes_regressed = anchor_regress(
        anchors, rpn_reg_locs)  # apply transform to all anchors
    rpn_boxes = to_box_cor(boxes_regressed)

    rpn_score_arg = tf.argsort(rpn_cls_score, direction='DESCENDING')
    rpn_arg_top_pre = rpn_score_arg[:cfg.TRAIN.NMS_PRE_TOPN]
    rpn_boxes_top_pre = tf.gather(rpn_boxes, rpn_arg_top_pre)
    rpn_score_top_pre = tf.gather(rpn_cls_score, rpn_arg_top_pre)

    indices = nms(rpn_boxes_top_pre, rpn_score_top_pre,
                  cfg.TRAIN.NMS_POST_TOPN, cfg.NMS_THRESH)
    rpn_proposal = tf.gather(rpn_boxes_top_pre, indices)
    rpn_proposal_cropped = box_cropper(rpn_proposal, self._h, self._w)

    return rpn_proposal_cropped  # train: [2000, 4] y1, x1, y2, x2
def post_process(dets, c, s, h, w, num_classes, score_thresh):
    # dets: [N*K, 6], each row [x1, y1, x2, y2, score, class_id]
    # return: top_preds{1: list of [x1,y1,x2,y2,score],
    #                   2: list of [x1,y1,x2,y2,score], ...}
    top_preds = {}

    # transform x1, y1
    dets[:, :2] = transform_preds(dets[:, 0:2], c[0], s[0], (w, h))
    # transform x2, y2
    dets[:, 2:4] = transform_preds(dets[:, 2:4], c[0], s[0], (w, h))

    # do nms on dets before assigning classes
    keep = nms(dets, 0.5)
    dets = dets[keep]

    # get bbox and score for every class
    classes = dets[:, -1]
    scores = dets[:, 4]
    for j in range(1, num_classes):
        inds = ((classes == j) * (scores > score_thresh))
        top_preds[j] = np.concatenate([
            dets[inds, :4].astype(np.float32),
            dets[inds, 4:5].astype(np.float32)
        ], axis=1).tolist()
    return top_preds
def main():
    total_timer = Timer(name='total')
    total_timer.tic()

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()

    # ------------- 1. Read IR generated by the Model Optimizer (.xml and .bin files) -------------
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)
    # ----------------------------------------------------------------------------------------------

    # ------------- 2. Load plugin for inference engine and extensions library if specified -------
    log.info("Loading Inference Engine")
    ie = IECore()
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)
    # ----------------------------------------------------------------------------------------------

    # ------------- 3. Configure input & output ---------------------------------------------------
    # ------------- Prepare input blobs ------------------------------------------------------------
    log.info("Preparing input blobs")
    assert len(net.inputs.keys()) == 1, \
        "Sample supports topologies with 1 input only"
    input_name = next(iter(net.inputs.keys()))
    input_info = net.inputs[input_name]
    input_info.precision = 'FP32'

    # ------------- Prepare output blobs -----------------------------------------------------------
    log.info('Preparing output blobs')
    assert len(net.outputs.keys()) == 2, \
        "Sample supports topologies with 2 outputs only"
    loc_out_name = "797"
    class_out_name = "741"
    assert (loc_out_name in net.outputs.keys()) and \
           (class_out_name in net.outputs.keys())
    loc_out_info = net.outputs[loc_out_name]
    class_out_info = net.outputs[class_out_name]
    loc_out_info.precision = "FP32"
    class_out_info.precision = "FP32"
    # ----------------------------------------------------------------------------------------------

    # ------------- 4. Load the model to the device -----------------------------------------------
    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ------------- 5. Read and preprocess input ---------------------------------------------------
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)
    if args.voc_res_file and os.path.exists(args.voc_res_file):
        os.remove(args.voc_res_file)

    create_anchor_timer = Timer(name='create_anchor')
    read_img_timer = Timer(name='read_img')
    preprocess_timer = Timer(name='preprocess')
    infer_timer = Timer(name='infer')
    adapter_timer = Timer(name='adapter')
    patch_img_nms_timer = Timer(name='patch_img_nms')
    whole_img_nms_timer = Timer(name='whole_img_nms')
    add_offset_timer = Timer(name='add_offset')
    write_result_timer = Timer(name='write_result')

    create_anchor_timer.tic()
    adapter = RetinaNetAdapter(input_shape=args.patch_size)
    create_anchor_timer.toc()

    image_names = os.listdir(args.image_dir)
    log.info("image_nums: {}".format(len(image_names)))
    for image_id, image_name in enumerate(image_names):
        read_img_timer.tic()
        image_path = os.path.join(args.image_dir, image_name)
        img = cv2.imread(image_path).astype('float32')
        read_img_timer.toc()

        height, width, _ = img.shape
        image_shape = (width, height)
        strides = args.strides
        patch_size = args.patch_size
        x_num, y_num = calc_split_num(image_shape, patch_size, strides)
        log.info("id:{}, name: {}, shape: ({},{}), x_num:{}, y_num:{}".format(
            image_id, image_name, height, width, x_num, y_num))

        preprocess_timer.tic()
        img = img.transpose((2, 0, 1))  # change data layout from HWC to CHW
        preprocess_timer.toc()

        result_all = []
        for i in range(x_num):
            for j in range(y_num):
                # Slide over the image; the last patch in each direction
                # is anchored to the image border.
                x = strides[0] * i if i < x_num - 1 \
                    else image_shape[0] - args.patch_size[0]
                y = strides[1] * j if j < y_num - 1 \
                    else image_shape[1] - args.patch_size[1]
                # print('processing {} , x: {}, y: {}'.format(image_name, x, y))

                preprocess_timer.tic()
                crop_img = img[:, y:y + patch_size[1], x:x + patch_size[0]].copy()
                crop_img = crop_img[np.newaxis, :, :, :]
                preprocess_timer.toc()

                # ------------- Performing inference ----------------------------------------------
                infer_timer.tic()
                res = exec_net.infer(inputs={input_name: crop_img})
                loc_out = res[loc_out_name][0]
                class_out = res[class_out_name][0]
                infer_timer.toc()

                adapter_timer.tic()
                result = adapter.process(loc_out, class_out)
                adapter_timer.toc()

                patch_img_nms_timer.tic()
                result, _ = nms(result, thresh=0.5, keep_top_k=100)
                patch_img_nms_timer.toc()

                add_offset_timer.tic()
                # Shift patch-local boxes back into whole-image coordinates.
                result[:, 0] += x
                result[:, 1] += y
                result[:, 2] += x
                result[:, 3] += y
                result_all.append(result)
                add_offset_timer.toc()

        whole_img_nms_timer.tic()
        result_all = np.concatenate(result_all, axis=0)
        nms_result, _ = nms(result_all, thresh=0.5)
        whole_img_nms_timer.toc()

        write_result_timer.tic()
        voc_format = '{} {:.4f} {} {} {} {}'
        pos_all = []
        voc_all = []
        for i in range(nms_result.shape[0]):
            x = int(nms_result[i, 0])
            y = int(nms_result[i, 1])
            w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
            h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
            p = float(nms_result[i, 4])
            pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
            pos_all.append(pos)

            if args.voc_res_file:
                xmin = x
                ymin = y
                xmax = int(nms_result[i, 2])
                ymax = int(nms_result[i, 3])
                voc_str = voc_format.format(
                    os.path.splitext(image_name)[0], p, xmin, ymin, xmax, ymax)
                voc_all.append(voc_str)

        file_name = os.path.splitext(image_name)[0] + '.json'
        with open(os.path.join(args.result_dir, file_name), 'w') as f:
            json.dump(pos_all, f)
        if args.voc_res_file:
            with open(args.voc_res_file, 'a') as f:
                for voc_str in voc_all:
                    f.write(voc_str + '\n')
        write_result_timer.toc()

    total_timer.toc()
    # ----------------------------------------------------------------------------------------------
    all_timers = []
    all_timers.extend([
        create_anchor_timer, read_img_timer, preprocess_timer, infer_timer,
        adapter_timer, patch_img_nms_timer, whole_img_nms_timer,
        add_offset_timer, write_result_timer, total_timer
    ])
    for timer in all_timers:
        log.info('{}: avg: {:.2f} ms, total: {:.2f}s'.format(
            timer.name, timer.avg * 1000, timer.total))
    log.info("Execution successful\n")
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride, eval_mode):
    """
    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :return:
    """
    rpn_bbox_cls_prob = rpn_softmax(rpn_bbox_cls)

    anchor = Anchors(feat_stride=feat_stride)
    # all_anchors (A * H * W, 4)
    anchors, A = anchor.get_anchors()
    num_anchors = A

    # (1, 2 * k, H, W)
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    # (1, 4 * k, H, W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert rpn_bbox_cls_prob.shape[0] == 1, 'Only support 1 batch_size'

    if not eval_mode:
        # training mode
        pre_nms_topN = cfg.train_rpn_pre_nms_top_n
        post_nms_topN = cfg.train_rpn_post_nms_top_n
        nms_thresh = cfg.train_rpn_nms_thresh
        min_size = cfg.train_rpn_min_size
    else:
        # eval mode
        pre_nms_topN = cfg.test_rpn_pre_nms_top_n
        post_nms_topN = cfg.test_rpn_post_nms_top_n
        nms_thresh = cfg.test_rpn_nms_thresh
        min_size = cfg.test_rpn_min_size

    # For the predicted cls maps, the first 9 channels are background
    # and the last 9 are foreground.
    scores = rpn_bbox_cls_prob[:, num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred

    # (1, 4 * k, H, W) -> (1, H, W, 4 * A) -> (-1, 4)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # 1. regress real boxes from the anchors and the predicted deltas:
    #    (dx, dy, dw, dh) -> (cx, cy, w, h)
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_size)

    # 3. remove predicted boxes with either height or width < threshold
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take post_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # scores = scores[keep]

    # Output rois blob. Our RPN implementation only supports a single
    # input image, so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
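`_filter_boxes` is referenced in step 3 but not defined here. A minimal sketch consistent with that step's comment and with the standard py-faster-rcnn proposal layer (an assumption about the actual helper, not this project's code):

def _filter_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep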
def detect_onetwork(self, image_input, dets):
    image_height, image_width, image_channels = image_input.shape
    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
        dets, image_width, image_height)

    num_boxes = dets.shape[0]
    cropped_ims_tensors = []
    for i in range(num_boxes):
        try:
            # Paste the (possibly clipped) crop into a zero canvas,
            # then resize to the 48x48 ONet input.
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                image_input[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            crop_im = cv2.resize(tmp, (48, 48))
            crop_im_tensor = self.convert_to_tensor(crop_im)
            cropped_ims_tensors.append(crop_im_tensor)
        except Exception:
            continue

    try:
        feed_imgs = Variable(torch.stack(cropped_ims_tensors))
    except Exception:
        return None, None

    detection, bbox = self.o_network(feed_imgs.float())
    detection = detection.data.numpy()
    bbox = bbox.data.numpy()

    keep_inds = np.where(detection > self.threshold[2])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        detection = detection[keep_inds]
        bbox = bbox[keep_inds]
    else:
        return None, None

    keep = nms(boxes, 0.7, mode="Minimum")
    if len(keep) == 0:
        return None, None

    keep_detection = detection[keep]
    keep_boxes = boxes[keep]
    keep_bbox = bbox[keep]

    # Apply the regression offsets relative to each box's width/height.
    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1
    align_topx = keep_boxes[:, 0] + keep_bbox[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_bbox[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_bbox[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_bbox[:, 3] * bh

    boxes = np.vstack([
        align_topx, align_topy, align_bottomx, align_bottomy,
        keep_detection[:, 0]
    ])
    boxes_align = boxes.T
    return boxes_align
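`self.square_bbox` is also not shown. In common MTCNN implementations it expands each box to a square around its center so the crop matches the square network input; a hedged sketch of that assumed behavior:

def square_bbox(self, bbox):
    # bbox: (N, >=4) array of [x1, y1, x2, y2, ...] rows (assumed layout)
    square = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0] + 1
    h = bbox[:, 3] - bbox[:, 1] + 1
    side = np.maximum(w, h)
    # Recenter and grow the shorter side to make the box square.
    square[:, 0] = bbox[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bbox[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side - 1
    square[:, 3] = square[:, 1] + side - 1
    return square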
def detect_pnetwork(self, image_input):
    image_height, image_width, image_channels = image_input.shape
    net_size = 12

    final_boxes = []
    # Build an image pyramid, starting so that a min_face_size face
    # maps onto the 12x12 PNet receptive field.
    current_scale = float(net_size) / self.min_face_size
    image_resized = self.resize_image(image_input, current_scale)
    current_height, current_width, _ = image_resized.shape

    while min(current_height, current_width) > net_size:
        image_list = []
        image_resized_tensor = self.convert_to_tensor(image_resized)
        image_list.append(image_resized_tensor)
        image_list = torch.stack(image_list)
        image_list = Variable(image_list)

        detection, bbox = self.p_network(image_list.float())
        detection = np.transpose(detection.data.numpy(), (0, 2, 3, 1))
        bbox = np.transpose(bbox.data.numpy(), (0, 2, 3, 1))

        boxes = self.generate_bounding_boxes(detection[0, :, :], bbox,
                                             current_scale,
                                             self.threshold[0])

        # Move to the next pyramid level.
        current_scale *= self.scale_factor
        image_resized = self.resize_image(image_input, current_scale)
        current_height, current_width, _ = image_resized.shape

        if boxes.size == 0:
            continue
        # Per-scale NMS with a loose threshold.
        keep = nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        final_boxes.append(boxes)

    if len(final_boxes) == 0:
        return None, None

    final_boxes = np.vstack(final_boxes)
    # Cross-scale NMS with a tighter threshold.
    keep = nms(final_boxes[:, 0:5], 0.7, 'Union')
    final_boxes = final_boxes[keep]

    bw = final_boxes[:, 2] - final_boxes[:, 0] + 1
    bh = final_boxes[:, 3] - final_boxes[:, 1] + 1

    boxes = np.vstack([
        final_boxes[:, 0],
        final_boxes[:, 1],
        final_boxes[:, 2],
        final_boxes[:, 3],
        final_boxes[:, 4],
    ])
    boxes = boxes.T

    # Refine with the regression offsets stored in columns 5..8.
    align_topx = final_boxes[:, 0] + final_boxes[:, 5] * bw
    align_topy = final_boxes[:, 1] + final_boxes[:, 6] * bh
    align_bottomx = final_boxes[:, 2] + final_boxes[:, 7] * bw
    align_bottomy = final_boxes[:, 3] + final_boxes[:, 8] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        final_boxes[:, 4],
    ])
    boxes_align = boxes_align.T
    return boxes, boxes_align
def update(self, image: np.ndarray, boxes: np.ndarray, scores: np.ndarray) \
        -> Iterable[trace.Trace]:
    self.frame += 1
    refind, lost = [], []
    activated, removed = [], []

    # Step 1. Prediction
    for track in chain(self.tracked, self.lost):
        track.predict()

    # Step 2. Selection by score
    if scores is None:
        scores = np.ones(np.size(boxes, 0), dtype=float)

    detections = list(chain(
        map(lambda t: trace.Trace(*t, from_det=True), zip(boxes, scores)),
        map(lambda t: trace.Trace(*t, from_det=False), zip(boxes, scores)),
    ))

    self.classifier.update(image)

    detections.extend(map(
        lambda t: trace.Trace(t.tracking(image), t.track_score, from_det=True),
        filter(lambda t: t.is_activated, chain(self.tracked, self.lost))))

    rois = np.asarray(list(map(lambda t: t.to_tlbr, detections)), np.float32)
    class_scores = self.classifier.predict(rois)
    scores = np.concatenate([
        np.ones(np.size(boxes, 0), dtype=np.float32),
        np.fromiter(map(lambda t: t.score, detections[np.size(boxes, 0):]),
                    dtype=np.float32)
    ]) * class_scores

    # Non-maxima suppression
    if len(detections) > 0:
        mask = np.zeros(np.size(rois, 0), dtype=bool)
        mask[list(nms(rois, scores.reshape(-1), threshold=.4))] = True
        indices = np.zeros(len(detections), dtype=bool)
        indices[np.where(mask & (scores >= self.min_score))] = True
        detections = list(compress(detections, indices))
        scores = scores[indices]
        for detection, score in zip(detections, scores):
            detection.score = score

    predictions = list(filter(lambda t: not t.from_det, detections))
    detections = list(filter(lambda t: t.from_det, detections))

    # set features
    features = self.identifier.extract(image, np.asarray(
        list(map(lambda t: t.to_tlbr, detections)), dtype=np.float32))
    for idx, detection in enumerate(detections):
        detection.feature = features[idx]

    # Step 3. Association
    # matching for tracked targets
    unconfirmed = list(filter(lambda t: not t.is_activated, self.tracked))
    tracked = list(filter(lambda t: t.is_activated, self.tracked))

    distance = matching.nearest_distance(tracked, detections,
                                         metric='euclidean')
    cost = matching.gate_cost(self.motion, distance, tracked, detections)
    matches, u_track, u_detection = matching.assignment(
        cost, threshold=self.min_dist)
    for track, det in matches:
        tracked[track].update(self.frame, image, detections[det])

    # matching for missing targets
    detections = list(map(lambda u: detections[u], u_detection))
    distance = matching.nearest_distance(self.lost, detections,
                                         metric='euclidean')
    cost = matching.gate_cost(self.motion, distance, self.lost, detections)
    matches, u_lost, u_detection = matching.assignment(
        cost, threshold=self.min_dist)
    for miss, det in matches:
        self.lost[miss].reactivate(self.frame, image, detections[det],
                                   reassign=not self.use_refind)
        refind.append(self.lost[miss])

    # remaining tracked
    matched_size = len(u_detection)
    detections = list(map(lambda u: detections[u], u_detection)) + predictions
    u_tracked = list(map(lambda u: tracked[u], u_track))
    distance = matching.iou_distance(u_tracked, detections)
    matches, u_track, u_detection = matching.assignment(distance, threshold=.8)
    for track, det in matches:
        u_tracked[track].update(self.frame, image, detections[det],
                                update_feature=True)
    for track in map(lambda u: u_tracked[u], u_track):
        track.lost()
        lost.append(track)

    # unconfirmed
    detections = list(map(lambda u: detections[u],
                          filter(lambda u: u < matched_size, u_detection)))
    distance = matching.iou_distance(unconfirmed, detections)
    matches, u_unconfirmed, u_detection = matching.assignment(
        distance, threshold=.8)
    for track, det in matches:
        unconfirmed[track].update(self.frame, image, detections[det],
                                  update_feature=True)
    for track in map(lambda u: unconfirmed[u], u_unconfirmed):
        track.remove()
        removed.append(track)

    # Step 4. Init new traces
    for track in filter(lambda t: t.from_det and t.score >= .6,
                        map(lambda u: detections[u], u_detection)):
        track.activate(self.frame, image, self.motion)
        activated.append(track)

    # Step 5. Update state
    for track in filter(lambda t: self.frame - t.frame > self.max_lost,
                        self.lost):
        track.remove()
        removed.append(track)

    self.tracked = list(chain(
        filter(lambda t: t.state == trace.State.Tracked, self.tracked),
        activated,
        refind,
    ))
    self.lost = list(chain(
        filter(lambda t: t.state == trace.State.Lost, self.lost),
        lost,
    ))
    self.removed.extend(removed)

    lost_score = self.classifier.predict(np.asarray(
        list(map(lambda t: t.to_tlbr, self.lost)), dtype=np.float32))
    return chain(
        filter(lambda t: t.is_activated, self.tracked),
        map(lambda it: it[1],
            filter(lambda it: lost_score[it[0]] > .3
                   and self.frame - it[1].frame <= 4,
                   enumerate(self.lost))),
    )
def validate(*, dataloader, model, device, step=-1, bbox_all=False, debug_mode):
    # result = open("logs/result.txt", "w")
    with torch.no_grad():
        t_start = time.time()
        conf_thres, nms_thres, iou_thres = model.get_threshs()
        width, height = model.img_size()
        model.eval()
        print("Calculating mAP - Model in evaluation mode")
        n_images = len(dataloader.dataset)
        mAPs = []
        mR = []
        mP = []
        for batch_i, (img_uris, imgs, targets) in enumerate(
                tqdm(dataloader, desc='Computing mAP')):
            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            # output,_,_,_ = model(imgs)
            output = model(imgs)
            for sample_i, (labels, detections) in enumerate(zip(targets, output)):
                detections = detections[detections[:, 4] > conf_thres]
                if detections.size()[0] == 0:
                    predictions = torch.tensor([])
                else:
                    predictions = torch.argmax(detections[:, 5:], dim=1)

                # From (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, nms_thres)
                box_corner = box_corner[nms_indices]
                probabilities = probabilities[nms_indices]
                predictions = predictions[nms_indices]
                if nms_indices.shape[0] == 0:
                    # there should always be at least one label
                    continue

                # Get detections sorted by decreasing confidence scores
                _, inds = torch.sort(-probabilities)
                box_corner = box_corner[inds]
                probabilities = probabilities[inds]
                predictions = predictions[inds]

                # remove the 0-padding added by the dataloader
                labels = labels[(labels[:, 1:5] <= 0).sum(dim=1) == 0]
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 1:5])
                target_boxes[:, (0, 2)] *= width
                target_boxes[:, (1, 3)] *= height

                detected = torch.zeros(target_boxes.shape[0],
                                       device=target_boxes.device,
                                       dtype=torch.uint8)
                correct = torch.zeros(nms_indices.shape[0],
                                      device=box_corner.device,
                                      dtype=torch.uint8)
                # 0th dim is the detection (repeated in the 1st dim);
                # 2nd dim is the coord
                ious = bbox_iou(
                    box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0], -1),
                    target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1, -1))
                # ious is 2d -- 0th dim is the detected box,
                # 1st dim is the target box, value is iou

                # skip images without labels
                if [] in ious.data.tolist():
                    continue

                best_is = torch.argmax(ious, dim=1)

                # TODO fix for multi-class. Need to use predictions somehow?
                for i, iou in enumerate(ious):
                    best_i = best_is[i]
                    if ious[i, best_i] > iou_thres and detected[best_i] == 0:
                        correct[i] = 1
                        detected[best_i] = 1

                # Compute Average Precision (AP) per class
                ap, r, p = average_precision(tp=correct,
                                             conf=probabilities,
                                             n_gt=labels.shape[0])

                # Compute mean AP across all classes in this image,
                # and append to image list
                mAPs.append(ap)
                mR.append(r)
                mP.append(p)

                if bbox_all or sample_i < 2:
                    # log the first two images in every batch
                    img_filepath = img_uris[sample_i]
                    if img_filepath is None:
                        print("NULL image filepath for image uri: {uri}".format(
                            uri=img_uris[sample_i]))
                    orig_img = Image.open(img_filepath)
                    # draw = ImageDraw.Draw(img_with_boxes)
                    w, h = orig_img.size
                    pad_h, pad_w, scale_factor = calculate_padding(
                        h, w, height, width)
                    detect_box = copy.deepcopy(box_corner)
                    box_corner /= scale_factor
                    box_corner[:, (0, 2)] -= pad_w
                    box_corner[:, (1, 3)] -= pad_h

                    if debug_mode:
                        pil_img = transforms.ToPILImage()(imgs.squeeze())
                        # get the image's name
                        img_path = img_uris[0]
                        img_name = "_".join(map(str, img_path.split("_")[-5:]))
                        tmp_path = os.path.join(
                            visualization_tmp_path,
                            img_name[:-4] + "_predicted_vis.jpg")
                        vis_label = add_class_dimension_to_labels(detect_box)
                        visualize_and_save_to_local(pil_img, vis_label,
                                                    tmp_path, box_color="red")
                        print("Prediction visualization uploaded")

        # Means over all images
        mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
        mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
        mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        dt = time.time() - t_start
        print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(
            mean_mAP, mean_R, mean_P))
        # result.write(str(1-mean_mAP))
        # result.close()
        return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
def evaluate(model, dataloader, templates, prob_thresh=0.65,
             nms_thresh=0.3, device=None):
    # TODO check Peiyun's code to see the correct way to perform NMS
    print("Running multiscale evaluation code")

    model = model.eval().to(device)

    # Evaluate over multiple scales
    scales_list = [0.5 ** x for x in [1, 0, -1]]
    num_templates = templates.shape[0]

    results = []
    to_pil_image = transforms.ToPILImage()

    for idx, (img, filename) in tqdm(enumerate(dataloader),
                                     total=len(dataloader)):
        dets = np.empty((0, 6))  # store bbox (x1, y1, x2, y2), score and scale

        # convert tensor to PIL image so we can perform resizing
        image = to_pil_image(img[0])
        min_side = np.min(image.size)

        for s, scale in enumerate(scales_list):
            # scale the images
            scaled_image = transforms.Resize(int(min_side * scale))(image)

            # normalize the images
            img = dataloader.dataset.transforms(scaled_image)

            # add batch dimension
            img.unsqueeze_(0)

            # now run the model
            x = img.float().to(device)
            output = model(x)

            # first `num_templates` channels are class maps
            score_cls = torch.sigmoid(output[:, :num_templates, :, :])
            score_cls = score_cls.data.cpu().numpy().transpose((0, 2, 3, 1))

            score_reg = output[:, num_templates:, :, :]
            score_reg = score_reg.data.cpu().numpy().transpose((0, 2, 3, 1))

            t_bboxes, scores = get_bboxes(score_cls, score_reg, templates,
                                          prob_thresh,
                                          dataloader.dataset.rf, scale)

            scales = np.ones((t_bboxes.shape[0], 1)) / scale
            # append scores at the end for NMS
            d = np.hstack((t_bboxes, scores, scales))
            dets = np.vstack((dets, d))

        # Apply NMS
        keep = nms(dets, nms_thresh)
        dets = dets[keep]

    return dets
def valid(datacfg, weight_file, outfile_prefix):
    options = read_data_cfg(datacfg)
    valid_images_set_file = options['valid']
    namesfile = options['names']

    # load class names
    class_names = load_class_names(namesfile)

    # load the list of validation images
    with open(valid_images_set_file, 'r') as fp:
        image_files = fp.readlines()
        image_files = [file.rstrip() for file in image_files]

    model = yolo_v2()
    model.load_weights(weight_file)
    print("weights %s loaded" % (weight_file))
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    # result files, one per class
    fps = [0] * model.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    dir_name = 'results/%s_%s_%s' % (
        namesfile.split('/')[-1].split('.')[0],
        weight_file.split('/')[-1].split('.')[0],
        time.strftime("%Y%m%d_%H%M%S", time.localtime()))
    print('save results to %s' % (dir_name))
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    for i in range(model.num_classes):
        buf = "%s/%s_%s.txt" % (dir_name, outfile_prefix, class_names[i])
        fps[i] = open(buf, 'w')

    # construct the data list
    valid_dataset = VOCDataset(image_files,
                               shape=(model.width, model.height),
                               shuffle=False,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                               ]))
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=4,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    conf_thresh = 0.005
    nms_thresh = 0.45
    LineId = -1
    for batch_index, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = model(data).data
        batch_boxes = model.get_region_boxes(output, conf_thresh)
        for i in range(len(batch_boxes)):
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)

            LineId = LineId + 1
            image_name = image_files[LineId]
            print("[Batch_index:%d] [%d/%d] file:%s " %
                  (batch_index, LineId + 1, len(image_files), image_name))

            img_orig = Image.open(image_name)
            height, width = img_orig.height, img_orig.width
            print(" height %d, width %d, bbox num %d" %
                  (height, width, len(boxes)))

            for box in boxes:
                # box is (cx, cy, w, h, det_conf, cls_conf, cls_id),
                # normalized to [0, 1]
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height
                det_conf = box[4]
                cls_conf = box[5]
                cls_id = box[6]
                fps[cls_id].write("%s %f %f %f %f %f\n" %
                                  (image_name, det_conf * cls_conf,
                                   x1, y1, x2, y2))

    for i in range(model.num_classes):
        fps[i].close()