class Pelee_Det(object):
    """Single-image detector wrapping the Pelee network and its post-processing.

    The class reads the module-level ``cfg``, ``args`` and ``ch_labels``
    objects; it takes no constructor arguments.
    """

    def __init__(self):
        """Build the network, load ``args.trained_model`` and cache the priors."""
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()
        self.num_classes = cfg.model.num_classes
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        # Use the same switch ``detect`` applies to its input tensor so the
        # network, the priors and the image always share a device.
        if cfg.test_cfg.cuda:
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):
        """Run detection on one image and draw the results.

        Args:
            image: array indexed as ``(height, width, ...)``; its first two
                dimensions are used to scale the boxes.

        Returns:
            The ``(infos, im2show)`` pair produced by ``draw_detection``.
        """
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        with torch.no_grad():
            out = self.net(img)
            boxes, scores = self.detector.forward(out, self.priors)
        # Keep the scale factors on the device of the decoded boxes.
        scale = torch.Tensor([w, h, w, h]).to(boxes.device)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()

        allboxes = []
        # Class 0 is skipped (loop starts at 1).
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([det.tolist() + [j] for det in c_dets])

        # reshape(-1, 6) keeps the array two-dimensional even when nothing was
        # detected, so the column slicing below cannot raise IndexError.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
if 'm2det' in fname: continue # ignore the detected images image = cv2.imread(fname, cv2.IMREAD_COLOR) else: ret, image = capture.read() if not ret: cv2.destroyAllWindows() capture.release() break loop_start = time.time() w, h = image.shape[1], image.shape[0] img = _preprocess(image).unsqueeze(0) if cfg.test_cfg.cuda: img = img.cuda() scale = torch.Tensor([w, h, w, h]) out = net(img) boxes, scores = detector.forward(out, priors) boxes = (boxes[0] * scale).cpu().numpy() scores = scores[0].cpu().numpy() allboxes = [] for j in range(1, cfg.model.m2det_config.num_classes): inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0] if len(inds) == 0: continue c_bboxes = boxes[inds] c_scores = scores[inds, j] c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False) soft_nms = cfg.test_cfg.soft_nms keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms ) #min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
def demo(v_f):
    """Run the M2Det detector over a video, showing and recording every frame.

    The model is configured from the module-level ``config_f``,
    ``checkpoint_path``, ``device`` and ``classes`` names. Annotated frames
    are displayed in window ``'rr'`` and written to ``result.mp4``.

    Args:
        v_f: source handed to ``cv2.VideoCapture``.
    """
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    frame_size = (int(cap.get(3)), int(cap.get(4)))
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(*frame_size))
    out_video = cv2.VideoWriter("result.mp4",
                                cv2.VideoWriter_fourcc(*'MJPG'), 24,
                                frame_size)
    try:
        while True:
            ret, image = cap.read()
            if not ret:
                break
            w, h = image.shape[1], image.shape[0]
            img = _preprocess(image).unsqueeze(0).to(device)
            with torch.no_grad():
                out = net(img)
                boxes, scores = detector.forward(out, priors)
            # Keep the scale factors on the device of the decoded boxes.
            scale = torch.Tensor([w, h, w, h]).to(boxes.device)
            boxes = (boxes[0] * scale).cpu().numpy()
            scores = scores[0].cpu().numpy()

            allboxes = []
            for j in range(1, cfg.model.m2det_config.num_classes):
                inds = np.where(
                    scores[:, j] > cfg.test_cfg.score_threshold)[0]
                if len(inds) == 0:
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(
                        np.float32, copy=False)
                soft_nms = cfg.test_cfg.soft_nms
                keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
                keep = keep[:cfg.test_cfg.keep_per_class]
                c_dets = c_dets[keep, :]
                allboxes.extend([det.tolist() + [j] for det in c_dets])

            # Frames without detections are shown/recorded unannotated, so
            # ``res`` is always bound and the output video keeps every frame.
            res = image
            if len(allboxes) > 0:
                allboxes = np.array(allboxes)
                # [boxes, score, label_id] -> [label_id, score, boxes]
                allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
                logging.info('allboxes shape: {}'.format(allboxes.shape))
                res = visualize_det_cv2(image, allboxes, classes=classes,
                                        thresh=0.2)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
    finally:
        # Release the writer and the capture even if a frame raised.
        out_video.release()
        cv2.destroyAllWindows()
        cap.release()
def main():
    """Run a RefineDet-style detector over every image listed in ``img_set``.

    All settings come from module-level names (``deform``, ``dataset``,
    ``save_dir``, ``display``, ``results_file``, ``det_list`` ...). For each
    image the detections are optionally stored (COCO-style dicts in
    ``det_list`` or text lines in ``results_file``) and optionally drawn on a
    640x480 preview; pressing ``s`` in the preview window saves the current
    image, its preview and the raw detections.
    """
    mean = (104, 117, 123)
    print('loading model!')
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=1024, def_groups=deform,
                        multihead=multihead, bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        use_refine=refine, c7_channel=1024, bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    is_coco = 'COCO' in dataset
    # The context manager closes the list file even if an image fails.
    with open(img_set, 'r') as listing:
        for i, line in enumerate(listing):
            # Strip only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            entry = line.rstrip('\r\n')
            if not entry:
                continue
            if is_coco:
                image_name = entry
                image_id = int(image_name.split('_')[-1])
            elif 'VOC' in dataset:
                image_name = entry
                image_id = -1
            else:
                image_name, image_id = entry.split(' ')
            print(i, image_name, image_id)

            image_path = os.path.join(img_root, image_name + '.jpg')
            image = cv2.imread(image_path, 1)
            if image is None:
                # cv2.imread returns None for missing/unreadable files.
                print('skip unreadable image:', image_path)
                continue
            h, w, _ = image.shape
            image_draw = cv2.resize(image.copy(), (640, 480))
            im_trans = base_transform(image, ssd_dim, mean)

            with torch.no_grad():
                x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                    0, 3, 1, 2).to(device)
                if 'RefineDet' in backbone and refine:
                    arm_loc, _, loc, conf = net(x)
                else:
                    loc, conf = net(x)
                    arm_loc = None
                detections = detector.forward(loc, conf, priors,
                                              arm_loc_data=arm_loc)

            out = list()
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                if dets.sum() == 0:
                    continue
                # Keep only rows whose first column (the score) is positive.
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                scores = dets[:, 0].cpu().numpy()
                boxes_np = boxes.cpu().numpy()
                for b, s in zip(boxes_np, scores):
                    if save_dir:
                        out.append([int(b[0]), int(b[1]), int(b[2]),
                                    int(b[3]), j - 1, s])
                        if is_coco:
                            # COCO boxes are stored as [x, y, width, height].
                            det_list.append({
                                'image_id': image_id,
                                'category_id': labelmap[j],
                                'bbox': [
                                    float('{:.1f}'.format(b[0])),
                                    float('{:.1f}'.format(b[1])),
                                    float('{:.1f}'.format(b[2] - b[0] + 1)),
                                    float('{:.1f}'.format(b[3] - b[1] + 1))
                                ],
                                'score': float('{:.2f}'.format(s))
                            })
                        else:
                            results_file.write(
                                str(image_id) + ' ' + str(j) + ' ' + str(s) +
                                ' ' + str(np.around(b[0], 2)) + ' ' +
                                str(np.around(b[1], 2)) + ' ' +
                                str(np.around(b[2], 2)) + ' ' +
                                str(np.around(b[3], 2)) + '\n')
                    if display:
                        # Map original-image coordinates onto the preview.
                        top_left = (int(b[0] / w * 640), int(b[1] / h * 480))
                        bottom_right = (int(b[2] / w * 640),
                                        int(b[3] / h * 480))
                        cv2.rectangle(image_draw, top_left, bottom_right,
                                      (0, 255, 0), thickness=1)
                        cls = class_name[j] if is_coco else str(
                            labelmap[j - 1])
                        put_str = cls + ':' + str(np.around(s, decimals=2))
                        cv2.putText(image_draw, put_str,
                                    (top_left[0], top_left[1] - 10),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    color=(0, 255, 0), thickness=1)

            if display:
                cv2.imshow('frame', image_draw)
                ch = cv2.waitKey(0)
                if ch == 115 and save_dir:  # 115 == ord('s')
                    print('save: ', line)
                    torch.save(out,
                               os.path.join(save_dir, '%s.pkl' % str(entry)))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(entry)), image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(entry)),
                        image_draw)

    cv2.destroyAllWindows()
    if save_dir:
        # Same test as the one used while collecting ``det_list`` so the
        # collected COCO results are always written out.
        if is_coco:
            json.dump(det_list, results_file)
        results_file.close()
class ObjDetector(object):
    """Callable detector that tiles large images and merges per-tile results.

    Weights are read from the module-level ``trained_model`` path and the
    label names from ``LABELS_SET``.
    """

    def __init__(self, img_size=300, thresh=0.56):
        """Build the network for a 300 or 512 input and load its weights.

        Args:
            img_size: network input size, 300 or 512.
            thresh: minimum class score for a box to be reported.
        """
        assert img_size in (300, 512), 'net input image size must be 300 or 512'
        self.img_size = img_size
        self.thresh = thresh
        self.labels_name = LABELS_SET
        self.labels_numb = len(LABELS_SET)
        self.cfg = VOC_300 if img_size == 300 else VOC_512
        self.gpu_is_available = torch.cuda.is_available()
        self.gpu_numb = torch.cuda.device_count()

        self.net = build_net('test', self.img_size, self.labels_numb)
        self.detect = Detect(self.labels_numb, 0, self.cfg)
        self.transform = BaseTransform(self.img_size)

        # Checkpoint keys may carry a leading ``module.`` prefix; drop it.
        checkpoint = torch.load(trained_model, map_location='cpu')
        cleaned = OrderedDict(
            (key[7:] if key[:7] == 'module.' else key, value)
            for key, value in checkpoint.items())
        self.net.load_state_dict(cleaned)
        self.net.eval()
        print('Finished loading model!')

        if self.gpu_numb > 1:
            self.net = torch.nn.DataParallel(
                self.net, device_ids=list(range(self.gpu_numb)))
        if self.gpu_is_available:
            self.net.cuda()
            cudnn.benchmark = True

        # Prior boxes are computed once and reused for every call.
        with torch.no_grad():
            self.priors = PriorBox(self.cfg).forward()
            if self.gpu_is_available:
                self.priors = self.priors.cuda()

    def __net__(self, img):
        """Run the network on one tile; return ``(boxes, scores)`` as arrays.

        Boxes are multiplied by the tile's width/height before being returned.
        """
        tile_h, tile_w = img.shape[0], img.shape[1]
        scale = torch.Tensor([tile_w, tile_h, tile_w, tile_h])
        with torch.no_grad():
            x = self.transform(img).unsqueeze(0)
            if self.gpu_is_available:
                x = x.cuda()
                scale = scale.cuda()
            raw = self.net(x)
            boxes, scores = self.detect.forward(raw, self.priors)
            boxes, scores = boxes[0], scores[0]
            boxes *= scale
            return boxes.cpu().numpy(), scores.cpu().numpy()

    def __call__(self, image):
        """Detect objects in ``image``.

        :param image: rgb image (as stated by the original author)
        :return: dict mapping a label name to an array whose rows are
            ``[x1, y1, x2, y2, score]``
        """
        merged_boxes = np.empty((0, 4))
        merged_scores = np.empty((0, self.labels_numb))
        for tile, region in self.__chips__(image):
            tile_boxes, tile_scores = self.__net__(tile)
            # Shift tile-local coordinates by the tile's top-left corner.
            tile_boxes += [float(region[0]), float(region[1]),
                           float(region[0]), float(region[1])]
            merged_boxes = np.vstack((merged_boxes, tile_boxes))
            merged_scores = np.vstack((merged_scores, tile_scores))

        results = dict()
        for cls_idx in range(1, self.labels_numb):
            hits = np.where(merged_scores[:, cls_idx] > self.thresh)[0]
            if len(hits) == 0:
                continue
            stacked = np.hstack(
                (merged_boxes[hits],
                 merged_scores[hits, cls_idx][:, np.newaxis]))
            stacked = stacked.astype(np.float32, copy=False)
            kept = nms(stacked, 0.45, force_cpu=0)
            results[self.labels_name[cls_idx]] = stacked[kept, :]
        return results

    def __chips__(self, image):
        """Yield ``(tile, (x1, y1, x2, y2))`` pairs covering ``image``.

        Images whose shorter side exceeds 1500 pixels are split into four
        quadrants plus one centred tile; smaller images yield a single tile.
        """
        h, w, _ = image.shape
        half_w, half_h = w // 2, h // 2
        if min(h, w) > 1500:
            regions = [
                (0, 0, half_w, half_h),
                (half_w, 0, w, half_h),
                (0, half_h, half_w, h),
                (half_w, half_h, w, h),
                (half_w // 2, half_h // 2,
                 half_w + half_w // 2, half_h + half_h // 2),
            ]
        else:
            regions = [(0, 0, w, h)]
        for left, top, right, bottom in regions:
            yield image[top:bottom, left:right], (left, top, right, bottom)

    def draw(self, image, results):
        """Draw every box in ``results`` with its label and score on ``image``.

        Returns the same ``image`` object, modified in place.
        """
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.58
        for label, boxes in results.items():
            for row in boxes:
                x1, y1 = int(row[0]), int(row[1])
                x2, y2 = int(row[2]), int(row[3])
                caption = label + ',' + "%.2f" % row[4]
                # One colour per label, cycling through COLORS.
                color = COLORS[self.labels_name.index(label) % len(COLORS)]
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                text_h = cv2.getTextSize(caption, font, font_scale, 1)[0][1]
                # Place the caption above the box, clamped to the image top.
                cv2.putText(image, caption, (x1, max((y1 - text_h), 0)),
                            font, font_scale, color, 1)
        return image
def test(img_path, model_path='weights/RFB_vgg_COCO_30.3.pth'):
    """Detect objects in one image with an RFB network and display the result.

    Args:
        img_path: path of the image to load with ``cv2.imread``.
        model_path: checkpoint path; a path containing ``'mobile'`` selects
            the MobileNet variant, anything else the VGG variant.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    from collections import OrderedDict

    # Always bound, so CPU-only hosts no longer hit an unbound local.
    cuda = torch.cuda.is_available()
    use_mobile = 'mobile' in model_path
    cfg = COCO_mobile_300 if use_mobile else COCO_300

    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if cuda:
            priors = priors.cuda()

    numclass = 81
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    if use_mobile:
        net = build_rfb_mobilenet('test', 300, numclass)
    else:
        net = build_rfb_vgg_net('test', 300, numclass)
    transform = BaseTransform(net.size, (123, 117, 104), (2, 0, 1))
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        if cuda:
            x = x.cuda()
            scale = scale.cuda()

    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    state_dict = torch.load(
        model_path, map_location=None if cuda else 'cpu')['state_dict']
    # Strip the ``module.`` prefix some checkpoint keys carry.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k[:7] == 'module.' else k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    if cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    print('Finished loading model!')

    detector = Detect(numclass, 0, cfg)
    tic = time.time()
    with torch.no_grad():
        out = net(x)  # forward pass
        boxes, scores = detector.forward(out, priors)
    print('Finished in {}'.format(time.time() - tic))
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale  # scale each detection back up to the image
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    for j in range(1, numclass):
        inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
        # np.where never returns None; test for an empty result instead.
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = nms(c_dets, 0.6)
        c_dets = c_dets[keep, :]
        c_bboxes = c_dets[:, :4]
        if c_bboxes.shape[0] != 0:
            print('{}: {}'.format(j, c_bboxes))
            for box in c_bboxes:
                # OpenCV drawing functions need integer pixel coordinates.
                x1, y1, x2, y2 = (int(v) for v in box)
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                cv2.putText(img, '{}'.format(j), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                            cv2.LINE_AA)
    cv2.imshow('rr', img)
    cv2.waitKey(0)
class Detector(object):
    """RFB-VGG detector with 81 classes for single images and videos.

    The input size (300 or 512) is chosen from whether ``'300'`` occurs in
    ``model_path``.
    """

    def __init__(self, model_path):
        """Remember ``model_path`` and load the network from it."""
        self.model_path = model_path
        self.num_classes = 81
        self.cuda = torch.cuda.is_available()
        self.label_map_list = list(coco_label_map.values())
        self._init_model()

    def _init_model(self):
        """Build the network, load its weights and precompute the priors."""
        from collections import OrderedDict

        if '300' in self.model_path:
            cfg = COCO_300
            self.img_dim = 300
            print('Model input size is 300')
        else:
            cfg = COCO_512
            self.img_dim = 512
            print('Model input size is 512')

        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
        # Always set the attribute; it used to exist only on CUDA hosts.
        self.priors = priors.cuda() if self.cuda else priors

        self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        state_dict = torch.load(
            self.model_path,
            map_location=None if self.cuda else 'cpu')['state_dict']
        # Strip the ``module.`` prefix some checkpoint keys carry.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:] if k[:7] == 'module.' else k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        if self.cuda:
            self.net = self.net.cuda()
            cudnn.benchmark = True
        else:
            self.net = self.net.cpu()
        print('Finished loading model!')
        self.detector = Detect(self.num_classes, 0, cfg)

    def predict_on_img(self, img):
        """Return ``(boxes, scores)`` numpy arrays for one image.

        Boxes are multiplied by the image width/height before being returned.
        """
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        transform = BaseTransform(self.net.size, (123, 117, 104), (2, 0, 1))
        with torch.no_grad():
            x = transform(img).unsqueeze(0)
            if self.cuda:
                x = x.cuda()
                scale = scale.cuda()
            tic = time.time()
            out = self.net(x)  # forward pass
            boxes, scores = self.detector.forward(out, self.priors)
        print('Finished in {}'.format(time.time() - tic))
        boxes = boxes[0]
        scores = scores[0]
        boxes *= scale
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        return boxes, scores

    def predict_on_video(self, v_f):
        """Detect on every frame of ``v_f`` and show the annotated frames.

        Returns when the stream ends. The capture and the display window are
        released on exit instead of terminating the interpreter.
        """
        cap = cv2.VideoCapture(v_f)
        try:
            while cap.isOpened():
                ok, frame = cap.read()
                if not ok:
                    print('Done')
                    break
                boxes, scores = self.predict_on_img(frame)
                for j in range(1, self.num_classes):
                    inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
                    # np.where never returns None; check for emptiness.
                    if len(inds) == 0:
                        continue
                    c_bboxes = boxes[inds]
                    c_scores = scores[inds, j]
                    c_dets = np.hstack(
                        (c_bboxes, c_scores[:, np.newaxis])).astype(
                            np.float32, copy=False)
                    keep = nms(c_dets, 0.6)
                    c_dets = c_dets[keep, :]
                    c_bboxes = c_dets[:, :4]
                    label = self.label_map_list[j - 1]
                    for box in c_bboxes:
                        # OpenCV drawing needs integer pixel coordinates.
                        x1, y1, x2, y2 = (int(v) for v in box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 255, 0), 1, 0)
                        cv2.putText(frame, label, (x1, y1),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 255, 0), 1, cv2.LINE_AA)
                cv2.imshow('rr', frame)
                cv2.waitKey(1)
        finally:
            cap.release()
            cv2.destroyAllWindows()
# Smoke test: feed random localisation/confidence tensors through Detect.
import torch

from data.config import VOC_320
from layers.functions import Detect, PriorBox

# Post-processing settings handed to Detect.
top_k, confidence_threshold, nms_threshold = 200, 0.5, 0.45

priorbox = PriorBox(VOC_320)
detector = Detect(21, 0, top_k, confidence_threshold, nms_threshold)

with torch.no_grad():
    priors = priorbox.forward()

# Random stand-ins for network outputs: 6375 rows, 21 confidence columns.
loc = torch.randn((1, 6375, 4))
conf = torch.randn((6375, 21))
# Generated but not passed on: the call below uses arm_loc_data=None.
arm_loc = torch.randn((1, 6375, 4))

out = detector.forward(loc, conf, priors, arm_loc_data=None)
def detect(self, file_name, object):
    """Detect objects in an image file and show the annotated result.

    The annotated image is written to ``my_test.png`` and then loaded into
    ``object.label`` scaled to the height of ``object``.

    Args:
        file_name: path of the image; surrounding whitespace is stripped.
        object: host object providing ``net``, ``cfg``, ``numclass``,
            ``cuda``, ``priorbox``, ``nms_py``, ``label`` and ``height()``.
            (The name shadows the builtin but is kept for callers.)
    """
    print(file_name)
    start_time = time.time()
    img = cv2.imread(file_name.strip())
    if img is None:
        QtWidgets.QMessageBox.information(self, "Alert",
                                          "Please select images")
        return

    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    detector = Detect(object.numclass, 0, object.cfg)
    transform = BaseTransform(object.net.size, (123, 117, 104), (2, 0, 1))
    # The whole forward pass runs without gradient tracking.
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        if object.cuda:
            x = x.cuda()
            scale = scale.cuda()
        out = object.net(x)
        priors = object.priorbox.forward()
        if object.cuda:
            priors = priors.cuda()
        boxes, scores = detector.forward(out, priors)
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    for j in range(1, object.numclass):
        inds = np.where(scores[:, j] > 0.2)[0]  # conf > 0.2
        # np.where never returns None; skip classes with no candidates.
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack(
            (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                        copy=False)
        keep = object.nms_py(c_dets, 0.6)
        c_dets = c_dets[keep, :]
        c_bboxes = c_dets[:, :4]
        for bbox in c_bboxes:
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

    cv2.imwrite("my_test.png", img)
    end_time = time.time()
    print(end_time - start_time)

    img_data = QtGui.QPixmap("my_test.png")
    height = object.height()
    # Qt size arguments must be integers; the aspect-ratio maths is float.
    width = int(object.height() / img_data.height() * img_data.width())
    img_data = img_data.scaled(width, height)
    object.label.resize(width, height)
    object.label.setPixmap(img_data)
    self.setFocus()
def test_net(net, testset):
    """Run ``net`` on every image path in ``testset`` and dump a debug image.

    Each image is resized so its shorter side approaches ``target_size``
    without the longer side exceeding ``max_size``, normalised, and passed
    through ``net`` and ``Detect``. Per-class detections after NMS are stored
    in ``all_boxes`` (and ``all_landmarks`` when ``cfg.FACE_LANDMARK`` is
    set). Finally the class-1 boxes of image 0 are drawn and written to
    ``images/img.jpg``. Nothing is returned.

    Args:
        net: model to evaluate; switched to eval mode here.
        testset: indexable collection of image paths (read with cv2.imread).
    """
    net.eval()
    test_image_nums = len(testset)
    detector = Detect()  # TODO
    num_classes = 2  # TODO
    # Indexed as [class][image]; filled in below.
    all_boxes = [[[] for _ in range(test_image_nums)]
                 for _ in range(num_classes)]
    all_landmarks = [[[] for _ in range(test_image_nums)]
                     for _ in range(num_classes)]
    print(all_boxes)
    for idx in tqdm(range(test_image_nums)):
        with torch.no_grad():
            # image = testset.pull_image(idx)  # TODO
            image = cv2.imread(testset[idx])  # TODO
            target_size = 1600
            max_size = 2150
            # Other (target_size, max_size) pairs tried by the author:
            # (2000, 3000), (640, 900), (640, 640).
            im_shape = image.shape  # H, W, C
            im_size_min = min(im_shape[0:2])
            im_size_max = max(im_shape[0:2])
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            scales = [im_scale]
            for im_scale in scales:
                if im_scale != 1.0:
                    image_new = cv2.resize(image,
                                           None,
                                           None,
                                           fx=im_scale,
                                           fy=im_scale,
                                           interpolation=cv2.INTER_LINEAR)
                else:
                    image_new = image.copy()
                PIXEL_MEANS = np.array([0.406, 0.456, 0.485])  # bgr mean
                PIXEL_STDS = np.array([0.225, 0.224, 0.229])
                PIXEL_SCALE = 255.0
                im_tensor = np.zeros(
                    (3, image_new.shape[0], image_new.shape[1]))
                # Reverse the channel order (index 2 - i) while normalising
                # each channel with its mean and std.
                for i in range(3):
                    im_tensor[
                        i, :, :] = (image_new[:, :, 2 - i] / PIXEL_SCALE -
                                    PIXEL_MEANS[2 - i]) / PIXEL_STDS[2 - i]
                im_tensor = im_tensor.astype(np.float32)
                print("im_tensor: ", im_tensor.shape)
                im_tensor = torch.from_numpy(im_tensor)
                im_tensor = im_tensor.unsqueeze(0)
                if args.gpu:
                    im_tensor = im_tensor.cuda()
                net_out = net(im_tensor)
                if cfg.FACE_LANDMARK:
                    print("im_tensor222: ", im_tensor.shape[2:])
                    scores, boxes, landmarks = detector.forward(
                        net_out, im_tensor.shape[2:])
                else:
                    scores, boxes = detector.forward(net_out,
                                                     im_tensor.shape[2:])
                scores = scores.cpu().numpy()
                # Divide by the resize factor to get back to the coordinates
                # of the original image.
                boxes = boxes.cpu().numpy() / im_scale
                if cfg.FACE_LANDMARK:
                    landmarks = landmarks.cpu().numpy() / im_scale
                print(scores.shape)
                print(boxes.shape)
            # TODO split as a function
            for cls in range(1, num_classes):
                inds = np.where(scores[:, cls] > args.score_thresh)[0]
                if len(inds) == 0:
                    print("XXXXXXX")
                    # Store empty arrays so later code can rely on ndarrays.
                    all_boxes[cls][idx] = np.empty([0, 5], dtype=np.float32)
                    if cfg.FACE_LANDMARK:
                        all_landmarks[cls][idx] = np.empty(
                            [0, 10], dtype=np.float32)
                    continue
                c_boxes = boxes[inds]
                c_scores = scores[inds, cls]
                c_dets = np.hstack(
                    (c_boxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
                if cfg.FACE_LANDMARK:
                    c_landmarks = landmarks[inds]
                keep = nms(c_dets, args.nms_overlap,
                           force_cpu=True)  # TODO soft_nms
                box_num = 150  # previously 50
                keep = keep[:box_num]  # keep only the highest boxes
                c_dets = c_dets[keep, :]
                all_boxes[cls][idx] = c_dets
                if cfg.FACE_LANDMARK:
                    # NOTE(review): the landmarks are stored without applying
                    # ``keep`` (the filtering line below is disabled), so
                    # their rows may not line up with ``c_dets`` — confirm.
                    # c_landmarks = c_landmarks[keep, :]
                    all_landmarks[cls][idx] = c_landmarks
    # NOTE(review): this debug section is assumed to run once, after the
    # image loop; it draws the class-1 boxes of image 0 onto ``image``, which
    # at that point is the last image read — confirm the intended placement.
    bbx = all_boxes[1][0]
    lmks = all_landmarks[1][0]
    print(lmks)
    DEBUG_I = True
    if DEBUG_I:
        for jj in range(bbx.shape[0]):
            sf, st = (int(bbx[jj][0]), int(bbx[jj][1])), (int(bbx[jj][2]),
                                                          int(bbx[jj][3]))
            print(sf, st)
            cv2.rectangle(image, sf, st, (0, 0, 255), thickness=2)
            # Drawing the five landmark points with cv2.circle was disabled
            # by the author at this spot.
        cv2.imwrite("images/img.jpg", image)
def detect_parking_spaces(dir,
                          threshold=0.2,
                          save=False,
                          show=False,
                          cam=-1,
                          gpu=False,
                          config='training/m2det/configs/m2det512_vgg.py',
                          weights='training/m2det/weights/m2det512_vgg.pth'):
    """Run M2Det over the images in ``dir`` (or a camera) and pickle the result.

    For every frame the value returned by ``draw_detection`` as ``loc`` is
    stored in a dict keyed by image path (or by a generated
    ``cam<id>_<index>`` key in camera mode). The dict is pickled to
    ``<dir>/labels/split.txt``.

    Args:
        dir: working directory; images are read from ``<dir>/images``.
            (The name shadows the builtin but is kept for callers.)
        threshold: score threshold forwarded to ``draw_detection``.
        save: write annotated frames to ``<dir>/detection_images``.
        show: display annotated frames in a window.
        cam: camera index; a negative value means "read the image folder".
        gpu: when False, force ``cfg.test_cfg.cuda`` off.
        config: path of the M2Det configuration file.
        weights: path of the trained weights.
    """
    print('Detect Parking Spaces Programe')
    cfg = Config.fromfile(config)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, weights)
    net.eval()
    if not gpu:
        cfg.test_cfg.cuda = False
    with torch.no_grad():
        priors = priorbox.forward()
        if cfg.test_cfg.cuda:
            net = net.cuda()
            priors = priors.cuda()
            cudnn.benchmark = True
        else:
            net = net.cpu()
    print_info('===> Finished constructing and loading model')

    num_classes = cfg.model.m2det_config.num_classes
    _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                (2, 0, 1))
    detector = Detect(num_classes, cfg.loss.bkg_label, anchor_config)
    base = int(np.ceil(pow(num_classes, 1. / 3)))
    colors = [_to_color(x, base) for x in range(num_classes)]
    with open('training/m2det/data/coco_labels.txt', 'r') as label_file:
        cats = [row.strip().split(',')[-1] for row in label_file]
    labels = tuple(['__background__'] + cats)

    capture = None
    im_iter = iter(())
    if cam >= 0:
        capture = cv2.VideoCapture(cam)
    else:
        # Only list the image folder when it is actually the input source.
        im_path = dir + '/images'
        im_iter = iter(
            os.path.join(im_path, fname)
            for fname in sorted(os.listdir(im_path))
            if os.path.splitext(fname)[-1] == '.jpg')
    save_dir = dir + '/detection_images'
    os.makedirs(save_dir, exist_ok=True)

    locs_list = {}
    frame_idx = 0
    while True:
        if cam < 0:
            fname = next(im_iter, None)
            if fname is None:
                break
            image = cv2.imread(fname, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread returns None for unreadable files.
                print_info('skip unreadable image: {}'.format(fname))
                continue
        else:
            ret, image = capture.read()
            if not ret:
                cv2.destroyAllWindows()
                capture.release()
                break
            # Camera frames have no file name; build a stable key so the
            # result dict and the saved images still work in camera mode.
            fname = 'cam{}_{:06d}'.format(cam, frame_idx)
        frame_idx += 1

        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        with torch.no_grad():
            out = net(img)
            boxes, scores = detector.forward(out, priors)
        # Keep the scale factors on the device of the decoded boxes.
        scale = torch.Tensor([w, h, w, h]).to(boxes.device)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()

        allboxes = []
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([det.tolist() + [j] for det in c_dets])
        loop_time = time.time() - loop_start

        # reshape(-1, 6) keeps the array 2-D when nothing was detected, so
        # the column slicing below cannot raise IndexError.
        allboxes = np.array(allboxes, dtype=np.float64).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        # Guard against a zero elapsed time on coarse clocks.
        fps = 1.0 / max(float(loop_time), 1e-6) if cam >= 0 else -1
        im2show, loc = draw_detection(image,
                                      boxes,
                                      scores,
                                      cls_inds,
                                      fps,
                                      threshold,
                                      colors=colors,
                                      labels=labels)
        locs_list[fname] = loc

        if im2show.shape[0] > 1100:
            im2show = cv2.resize(
                im2show,
                (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                 1000))
        if show:
            cv2.imshow('test', im2show)
            if cam < 0:
                cv2.waitKey(1000)
            else:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    capture.release()
                    break
        if save:
            # splitext/basename stay correct when a directory name contains a
            # dot (e.g. "./data/images/a.jpg").
            name = os.path.splitext(os.path.basename(fname))[0]
            cv2.imwrite(f"{save_dir}/{name}.jpg", im2show)

    labels_dir = dir + '/labels'
    os.makedirs(labels_dir, exist_ok=True)
    save_name = labels_dir + '/split.txt'
    with open(save_name, 'wb') as f:
        pickle.dump(locs_list, f)
def main():
    """Detect objects in a video using a static network plus a tracking network.

    Every ``interval`` frames ``static_net`` is run and its decoded output
    replaces the priors handed to the detector; ``net`` is run on every
    frame. Detections scoring above ``confidence_threshold`` are drawn, the
    frame is shown at 640x480 and, when ``save_dir`` is set, appended to an
    MJPG ``.avi`` file. Pressing space pauses playback; while paused, ``s``
    saves the current frame and space resumes.

    All settings (``backbone``, ``ssd_dim``, ``video_name``, ``save_dir``,
    ``interval``, ``deform`` ...) are read from module-level names.
    """
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn,
                        deform=deform)
    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    # Overwritten per frame below from the decoded frame's own shape.
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    # State carried across frames between two static-network passes.
    offset_list = list()
    ref_loc = list()
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            # Key frame: refresh the priors from the static network.
            if frame_num % interval == 0:
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[2]
                    offset_list = list()
            # ret_off is True only when deform is set and no offsets are
            # cached yet.
            out = net(x,
                      ref_loc=ref_loc,
                      offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            # NOTE(review): the scale tensor is created with
            # torch.cuda.FloatTensor regardless of ``device`` — this will
            # fail on a CPU-only run; confirm CUDA is always intended.
            detections = detector.forward(out[0],
                                          out[1],
                                          priors_static,
                                          scale=torch.cuda.FloatTensor(
                                              [w, h, w, h]))
            # NOTE(review): this checks len(detections) but then reads
            # out[2]; it looks like len(out) may have been intended —
            # confirm. The nesting of the two assignments is assumed.
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
        # NOTE(review): ``out`` is rebound to an empty list that nothing
        # appends to, so the ``torch.save(out, ...)`` below stores an empty
        # list — confirm.
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                # Rows of length 6 carry an extra trailing value (identity).
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                # NOTE(review): boxes are multiplied by w/h here although a
                # scale of [w, h, w, h] was already passed to the detector —
                # confirm which side does the scaling.
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)
                score = dets[0]
                if score > confidence_threshold:
                    # The split/slice chain extracts the leading digits from
                    # the printed form of the rounded score.
                    put_str = VID_CLASSES_name[j - 1] + ':' + str(
                        np.around(score, decimals=2)).split('(')[-1].split(
                            ',')[0][:4]
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                                  color,
                                  thickness=2)
                    cv2.putText(frame_draw,
                                put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX,
                                0.8,
                                color=color,
                                thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:  # space: pause until space is pressed again
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir,
                                         '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir,
                                         '%s.jpg' % str(frame_num)), frame)
                elif in_ch == 32:
                    break
    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()