def __init__(self, cfg=None, weight=None, img_size=(416, 416), device=None):
    if cfg is None:
        cfg = os.path.dirname(os.path.abspath(__file__))
        cfg = os.path.join(cfg, 'cfg/yolov-obj.cfg')
    if weight is None:
        weight = os.path.dirname(os.path.abspath(__file__))
        weight = os.path.join(weight, 'cfg/yolov-obj_final.weights')
    assert os.path.exists(cfg), 'yolo config file must exist'
    assert os.path.exists(weight), 'yolo weight file must exist'

    self.img_size = img_size
    model = Darknet(cfg, img_size)
    load_darknet_weights(model, weight)
    model.fuse()  # fuse Conv2d + BatchNorm2d layers for faster inference
    self.model = model.to(device)
    self.model.eval()
    print('loaded detector weights from %s' % weight)
    self.device = device
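# A minimal usage sketch for the __init__ above, assuming it belongs to a
# detector class here called ObjDetector (the class name is a placeholder;
# the cfg/weights defaults resolve relative to the module's own directory).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = ObjDetector(device=device)  # falls back to the bundled cfg/yolov-obj.cfg and weights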
def __init__(self, config, device):
    self.opt = opt = config
    self.conf_thres = opt['conf_thres']
    self.nms_thres = opt['nms_thres']
    self.img_size = opt['img_size']
    self.out_img_size = out_size = opt['out_size']

    # Set up model
    self.model = Darknet(opt['model_def'], img_size=opt['img_size']).to(device)
    if opt['weights_path'].endswith(".weights"):
        # Load darknet weights
        self.model.load_darknet_weights(opt['weights_path'])
    else:
        # Load checkpoint weights
        self.model.load_state_dict(torch.load(opt['weights_path']))
    self.model.eval()  # Set in evaluation mode

    # Extract class labels from file
    self.classes = yolo_utils.load_classes(opt['class_path'])

    mode = "nearest"
    self.b1_scale = nn.Upsample(scale_factor=out_size // 8, mode=mode)
    self.b2_scale = nn.Upsample(scale_factor=out_size // 16, mode=mode)
    self.b3_scale = nn.Upsample(scale_factor=out_size // 32, mode=mode)
    self.no_detects = 0
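# A hedged example of the config dict this __init__ expects; every key is read
# above. The paths are placeholders, and the img_size/out_size values are
# chosen to be consistent with the 8/16/32 grid reshapes in the detect()
# method of the full YoloDetector class further below.
config = {
    'conf_thres': 0.8,
    'nms_thres': 0.4,
    'img_size': 256,
    'out_size': 32,
    'model_def': 'yolov3/config/yolov3.cfg',
    'weights_path': 'yolov3/weights/yolov3.weights',
    'class_path': 'yolov3/data/coco.names',
}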
def define_yolo(model_def):
    """
    Returns a Darknet object: yolo.
    The forward function of yolo returns:
        - (featuremap, yolo_outputs)        # for inference
        - (loss, featuremap, yolo_outputs)  # for training
    """
    yolo = Darknet(model_def)
    return yolo
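# A minimal sketch of using define_yolo for inference; the return tuple
# follows the docstring above, and the 'config/yolov3.cfg' path and the
# 416x416 dummy input are assumptions for illustration.
yolo = define_yolo('config/yolov3.cfg')
yolo.eval()
with torch.no_grad():
    featuremap, yolo_outputs = yolo(torch.zeros(1, 3, 416, 416))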
def __init__(
        self, device, img_size=416,
        person_detector=False, video=False, return_dict=False
):
    homedir = '/'
    weights_path = os.path.join(homedir, 'torch/models/yolov3.weights')
    os.makedirs(os.path.dirname(weights_path), exist_ok=True)
    if not os.path.isfile(weights_path):
        url = 'https://pjreddie.com/media/files/yolov3.weights'
        outdir = os.path.dirname(weights_path)
        download_url(url, outdir)

    model_def = os.path.join(homedir, 'torch/config/yolov3.cfg')
    os.makedirs(os.path.dirname(model_def), exist_ok=True)
    if not os.path.isfile(model_def):
        url = 'https://raw.githubusercontent.com/mkocabas/yolov3-pytorch/master/yolov3/config/yolov3.cfg'
        outdir = os.path.dirname(model_def)
        download_url(url, outdir)

    self.conf_thres = 0.8
    self.nms_thres = 0.4
    self.img_size = img_size
    self.video = video
    self.person_detector = person_detector
    self.device = device
    self.return_dict = return_dict

    self.model = Darknet(model_def, img_size=img_size).to(device)
    self.model.load_darknet_weights(weights_path)
    # self.model.load_state_dict(torch.load(weights_path))
    self.model.eval()
def __init__(self):
    self.img_size = 512
    self.augment = False
    self.half = False
    self.agnostic_nms = False
    self.iou_thres = 0.6
    self.fourcc = 'mp4v'
    self.conf_thres = 0.3
    self.out = 'output'
    self.save_txt = True
    self.view_img = True
    self.save_img = True
    weights = 'yolov3/weights/yolov3.pt'
    self.device = torch.device('cuda')  # torch.device, so the .type check below works
    self.model = Darknet('yolov3/cfg/yolov3.cfg', self.img_size)
    self.model.load_state_dict(torch.load(weights, map_location=self.device)['model'])

    # Second-stage classifier
    self.classify = False
    if self.classify:
        self.modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        self.modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=self.device)['model'])  # load weights
        self.modelc.to(self.device).eval()

    # Eval mode
    self.model.to(self.device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Half precision
    self.half = self.half and self.device.type != 'cpu'  # half precision only supported on CUDA
    if self.half:
        self.model.half()

    # Get names and colors
    self.names = load_classes('yolov3/data/coco.names')
def __init__(self, device, img_size=416,
             person_detector=False, video=False, return_dict=False):
    homedir = os.path.expanduser("~")
    weights_path = os.path.join(homedir, '.torch/models/yolov3.weights')
    model_def = os.path.join(homedir, '.torch/config/yolov3.cfg')

    self.conf_thres = 0.8
    self.nms_thres = 0.4
    self.img_size = img_size
    self.video = video
    self.person_detector = person_detector
    self.device = device
    self.return_dict = return_dict

    self.model = Darknet(model_def, img_size=img_size).to(device)
    self.model.load_darknet_weights(weights_path)
    # self.model.load_state_dict(torch.load(weights_path))
    self.model.eval()
def init_model(cfg_path, device):
    with open(cfg_path) as f:
        opt = yaml.safe_load(f)  # safe_load avoids yaml.load's unsafe default loader
    nms_thres = opt['nms_thres']

    # Set up model
    model = Darknet(opt['model_def'], img_size=opt['img_size']).to(device)
    if opt['weights_path'].endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt['weights_path'])
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt['weights_path']))
    model.eval()  # Set in evaluation mode

    # Extract class labels from file
    classes = utils.load_classes(opt['class_path'])
    return model, classes
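# A minimal sketch of calling init_model; the YAML keys mirror those read
# above ('nms_thres', 'model_def', 'img_size', 'weights_path', 'class_path'),
# and 'config/detector.yaml' is an assumed path.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, classes = init_model('config/detector.yaml', device)
print('loaded %d classes' % len(classes))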
class YOLOv3:
    def __init__(
            self, device, img_size=416,
            person_detector=False, video=False, return_dict=False
    ):
        homedir = '/'
        weights_path = os.path.join(homedir, 'torch/models/yolov3.weights')
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)
        if not os.path.isfile(weights_path):
            url = 'https://pjreddie.com/media/files/yolov3.weights'
            outdir = os.path.dirname(weights_path)
            download_url(url, outdir)

        model_def = os.path.join(homedir, 'torch/config/yolov3.cfg')
        os.makedirs(os.path.dirname(model_def), exist_ok=True)
        if not os.path.isfile(model_def):
            url = 'https://raw.githubusercontent.com/mkocabas/yolov3-pytorch/master/yolov3/config/yolov3.cfg'
            outdir = os.path.dirname(model_def)
            download_url(url, outdir)

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()

    @torch.no_grad()
    def __call__(self, batch):
        if self.video:
            inp_batch = []
            for img in batch:
                # Pad to square resolution
                img, _ = pad_to_square(img, 0)
                # Resize
                img = resize(img, self.img_size)
                inp_batch.append(img)
            inp_batch = torch.stack(inp_batch).float().to(self.device)
        else:
            inp_batch = batch

        detections = self.model(inp_batch)
        detections = non_max_suppression(detections, self.conf_thres, self.nms_thres)

        for idx, det in enumerate(detections):
            if det is None:
                det = {
                    'boxes': torch.empty(0, 4),
                    'scores': torch.empty(0),
                    'classes': torch.empty(0),
                }
                detections[idx] = det
                continue

            if self.video:
                det = rescale_boxes(det, self.img_size, batch.shape[-2:])

            if self.person_detector:
                det = det[det[:, 6] == 0]  # keep only the person class (COCO id 0)

            if self.return_dict:
                det = {
                    'boxes': det[:, :4],
                    'scores': det[:, 4] * det[:, 5],
                    'classes': det[:, 6],
                }

            detections[idx] = det

        return detections
class YOLOv3:
    def __init__(self, device, img_size=416,
                 person_detector=False, video=False, return_dict=False):
        homedir = os.path.expanduser("~")
        weights_path = os.path.join(homedir, '.torch/models/yolov3.weights')
        model_def = os.path.join(homedir, '.torch/config/yolov3.cfg')

        self.conf_thres = 0.8
        self.nms_thres = 0.4
        self.img_size = img_size
        self.video = video
        self.person_detector = person_detector
        self.device = device
        self.return_dict = return_dict

        self.model = Darknet(model_def, img_size=img_size).to(device)
        self.model.load_darknet_weights(weights_path)
        # self.model.load_state_dict(torch.load(weights_path))
        self.model.eval()

    @torch.no_grad()
    def __call__(self, batch):
        if self.video:
            inp_batch = []
            for img in batch:
                # Pad to square resolution
                img, _ = pad_to_square(img, 0)
                # Resize
                img = resize(img, self.img_size)
                inp_batch.append(img)
            inp_batch = torch.stack(inp_batch).float().to(self.device)
        else:
            inp_batch = batch

        detections = self.model(inp_batch)
        detections = non_max_suppression(detections, self.conf_thres, self.nms_thres)

        for idx, det in enumerate(detections):
            if det is None:
                det = {
                    'boxes': torch.empty(0, 4),
                    'scores': torch.empty(0),
                    'classes': torch.empty(0),
                }
                detections[idx] = det
                continue

            if self.video:
                det = rescale_boxes(det, self.img_size, batch.shape[-2:])

            if self.person_detector:
                det = det[det[:, 6] == 0]  # keep only the person class (COCO id 0)

            if self.return_dict:
                det = {
                    'boxes': det[:, :4],
                    'scores': det[:, 4] * det[:, 5],
                    'classes': det[:, 6],
                }

            detections[idx] = det

        return detections
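# A minimal usage sketch for the YOLOv3 wrapper above, run as a person
# detector on a pre-resized batch (the video=False path). The batch shape and
# [0, 1] value range are assumptions; the dict keys follow __call__.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = YOLOv3(device, img_size=416, person_detector=True, return_dict=True)
batch = torch.rand(2, 3, 416, 416, device=device)  # two dummy RGB frames
for det in detector(batch):
    print(det['boxes'].shape, det['scores'].shape, det['classes'].shape)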
def myDetect(save_img=False, imgSize=416, outputPath="../output", inputSource='0',
             opt_names='', opt_cfg='cfg/yolov3-spp.cfg',
             currentWeights='weights/yolov3-spp.weights', opt_fourcc='mp4v',
             opt_half=False, opt_view_img=False, opt_save_txt=False, opt_device='',
             opt_agnostic_nms=False, opt_iou_thres=0.5, opt_conf_thres=0.3,
             opt_classes=0):
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    img_size = (416, 256) if ONNX_EXPORT else imgSize
    out, source, weights, half, view_img, save_txt = (
        outputPath, inputSource, currentWeights, opt_half, opt_view_img, opt_save_txt)
    webcam = (source == '0' or source.startswith('rtsp')
              or source.startswith('http') or source.endswith('.txt'))

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt_device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt_cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'weights/export.onnx', verbose=False,
                          opset_version=10)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get names and colors
    names = load_classes(opt_names)
    # print("names", names)
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # custom per-class counters
    datium = {"hat": 0, "person": 0}

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img)[0]

        if opt_half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt_conf_thres, opt_iou_thres,
                                   classes=opt_classes, agnostic=opt_agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                datium = {"hat": 0, "person": 0}  # reset custom counters per image

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string
                    datium[names[int(c)]] = int(n)

                # with open('resultData.txt', 'w') as f:  # open the file object
                #     f.write(str(datium))  # write the string to the file
                # localtime = time.time()
                print(datium['hat'], datium['person'])
                try:
                    # execute the SQL statement
                    cursor.execute(
                        "INSERT INTO maskData(mask,nomask) VALUES({hat},{person})"
                        .format(hat=datium['hat'], person=datium['person']))
                    # commit to the database
                    db.commit()
                except Exception:
                    print("Database insert failed")
                    # roll back on error
                    db.rollback()
                # db.close()

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path + '.txt', 'a') as file:
                            file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, time.time() - t))

            # Stream results
            if view_img:
                # cv2.imshow("webcam", im0)
                cv2.imwrite("C:/Users/y2554/Desktop/mask/server/output/camera.jpg", im0)
                # if cv2.waitKey(1) == ord('q'):  # q to quit
                #     raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    print("save_path:{}".format(save_path))
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc('H', '2', '6', '4'),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        print('Results saved to %s' % os.sep + save_path)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    # return save_path
    combineData = [save_path, datium]
    return combineData
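# myDetect() relies on module-level `db` and `cursor` handles for the
# maskData inserts above; the original does not show how they are created.
# A hedged sketch with pymysql (the library choice, host, credentials, and
# database name are all assumptions):
import pymysql
db = pymysql.connect(host='localhost', user='root', password='******', database='mask')
cursor = db.cursor()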
def detect(save_img=False):
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    img_size = (416, 256) if ONNX_EXPORT else opt.img_size
    out, source, weights, half, view_img, save_txt = (
        opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt)
    webcam = (source == '0' or source.startswith('rtsp')
              or source.startswith('http') or source.endswith('.txt'))

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'weights/export.onnx', verbose=False,
                          opset_version=10)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get names and colors
    names = load_classes(opt.names)
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    emptyList = []  # collect paths of images with no detections
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img)[0]

        if opt.half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print('save_path ' + save_path[:-4])
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is None:
                emptyList.append(save_path)
                print(save_path)
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path[:-4] + '.txt', 'a') as file:
                            file.write(('%s %.3f %.2f %.2f %.2f %.2f ' + '\n')
                                       % (names[int(cls)], conf, *xyxy))
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, time.time() - t))
            print(emptyList)

            # Stream results
            if view_img:
                cv2.imshow("webcam", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    # print(save_path)
                    # pirate = cv2.imread(save_path)
                    # cv2.imshow('pirate', pirate)
                    # cv2.waitKey(0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
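# detect() reads a module-level `opt` namespace; a hedged sketch of a parser
# covering the attributes used above (every default value is an assumption).
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', default='cfg/yolov3-spp.cfg')
    parser.add_argument('--names', default='data/coco.names')
    parser.add_argument('--weights', default='weights/yolov3-spp.weights')
    parser.add_argument('--source', default='0')
    parser.add_argument('--output', default='output')
    parser.add_argument('--img-size', type=int, default=416)
    parser.add_argument('--conf-thres', type=float, default=0.3)
    parser.add_argument('--iou-thres', type=float, default=0.5)
    parser.add_argument('--fourcc', default='mp4v')
    parser.add_argument('--device', default='')
    parser.add_argument('--classes', nargs='+', type=int)
    parser.add_argument('--half', action='store_true')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--agnostic-nms', action='store_true')
    opt = parser.parse_args()
    detect()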
class ObjectDetector:
    def __init__(self):
        self.img_size = 512
        self.augment = False
        self.half = False
        self.agnostic_nms = False
        self.iou_thres = 0.6
        self.fourcc = 'mp4v'
        self.conf_thres = 0.3
        self.out = 'output'
        self.save_txt = True
        self.view_img = True
        self.save_img = True
        weights = 'yolov3/weights/yolov3.pt'
        self.device = torch.device('cuda')  # torch.device, so the .type check below works
        self.model = Darknet('yolov3/cfg/yolov3.cfg', self.img_size)
        self.model.load_state_dict(torch.load(weights, map_location=self.device)['model'])

        # Second-stage classifier
        self.classify = False
        if self.classify:
            self.modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
            self.modelc.load_state_dict(
                torch.load('weights/resnet101.pt', map_location=self.device)['model'])  # load weights
            self.modelc.to(self.device).eval()

        # Eval mode
        self.model.to(self.device).eval()

        # Fuse Conv2d + BatchNorm2d layers
        # model.fuse()

        # Half precision
        self.half = self.half and self.device.type != 'cpu'  # half precision only supported on CUDA
        if self.half:
            self.model.half()

        # Get names and colors
        self.names = load_classes('yolov3/data/coco.names')

    def detect(self, img):
        # Run inference
        im0 = img.copy()

        # Padded resize
        img = letterbox(im0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        with torch.no_grad():
            pred = self.model(img, augment=self.augment)[0]
        t2 = torch_utils.time_synchronized()
        # print('Predict time: (%.3fs)' % (t2 - t1))

        # to float
        if self.half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                                   multi_label=False, classes=None,
                                   agnostic=self.agnostic_nms)

        # Apply Classifier
        if self.classify:
            pred = apply_classifier(pred, self.modelc, img, im0)

        # Process detections
        det = pred[0]
        sce = Scene(im0)
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Write results
            for *xyxy, conf, cls in det:
                # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                obj = Object(self.names[int(cls)], xyxy, conf)
                sce.objs.append(obj)
        return sce
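# A minimal usage sketch for ObjectDetector; 'test.jpg' is an assumed input
# path, and Scene/Object come from the surrounding project.
detector = ObjectDetector()
frame = cv2.imread('test.jpg')  # BGR image, as letterbox/detect expect
scene = detector.detect(frame)
for obj in scene.objs:
    print(obj)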
class YoloDetector:
    def __init__(self, config, device):
        self.opt = opt = config
        self.conf_thres = opt['conf_thres']
        self.nms_thres = opt['nms_thres']
        self.img_size = opt['img_size']
        self.out_img_size = out_size = opt['out_size']

        # Set up model
        self.model = Darknet(opt['model_def'], img_size=opt['img_size']).to(device)
        if opt['weights_path'].endswith(".weights"):
            # Load darknet weights
            self.model.load_darknet_weights(opt['weights_path'])
        else:
            # Load checkpoint weights
            self.model.load_state_dict(torch.load(opt['weights_path']))
        self.model.eval()  # Set in evaluation mode

        # Extract class labels from file
        self.classes = yolo_utils.load_classes(opt['class_path'])

        mode = "nearest"
        self.b1_scale = nn.Upsample(scale_factor=out_size // 8, mode=mode)
        self.b2_scale = nn.Upsample(scale_factor=out_size // 16, mode=mode)
        self.b3_scale = nn.Upsample(scale_factor=out_size // 32, mode=mode)
        self.no_detects = 0

    def rescale_boxes(self, boxes, current_dim, original_shape):
        """Rescales bounding boxes to the original shape."""
        orig_h, orig_w = original_shape
        # The amount of padding that was added
        pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
        pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
        # Image height and width after padding is removed
        unpad_h = current_dim - pad_y
        unpad_w = current_dim - pad_x
        # Rescale bounding boxes to dimension of original image
        boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
        boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
        boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
        boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
        return boxes

    @staticmethod
    def class_selector():
        with open("yolov3/data/coco.names", "r") as f:
            yolo_classes = f.readlines()
        yolo_classes = [x.strip() for x in yolo_classes]
        indexer = torch.zeros(len(CLASSES), len(yolo_classes)).bool()
        for i, (k, v) in enumerate(CLASSES.items()):
            indexer[i, yolo_classes.index(v)] = 1
        return indexer

    def detect(self, rgb_img):
        """Should run with RGB images normalized in [0, 1]."""
        self.no_detects += 1
        max_batch = 128
        with torch.no_grad():
            multi_batch = []
            all_imgs = rgb_img
            for i in range(len(all_imgs) // max_batch + 1):
                rgb_img = all_imgs[i * max_batch: (i + 1) * max_batch]
                if len(rgb_img) <= 0:
                    break
                bs = rgb_img.size(0)
                detections = self.model(rgb_img)
                b1, b2, b3 = (detections[:, :192],
                              detections[:, 192:960],
                              detections[:, 960:])
                ordd = (0, 1, 4, 2, 3)
                b1 = b1.view(bs, 3, 8, 8, 85).permute(*ordd).contiguous().view(bs, -1, 8, 8)
                b2 = b2.view(bs, 3, 16, 16, 85).permute(*ordd).contiguous().view(bs, -1, 16, 16)
                b3 = b3.view(bs, 3, 32, 32, 85).permute(*ordd).contiguous().view(bs, -1, 32, 32)
                b1 = self.b1_scale(b1)
                b2 = self.b2_scale(b2)
                b3 = self.b3_scale(b3)
                out = (b1 + b2 + b3) / 3
                out = out.view(bs, 3, 85, 32, 32)
                out = out.mean(dim=1)  # .permute(0, 3, 1, 2)
                # out = torch.cat([b1, b2, b3], dim=1)
                multi_batch.append(out)

            if len(multi_batch) > 1:
                out = torch.cat(multi_batch, dim=0)
            else:
                out = multi_batch[0]
        out = out.detach()
        return out

    def get_bounding_boxes(self, img, display=False):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        t_img = torch.from_numpy(
            img.astype('float32') / 255.0).cuda().permute(2, 0, 1).unsqueeze(0)  # float32 to match model weights
        detections = self.model(t_img)
        detections = yolo_utils.non_max_suppression(
            detections, self.conf_thres, self.nms_thres)[0]

        # Draw bounding boxes and labels of detections
        if display:
            img_disp = img.copy()
        if detections is not None:
            # Rescale boxes to original image
            detections = self.rescale_boxes(detections, self.img_size, img.shape[:2])
            unique_labels = detections[:, -1].cpu().unique()
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                print("\t+ Label: %s, Conf: %.5f"
                      % (self.classes[int(cls_pred)], cls_conf.item()))
                box_w = x2 - x1
                box_h = y2 - y1
                print("{} {} {} {}".format(x1, y1, x2, y2))
                # Draw a rectangle for each detection
                if display:
                    img_disp = cv2.rectangle(img_disp, (int(x2), int(y2)),
                                             (int(x1), int(y1)), (255, 0, 0), 2)
        if display:
            cv2.imshow("Test", img_disp)
            cv2.waitKey(0)
        return detections
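# A minimal usage sketch for YoloDetector.get_bounding_boxes, reusing the
# example `config` dict shown earlier; 'frame.jpg' is an assumed input path.
detector = YoloDetector(config, torch.device('cuda'))
frame = cv2.imread('frame.jpg')  # BGR, converted to RGB inside
detections = detector.get_bounding_boxes(frame, display=False)
if detections is not None:
    print('%d boxes' % len(detections))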
def main(args: argparse.Namespace):
    # set up logging
    logname = ''
    if args.islog:
        logdir = Path('../logs')
        if not logdir.exists():
            logdir.mkdir(parents=True)
        now_dt = datetime.now()
        logname = '{}-{:d}-inference.log'.format(
            now_dt.strftime('%m%dT%H%M%S'), now_dt.microsecond)
        logname = str(logdir / logname)
    logger = logging.getLogger(__name__)
    log_handler(logger, logname=logname)
    logger.info(args)

    # prepare video IO
    cap = cv2.VideoCapture(args.video)
    video_nframe = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    video_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    video_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    logger.info('video h={}, w={}, fps={:.3f}, nframe={}'.format(
        int(video_h), int(video_w), video_fps, int(video_nframe)))

    output_dir = Path(args.output_dir)
    if not output_dir.exists():
        output_dir.mkdir(parents=True)
    output_videoname = str(output_dir / '{}.avi'.format(Path(args.video).stem))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_videoname, fourcc=fourcc, fps=int(video_fps),
                          frameSize=(int(video_w), int(video_h)))

    # load model and weight
    logger.info('load model')
    model = Darknet(args.config, img_size=args.img_size)
    logger.info('load weight')
    model.load_weights(args.checkpoint)
    model.cuda()
    model.eval()
    classes = load_classes(args.classname)
    tracker = SORT()

    # draw setting
    cmap = plt.get_cmap('tab20b')
    bbox_palette = [cmap(i)[:3] for i in np.linspace(0, 1, 1000)]
    random.shuffle(bbox_palette)

    # loop over the video
    for frame_idx in tqdm(range(int(video_nframe))):
        ok, frame = cap.read()
        if not ok:
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pilimg = Image.fromarray(frame)

        # detection
        _start_time = datetime.now()
        detections = detect_image(pilimg, model, img_size=args.img_size)
        _cost_time = datetime.now() - _start_time

        # image and bbox transition
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        image = np.array(pilimg)
        pad_x = max(image.shape[0] - image.shape[1], 0) * (args.img_size / max(image.shape))
        pad_y = max(image.shape[1] - image.shape[0], 0) * (args.img_size / max(image.shape))
        unpad_h = args.img_size - pad_y
        unpad_w = args.img_size - pad_x

        if detections is not None:
            logger.debug('detect frame {} in {}, get detections {}'.format(
                frame_idx + 1, str(_cost_time), detections.shape))
            tracked_detections = tracker.update(detections.cpu())
            unique_labels = detections[:, -1].cpu().unique()
            num_unique_labels = len(unique_labels)
            for x1, y1, x2, y2, obj_id, cls_pred in tracked_detections:
                box_h = int(((y2 - y1) / unpad_h) * frame.shape[0])
                box_w = int(((x2 - x1) / unpad_w) * frame.shape[1])
                y1 = int(((y1 - pad_y // 2) / unpad_h) * frame.shape[0])
                x1 = int(((x1 - pad_x // 2) / unpad_w) * frame.shape[1])
                label = classes[int(cls_pred)]
                color = bbox_palette[int(obj_id) % len(bbox_palette)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (x1, y1), (x1 + box_w, y1 + box_h), color, 2)
                cv2.rectangle(frame, (x1, y1 - 35), (x1 + len(label) * 19 + 60, y1),
                              color, -1)
                cv2.putText(frame, '{}-{}'.format(label, int(obj_id)), (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3)
        out.write(frame)

    cap.release()
    out.release()
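# A hedged sketch of the CLI entry point for main(); the flag names mirror the
# attributes read above (args.video, args.config, args.checkpoint,
# args.classname, args.img_size, args.output_dir, args.islog), and every
# default value is an assumption.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', required=True, help='input video path')
    parser.add_argument('--config', default='config/yolov3.cfg')
    parser.add_argument('--checkpoint', default='weights/yolov3.weights')
    parser.add_argument('--classname', default='data/coco.names')
    parser.add_argument('--img-size', type=int, default=416)
    parser.add_argument('--output-dir', default='../output')
    parser.add_argument('--islog', action='store_true')
    main(parser.parse_args())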