def detect(self, img_path):
    # Load the image as an RGB tensor and remember its original size.
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
    _, H, W = img.shape
    # Pad to a square, then resize to the network input resolution.
    img, pad = pad_to_square(img, 0)
    imgs = torch.stack([resize(img, 416)]).to('cuda')
    with torch.no_grad():
        output = self.net(imgs)
        output = non_max_suppression(output, 0.5, 0.5)[0]
    # Map boxes back to the original image size; the tensor lives on the
    # GPU here, so move it to the CPU before calling .numpy().
    output = rescale_boxes(output, 416, (H, W)).cpu().numpy()
    detections = []
    boxes, confidences, class_ids = [], [], []  # side lists; not returned
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in output:
        boxes.append([x1, y1, int(x2 - x1), int(y2 - y1)])
        confidences.append(float(cls_conf))
        class_ids.append(int(cls_pred))
        class_ = self.get_class(int(cls_pred))
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        box_2d = [top_left, bottom_right]
        detections.append(Detection(box_2d, class_))
    return detections
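# The detect() and Yolo_detect() snippets assume the pad_to_square and
# resize helpers from the PyTorch-YOLOv3 utilities. A minimal sketch of
# what they do (letterbox-pad with a constant value, then nearest-neighbor
# resize); the exact upstream implementations may differ slightly:
import torch
import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img: (C, H, W) tensor. Pad the shorter side so that H == W.
    c, h, w = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad takes (left, right, top, bottom) for the last two dims.
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    return F.pad(img, pad, mode="constant", value=pad_value), pad

def resize(image, size):
    # Nearest-neighbor resize of a single (C, H, W) image tensor.
    return F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)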
def Yolo_detect(model, camInputFrame, img_size=416, conf_thres=0.8, nms_thres=0.4):
    img = transforms.ToTensor()(Image.fromarray(camInputFrame))
    # Pad to square resolution
    img, _ = pad_to_square(img, 0)
    # Resize and add a batch dimension: (1, 3, 416, 416)
    img = resize(img, img_size)
    img = img.unsqueeze(0)
    input_imgs = img.cuda()
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, conf_thres, nms_thres)
    if detections is not None:
        detections = detections[0]
    if detections is not None:
        # Map boxes back to the original frame size.
        detections = rescale_boxes(detections, img_size, camInputFrame.shape[:2])
    return detections
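# A minimal usage sketch for Yolo_detect(), assuming a Darknet model loaded
# as in the YOLO() entry point below and an OpenCV capture source. Note that
# Image.fromarray() expects RGB, so the BGR frame is converted first.
import cv2

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if ret:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # rows of (x1, y1, x2, y2, conf, cls_conf, cls_pred), or None
    dets = Yolo_detect(model, rgb)
    if dets is not None:
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in dets:
            print(int(cls_pred), float(conf))
cap.release()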
##=== main train ===
for epoch in range(opt.epochs):
    model.train()
    if TQDM_USE:
        dataloader = tqdm(dataloader)
    for batch_i, (_, imgs, targets) in enumerate(dataloader):
        batches_done = len(dataloader) * epoch + batch_i
        # imgs.shape    = (batch_size, 3, img_size, img_size)
        # targets.shape = (num_bboxes, 6), 6 = (batch_idx, label, x, y, w, h)

        ##=== multi-scale training ===
        # Select a new image size (a multiple of 32) every 10 batches
        if opt.multiscale_training and batch_i % 10 == 0:
            img_cur_size = random.choice(range(img_min_size, img_max_size + 1, 32))
            imgs = resize(imgs, img_cur_size)

        imgs = Variable(imgs.to(device))
        targets = Variable(targets.to(device), requires_grad=False)

        loss, outputs = model(imgs, targets)
        loss.backward()

        # Step only once every opt.gradient_accumulations batches; the
        # original `if batches_done % opt.gradient_accumulations:` stepped
        # on every non-multiple, which inverts the intended accumulation.
        if batches_done % opt.gradient_accumulations == 0:
            optimizer.step()
            optimizer.zero_grad()

        model.seen += imgs.size(0)
        # === Log metrics at each YOLO layer ===
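# The training loop above calls resize() on a whole batch of shape
# (N, C, H, W), whereas the single-image helper sketched earlier adds and
# removes a batch dim. A batched variant, assuming the same
# nearest-neighbor policy:
import torch.nn.functional as F

def resize_batch(imgs, size):
    # imgs: (N, C, H, W) tensor; size: target square side (multiple of 32)
    return F.interpolate(imgs, size=size, mode="nearest")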
if not ret:
    break
print("--------------- reading frame " + str(nums))
frame_start_t = time.time()
# Convert the cv2 BGR frame to PIL RGB, then to a tensor
# img = torchvision.transforms.ToTensor()(Image.open(img_path).convert(mode="RGB"))
frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
img = torchvision.transforms.ToTensor()(frame_pil.convert(mode="RGB"))
# NEW: a handle for drawing annotations onto the PIL frame
draw = ImageDraw.Draw(frame_pil)
input_imgs, _ = pad_to_square(img, 0)
# Resize
input_imgs = resize(input_imgs, opt.img_size).unsqueeze(0)
# Configure input
input_imgs = Variable(input_imgs.type(Tensor))
tensor_t = time.time()
print("Tensor conversion took: " + str(tensor_t - frame_start_t))
# Run detection
with torch.no_grad():
    detections = model(input_imgs.to(device))
    detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)[0]
detect_t = time.time()
print("Object detection took: " + str(detect_t - tensor_t))
# Process the detections for this frame
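# The excerpt ends before the per-frame result handling; a minimal sketch
# of how the `draw` handle might be used, assuming the usual detection
# layout (x1, y1, x2, y2, conf, cls_conf, cls_pred) after rescale_boxes()
# and a `classes` name list as loaded in YOLO() below:
if detections is not None:
    detections = rescale_boxes(detections, opt.img_size, frame.shape[:2])
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        box = [int(x1), int(y1), int(x2), int(y2)]
        draw.rectangle(box, outline=(255, 0, 0), width=2)
        draw.text((box[0], box[1]), classes[int(cls_pred)], fill=(255, 0, 0))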
def YOLO():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.2, help="iou threshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--video", type=str, required=True, help="input video")
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--output", default="./output", help="output dir")
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))
    model.eval()  # Set in evaluation mode

    classes = load_classes(opt.class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture(opt.video)
    cap.set(3, 1280)  # frame width
    cap.set(4, 720)   # frame height
    # out = cv2.VideoWriter(
    #     "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0,
    #     (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")
    while True:
        try:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:  # end of stream; without this check the loop never exits
                break
            frame = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            # Extract image as PyTorch tensor
            img = transforms.ToTensor()(frame)
            # Pad to square resolution
            img, _ = pad_to_square(img, 0)
            # Resize
            img = resize(img, opt.img_size)
            img = img.unsqueeze(0)
            # Configure input (nn.Variable does not exist; Variable is torch.autograd.Variable)
            input_imgs = Variable(img.type(Tensor))
            # Get detections
            with torch.no_grad():
                detections = model(input_imgs)
                detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            detections = [d for d in detections if d is not None]
            if len(detections) > 0:
                # Rescale boxes to original image
                detections = rescale_boxes(detections[0], opt.img_size, frame.shape[:2])
                frame = cvDrawBoxes(frame, detections, classes)
            current_time = datetime.datetime.now()
            # Save a snapshot when the tenths digit of the clock is 0 (roughly once per second)
            if int(time.time() * 10) % 10 == 0:
                str_date = datetime.datetime.strftime(current_time, "%Y%m%d")
                str_time = datetime.datetime.strftime(current_time, "%Y%m%d%H%M%S")
                os.makedirs(os.path.join(opt.output, str_date), exist_ok=True)
                cv2.imwrite(os.path.join(opt.output, str_date, str_time + ".jpg"), frame)
            # print(1 / (time.time() - prev_time))
            if opt.display:
                cv2.imshow('Demo', frame)
                cv2.waitKey(3)
        except Exception as e:
            print("fail to detect", e)
    cap.release()
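# cvDrawBoxes() is called above but not shown; a minimal sketch of a
# compatible implementation (the name and signature follow the call site;
# the actual helper may differ), drawing each rescaled detection with cv2:
import cv2

def cvDrawBoxes(frame, detections, classes):
    # detections: rows of (x1, y1, x2, y2, conf, cls_conf, cls_pred)
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        pt1, pt2 = (int(x1), int(y1)), (int(x2), int(y2))
        label = "%s %.2f" % (classes[int(cls_pred)], float(cls_conf))
        cv2.rectangle(frame, pt1, pt2, (0, 255, 0), 2)
        cv2.putText(frame, label, (pt1[0], max(pt1[1] - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return frame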