Example #1
    def detect(self, img_path):

        # Load the image as an RGB tensor of shape (3, H, W)
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        _, H, W = img.shape
        # Pad to a square, resize to the 416x416 network input and add a batch dimension
        img, pad = pad_to_square(img, 0)
        imgs = resize(img, 416).unsqueeze(0).to('cuda')

        with torch.no_grad():
            output = self.net(imgs)
            output = non_max_suppression(output, 0.5, 0.5)[0]

        detections = []
        if output is None:  # nothing survived NMS
            return detections

        # Map boxes from the 416x416 padded input back to the original image size
        output = rescale_boxes(output, 416, (H, W)).numpy()

        boxes = []
        confidences = []
        class_ids = []

        for x1, y1, x2, y2, conf, cls_conf, cls_pred in output:
            width = x2 - x1
            height = y2 - y1
            x = x1
            y = y1
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(cls_conf))
            class_ids.append(int(cls_pred))
            class_ = self.get_class(int(cls_pred))
            top_left = (int(x1), int(y1))
            bottom_right = (int(x2), int(y2))
            box_2d = [top_left, bottom_right]
            detections.append(Detection(box_2d, class_))

        return detections
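The helpers pad_to_square, resize, non_max_suppression and rescale_boxes are not shown in this example. A minimal sketch of what the first two could look like, assuming centred zero-padding via torch.nn.functional.pad and nearest-neighbour interpolation (the exact padding scheme is an assumption, not taken from the snippet):

import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img: tensor of shape (3, H, W); pad the shorter side so the result is square
    c, h, w = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad expects (left, right, top, bottom) for the last two dimensions
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad

def resize(image, size):
    # Resize a single (3, H, W) image to (3, size, size)
    return F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)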
Example #2
def Yolo_detect(model,
                camInputFrame,
                img_size=416,
                conf_thres=0.8,
                nms_thres=0.4):

    img = transforms.ToTensor()(Image.fromarray(camInputFrame))
    # Pad to square resolution
    img, _ = pad_to_square(img, 0)
    # Resize
    img = resize(img, img_size)
    img = img.unsqueeze(0)  # shape: (1, 3, 416, 416)

    input_imgs = img.cuda()
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, conf_thres, nms_thres)

    if detections is not None:
        detections = detections[0]
        if detections is not None:
            detections = rescale_boxes(detections, img_size,
                                       camInputFrame.shape[:2])
    return detections
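Both examples rely on rescale_boxes to map detections from the padded, resized square back to the original frame. A plausible implementation, assuming the centred padding convention sketched above (not taken verbatim from these snippets):

def rescale_boxes(boxes, current_dim, original_shape):
    # boxes[:, :4] are (x1, y1, x2, y2) in the current_dim x current_dim padded image
    orig_h, orig_w = original_shape
    # How much padding was added (in current_dim pixels) along each axis
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    # Size of the image content once the padding is removed
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # Undo the padding and the resize for every coordinate
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes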
Example #3
    ##=== main train ===
    for epoch in range(opt.epochs):
        model.train()
        if TQDM_USE:
            dataloader = tqdm(dataloader)
        for batch_i, (_, imgs, targets) in enumerate(dataloader):
            batches_done = len(dataloader) * epoch + batch_i

            # imgs.shape    = (batch_size, 3, img_size, img_size)
            # targets.shape = (num_bboxes, 6), each row = (sample_idx, label, x, y, w, h)

            ##=== multi-scale training ===
            # Pick a new input size every 10 batches (multiples of 32)
            if opt.multiscale_training and batch_i % 10 == 0:
                img_cur_size = random.choice(range(img_min_size, img_max_size + 1, 32))
            if opt.multiscale_training:
                imgs = resize(imgs, img_cur_size)
 
            imgs    = Variable(imgs.to(device))
            targets = Variable(targets.to(device), requires_grad=False)

            loss, outputs = model(imgs, targets)
            loss.backward()

            # Step the optimizer only after `gradient_accumulations` backward passes have accumulated
            if (batches_done + 1) % opt.gradient_accumulations == 0:
                optimizer.step()
                optimizer.zero_grad()

            model.seen += imgs.size(0)

            # === Log metrics at each YOLO layer ===
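In this fragment resize is applied to a whole (batch_size, 3, H, W) tensor, so it has to be a batched variant of the single-image helper sketched after Example #1; a minimal sketch, again assuming nearest-neighbour interpolation:

import torch.nn.functional as F

def resize(imgs, size):
    # imgs: (batch_size, 3, H, W); rescale the whole batch to the newly chosen size
    return F.interpolate(imgs, size=size, mode="nearest")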
Example #4
        if not ret:
            break

        print("---------------读取第" + str(nums) + "帧")

        frame_start_t = time.time()
        # Convert the cv2 (BGR) frame to a PIL image, then to a Tensor
        # img = torchvision.transforms.ToTensor()(Image.open(img_path).convert(mode="RGB"))
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img = torchvision.transforms.ToTensor()(frame_pil.convert(mode="RGB"))
        # NEW: create a drawing handle that can be used to annotate the PIL frame
        draw = ImageDraw.Draw(frame_pil)

        input_imgs, _ = pad_to_square(img, 0)
        # Resize
        input_imgs = resize(input_imgs, opt.img_size).unsqueeze(0)

        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))
        tensor_t = time.time()
        print("Tensor conversion time: " + str(tensor_t - frame_start_t))

        # Run detection
        with torch.no_grad():
            detections = model(input_imgs.to(device))
            detections = non_max_suppression(detections, opt.conf_thres,
                                             opt.nms_thres)[0]
        detect_t = time.time()
        print("进行物体检测用时:" + str(detect_t - tensor_t))

        # Process the detection results for this frame
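The example is cut off at this point. A hedged sketch of how the per-frame processing could continue, assuming the detections are rescaled with rescale_boxes, that a classes list of label names is available, and that drawing goes through the ImageDraw handle created above (none of this is part of the original snippet):

        # --- hypothetical continuation, not part of the original example ---
        if detections is not None:
            detections = rescale_boxes(detections, opt.img_size, frame.shape[:2])
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                label = "%s %.2f" % (classes[int(cls_pred)], float(cls_conf))
                draw.rectangle([int(x1), int(y1), int(x2), int(y2)], outline=(255, 0, 0), width=2)
                draw.text((int(x1), int(y1)), label, fill=(255, 0, 0))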
Example #5
def YOLO():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.2, help="iou thresshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--video", type=str, required=True, help="input video")
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--output", default="./output", help="output dir")
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))

    model.eval()  # Set in evaluation mode

    classes = load_classes(opt.class_path)  # Extracts class labels from file

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    #cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture(opt.video)
    # 3 = CAP_PROP_FRAME_WIDTH, 4 = CAP_PROP_FRAME_HEIGHT (only effective for camera input)
    cap.set(3, 1280)
    cap.set(4, 720)
    # out = cv2.VideoWriter(
    #     "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0,
    #     (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")

    while True:
        try:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:  # end of the video or read failure
                break

            frame = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)

            # Extract image as PyTorch tensor
            img = transforms.ToTensor()(frame)

            # Pad to square resolution
            img, _ = pad_to_square(img, 0)
            # Resize
            img = resize(img, opt.img_size)
            img = img.unsqueeze(0)
            # Configure input
            input_imgs = Variable(img.type(Tensor))
            # Get detections
            with torch.no_grad():
                detections = model(input_imgs)
                detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            detections = [d for d in detections if d is not None]
            if len(detections) > 0:
                # Rescale boxes to original image
                detections = rescale_boxes(detections[0], opt.img_size, frame.shape[:2])
                frame = cvDrawBoxes(frame, detections, classes)
                current_time = datetime.datetime.now()
                if int(time.time()*10) % 10 == 0:  # save a snapshot roughly once per second
                    str_date = datetime.datetime.strftime(current_time, "%Y%m%d")
                    str_time = datetime.datetime.strftime(current_time, "%Y%m%d%H%M%S")
                    os.makedirs(os.path.join(opt.output, str_date), exist_ok=True)
                    cv2.imwrite(os.path.join(opt.output, str_date, str_time + ".jpg"), frame)
            # print(1/(time.time()-prev_time))
            if opt.display:
                cv2.imshow('Demo', frame)
                cv2.waitKey(3)
        except Exception as e:
            print("fail to detect", e)
    cap.release()
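cvDrawBoxes is not defined in this example. A hypothetical implementation matching the call above (the name and arguments come from that call; the drawing logic itself is an assumption):

import cv2

def cvDrawBoxes(frame, detections, classes):
    # Draw each (x1, y1, x2, y2, conf, cls_conf, cls_pred) detection on the BGR frame
    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        pt1, pt2 = (int(x1), int(y1)), (int(x2), int(y2))
        label = "%s %.2f" % (classes[int(cls_pred)], float(cls_conf))
        cv2.rectangle(frame, pt1, pt2, (0, 255, 0), 2)
        cv2.putText(frame, label, (pt1[0], pt1[1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return frame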