Example #1
def prep_image(img, inp_dim):
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]            # original (width, height)
    img = letterbox_image(orig_im, (inp_dim, inp_dim))  # aspect-preserving resize with padding
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)  # scale to [0, 1], add batch dim
    return img_, orig_im, dim
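Every snippet on this page relies on a letterbox_image helper that is not shown here. A minimal sketch of what it presumably does, assuming OpenCV-style (H, W, C) uint8 arrays and gray (128) padding:

import cv2
import numpy as np

def letterbox_image(img, inp_dim):
    """Resize img to inp_dim = (w, h) without changing aspect ratio; pad with gray."""
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    scale = min(w / img_w, h / img_h)
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)   # gray background
    top, left = (h - new_h) // 2, (w - new_w) // 2     # center the resized image
    canvas[top:top + new_h, left:left + new_w, :] = resized
    return canvas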
Example #2
def prep_image(img, model_dim):
    """
    Prepare image for input to the neural network. 
    """
    orig_im = img
    orig_dim = orig_im.shape[1], orig_im.shape[0]
    img = (letterbox_image(orig_im, (model_dim, model_dim)))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, orig_dim
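A hypothetical call, assuming a network with a 416x416 input (the file name is illustrative):

import cv2

frame = cv2.imread("dog.jpg")                 # BGR uint8 array of shape (H, W, 3)
tensor, orig, (w, h) = prep_image(frame, 416)
print(tensor.shape)                           # torch.Size([1, 3, 416, 416])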
Example #3
    def prep_frame(
            self, frame: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
        """Prepare a video frame for input to the network."""
        original_frame = frame
        dim = original_frame.shape[1], original_frame.shape[0]
        frame = letterbox_image(original_frame,
                                (self.input_dim, self.input_dim))
        frame_ = frame[:, :, ::-1].transpose((2, 0, 1)).copy()
        frame_ = torch.from_numpy(frame_).float().div(255.0).unsqueeze(0)
        return frame_, original_frame, dim
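prep_frame is a method; a minimal wrapper class it could plausibly live in (hypothetical, since only the input_dim attribute is actually used):

class FrameDetector:
    def __init__(self, input_dim: int = 416):  # hypothetical class; 416 is illustrative
        self.input_dim = input_dim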
Example #4
def prep_image(img, inp_dim):
    """
    Prepare image for input to the neural network.

    Returns a Variable.
    """
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # convert from BGR to RGB
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
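The img[:, :, ::-1] slice reverses the channel axis, which for OpenCV's BGR layout is exactly a BGR-to-RGB conversion. A standalone check (not from the original source):

import cv2
import numpy as np

img = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
assert np.array_equal(img[:, :, ::-1], cv2.cvtColor(img, cv2.COLOR_BGR2RGB))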
Example #5
def prep_image(img, inp_dim):  # transform the image
    """
    Prepare image for input to the neural network.

    Returns a Variable.
    """

    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))  # letterbox to the network input size
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)  # represent the image as numbers
    return img_, orig_im, dim  # img_ has been divided by 255, so values lie between 0 and 1
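The final comment's point in isolation: div(255.0) maps 8-bit pixel values into [0, 1] (a standalone check, not from the source):

import torch

px = torch.tensor([0., 128., 255.])
print(px.div(255.0))   # tensor([0.0000, 0.5020, 1.0000])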
Example #6
def prep_image(img, inp_dim, rotation):
    """
    Prepare image for input to the neural network.

    Returns a Variable.
    """
    # Rotate the image about its center by `rotation` degrees before preprocessing
    rows, cols, channels = img.shape
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), int(rotation), 1)
    orig_im = cv2.warpAffine(img, M, (cols, rows))

    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
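A hypothetical call to this variant; positive angles rotate counter-clockwise in cv2.getRotationMatrix2D (names and file are illustrative):

import cv2

frame = cv2.imread("portrait.jpg")                       # illustrative input
tensor, rotated_im, (w, h) = prep_image(frame, 416, 90)  # rotate 90 degrees CCW first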
Example #7
def prep_image(img, inp_dim):
    """
    Prepare image for input to the neural network.

    Returns a Variable.

    OpenCV loads an image as a numpy array with channels in BGR order. PyTorch
    expects input of shape (batch x channels x height x width) with channels in
    RGB order, so prep_image converts the numpy array into PyTorch's input format.
    """

    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
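The docstring's layout conversion in isolation; a small sketch (not from the source) showing how transpose and unsqueeze produce the (batch, channels, height, width) tensor PyTorch expects:

import numpy as np
import torch

hwc = np.zeros((480, 640, 3), dtype=np.float32)           # OpenCV layout: H x W x C
chw = torch.from_numpy(hwc.transpose((2, 0, 1)).copy())   # C x H x W
print(chw.unsqueeze(0).shape)                             # torch.Size([1, 3, 480, 640])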
Example #8
    def detect(self, image):
        # Preprocess the image
        w, h = image.shape[1], image.shape[0]
        img = letterbox_image(image, (self.inp_dim, self.inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        im_dim_list = torch.FloatTensor([[w, h]]).repeat(1, 2)

        # Send to the model for prediction
        if self.CUDA:
            img_ = img_.cuda()
        with torch.no_grad():
            prediction = self.model(Variable(img_), self.CUDA)
        # write_results performs NMS and converts the output format;
        # it returns the int 0 when nothing is detected.
        output = write_results(prediction, self.CONFIDENCE_THRESHOLD, self.NUM_CLASSES,
                               nms=True, nms_conf=self.NMS_THRESHOLD)
        # Each output row has the format [batch index, left, top, right, bottom, confidence, ...]

        if isinstance(output, int):
            return None
        
        # Convert the boxes back to coordinates in the original, pre-resize image.
        # The arithmetic is slightly involved because letterboxing was used in preprocessing.
        output = output.detach().cpu()
        scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[0, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[0, 1])

        return [{'l': int(output[i, 1].item()),
                 't': int(output[i, 2].item()),
                 'r': int(output[i, 3].item()),
                 'b': int(output[i, 4].item()),
                 'confidence': output[i, 5].item()} for i in range(output.shape[0])]
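To make the inverse-letterbox arithmetic above concrete, a worked example with illustrative numbers (a 416-input network and a 640x480 source image):

inp_dim, w, h = 416, 640, 480
s = min(inp_dim / w, inp_dim / h)   # 0.65: the image was letterboxed to 416 x 312
pad_y = (inp_dim - s * h) / 2       # 52 px of vertical padding on each side
# a y-coordinate of 200 in network space maps back to (200 - 52) / 0.65 ≈ 227.7
print((200 - pad_y) / s)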
    face_in_frame = 1

    ret, frame = cap.read()
    # Break if not correctly read
    if not ret:
        break

    small_frame = imutils.resize(frame, width=small_frame_w)
    r = frame.shape[1] / small_frame.shape[1]

    ############################################################
    ##### FOR YOLO #############################################
    ############################################################
    # Inline version of prep_image: letterbox, BGR -> RGB, HWC -> CHW, scale to [0, 1]
    orig_im = small_frame.copy()
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)
    if CUDA:
        im_dim = im_dim.cuda()
        img = img.cuda()
    with torch.no_grad():
        output = model(Variable(img), CUDA)
    output = write_results(output,
                           confidence,
                           num_classes,
                           nms=True,
                           nms_conf=nms_thesh)
    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)