import torch


def prep_image(img, inp_dim):
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]            # original (width, height)
    img = letterbox_image(orig_im, (inp_dim, inp_dim))  # aspect-preserving pad-resize
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)  # scale to [0, 1], add batch dim
    return img_, orig_im, dim
def prep_image(img, model_dim):
    """ Prepare image for input to the neural network. """
    orig_im = img
    orig_dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (model_dim, model_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, orig_dim
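# A minimal usage sketch for prep_image (not from the source): `model`, the
# image path, and the 416 input size are assumptions; `letterbox_image` is
# whatever helper the surrounding project defines.
import cv2

frame = cv2.imread("example.jpg")                    # HxWxC uint8, BGR order
tensor, orig_im, orig_dim = prep_image(frame, 416)   # 1x3x416x416 float in [0, 1]
with torch.no_grad():
    prediction = model(tensor)                       # `model` assumed to be loaded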
def prep_frame(
    self, frame: np.ndarray
) -> Tuple[torch.Tensor, np.ndarray, Tuple[int, int]]:
    original_frame = frame
    dim = original_frame.shape[1], original_frame.shape[0]
    frame = letterbox_image(original_frame, (self.input_dim, self.input_dim))
    frame_ = frame[:, :, ::-1].transpose((2, 0, 1)).copy()
    frame_ = torch.from_numpy(frame_).float().div(255.0).unsqueeze(0)
    return frame_, original_frame, dim
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Returns a Variable
    """
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # convert from BGR to RGB
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
def prep_image(img, inp_dim):  # image conversion
    """
    Prepare image for inputting to the neural network.

    Returns a Variable
    """
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))  # letterbox the image into network input form
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)  # represent the image as numbers
    return img_, orig_im, dim  # img_ was divided by 255, so its values lie between 0 and 1
import cv2


def prep_image(img, inp_dim, rotation):
    """
    Prepare image for inputting to the neural network.

    Returns a Variable
    """
    # Rotate the image about its center by `rotation` degrees before letterboxing.
    rows, cols, channels = img.shape
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), int(rotation), 1)
    orig_im = cv2.warpAffine(img, M, (cols, rows))
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.

    Returns a Variable.

    OpenCV loads an image as a numpy array with the color channels in BGR
    order, while PyTorch expects input of shape (batch x channels x height
    x width) with the channels in RGB order. prep_image therefore converts
    the numpy array into PyTorch's input format.
    """
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim
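# Every snippet here calls letterbox_image without defining it. Below is a
# minimal sketch of the usual YOLOv3-style letterbox (an assumption about the
# project's helper, not its verbatim code): scale to fit while preserving the
# aspect ratio, then pad the remainder with gray (128).
import cv2
import numpy as np


def letterbox_image(img, inp_dim):
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim
    scale = min(w / img_w, h / img_h)                   # fit inside the target box
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)    # gray padding
    top, left = (h - new_h) // 2, (w - new_w) // 2      # center the resized image
    canvas[top:top + new_h, left:left + new_w, :] = resized
    return canvas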
def detect(self, image):
    # Preprocess the image
    w, h = image.shape[1], image.shape[0]
    img = letterbox_image(image, (self.inp_dim, self.inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    im_dim_list = torch.FloatTensor([[w, h]]).repeat(1, 2)

    # Send to the model for prediction
    if self.CUDA:
        img_ = img_.cuda()
    with torch.no_grad():
        prediction = self.model(Variable(img_), self.CUDA)

    # write_results does NMS and converts the format; it returns 0 if nothing is found.
    # Each output row holds box corners at columns 1-4 (x1, y1, x2, y2) and a
    # confidence at column 5, which is how the code below reads it.
    output = write_results(prediction, self.CONFIDENCE_THRESHOLD, self.NUM_CLASSES,
                           nms=True, nms_conf=self.NMS_THRESHOLD)
    if isinstance(output, int):
        return None

    # Convert back to the coordinates in the original image before resizing.
    # The processing is somewhat involved because letterboxing was used in preprocessing.
    output = output.detach().cpu()
    scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[0, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[0, 1])

    return [
        {
            'l': int(output[i, 1].item()),
            't': int(output[i, 2].item()),
            'r': int(output[i, 3].item()),
            'b': int(output[i, 4].item()),
            'confidence': output[i, 5].item(),
        }
        for i in range(output.shape[0])
    ]
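# Hedged usage sketch for detect() (names are assumptions, not the source's
# API): a wrapper object exposing the method above, run on one OpenCV frame.
import cv2

detector = Detector()              # hypothetical class holding model, thresholds, inp_dim
frame = cv2.imread("example.jpg")
boxes = detector.detect(frame)     # None, or a list of {'l','t','r','b','confidence'} dicts
if boxes is not None:
    for b in boxes:
        cv2.rectangle(frame, (b['l'], b['t']), (b['r'], b['b']), (0, 255, 0), 2)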
# Process frames from the capture until the stream ends.
while True:
    face_in_frame = 1
    ret, frame = cap.read()

    # Break if not correctly read
    if not ret:
        break

    small_frame = imutils.resize(frame, width=small_frame_w)
    r = frame.shape[1] / small_frame.shape[1]

    ############################################################
    ##### FOR YOLO #############################################
    ############################################################
    orig_im = small_frame.copy()
    dim = orig_im.shape[1], orig_im.shape[0]
    img = letterbox_image(orig_im, (inp_dim, inp_dim))
    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)

    if CUDA:
        im_dim = im_dim.cuda()
        img = img.cuda()

    with torch.no_grad():
        output = model(Variable(img), CUDA)

    output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
    if isinstance(output, int):  # write_results returns 0 when nothing is detected
        continue

    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)