def __init__(
    self,
    segmenter: nn.Module = segmentation.deeplabv3_resnet101(
        pretrained=True),
    keypoint_estimator: nn.Module = detection.keypointrcnn_resnet50_fpn(
        pretrained=True),
    input_height: int = 600,
):
    """Store the models, set up empty caches and pick a device.

    Args:
        segmenter: segmentation network; a falsy value skips the device
            move and ``eval()`` call for it.
        keypoint_estimator: keypoint network; a falsy value skips the
            device move and ``eval()`` call for it.
        input_height: stored unchanged on ``self.input_height``.

    NOTE(review): both default models are created once, when this ``def``
    is evaluated, so every instance built without explicit arguments
    shares the same two model objects.
    """
    self.segmenter = segmenter
    self.keypoint_estimator = keypoint_estimator
    self.input_height = input_height

    # One empty sub-cache per kind of intermediate result.
    self.cache = {"keypoints": {}, "masks": {}, "images": {}}

    # Move to GPUs if available
    # BodyPoseEstimator handles this for itself
    if torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    self.device = torch.device(device_name)

    # Same treatment for both networks, in the original order.
    for network in (segmenter, keypoint_estimator):
        if network:
            network.to(self.device)
            network.eval()
def __init__(self, keypoint_treshold: float, batch_size: int):
    """Load a pretrained Keypoint R-CNN and remember the settings.

    Args:
        keypoint_treshold: stored as ``self.keypoint_threshold`` (the
            parameter name keeps its historical spelling for callers).
        batch_size: stored as ``self.batch_size``.
    """
    super().__init__()
    self.batch_size = batch_size
    self.keypoint_threshold = keypoint_treshold

    detector = keypointrcnn_resnet50_fpn(pretrained=True)
    detector.eval()
    to_cuda(detector)
    self.model = detector
def __init__(self, device=None, batch_size=12, display=False,
             detection_threshold=0.7, detector_type='yolo',
             yolo_img_size=608, output_format='list',
             detector_checkpoint=None, detector_config=None):
    '''
    Multi Person Tracker

    :param device (str, 'cuda' or 'cpu'): torch device for model and inputs;
        chosen from CUDA availability when None
    :param batch_size (int): batch size for detection model
    :param display (bool): display the results of multi person tracking
    :param detection_threshold (float): threshold to filter detector predictions
    :param detector_type (str, 'maskrcnn', 'yolo' or 'retina'): detector architecture
    :param yolo_img_size (int): yolo detector input image size
        (currently unused, the YOLO detector is disabled below)
    :param output_format (str, 'dict' or 'list'): result output format
    :param detector_checkpoint: checkpoint handed to ``init_detector`` for
        the 'retina' detector; a tuple is reduced to its first element
    :param detector_config: config handed to ``init_detector`` for the
        'retina' detector; a tuple is reduced to its first element
    :raises ModuleNotFoundError: if ``detector_type`` is not supported
    '''
    if device is not None:
        self.device = device
    else:
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    self.batch_size = batch_size
    self.display = display
    self.detection_threshold = detection_threshold
    self.output_format = output_format
    self.detector_type = detector_type

    checkpoint = (detector_checkpoint[0]
                  if isinstance(detector_checkpoint, tuple)
                  else detector_checkpoint)
    # NOTE(review): the original assignment ended with a stray comma, so
    # this attribute has always been a 1-tuple. The tuple is kept (now
    # explicit) in case code outside this method indexes it; confirm and
    # flatten if nothing relies on it.
    self.detector_checkpoint = (checkpoint,)
    self.detector_config = (detector_config[0]
                            if isinstance(detector_config, tuple)
                            else detector_config)

    if self.detector_type == 'maskrcnn':
        self.detector = keypointrcnn_resnet50_fpn(pretrained=True).to(
            self.device).eval()
    elif self.detector_type == 'yolo':
        # The YOLOv3 detector is disabled; it used to be built as
        #   YOLOv3(device=self.device, img_size=yolo_img_size,
        #          person_detector=True, video=True, return_dict=True)
        # and returned [{'boxes': ..., 'scores': ..., 'classes': ...}].
        # NOTE(review): this branch is the default and leaves
        # ``self.detector`` unset.
        pass
    elif self.detector_type == 'retina':
        # Build on the selected device instead of a hard-coded 'cuda:0',
        # so CPU-only hosts and non-default GPUs work as well.
        self.detector = init_detector(self.detector_config,
                                      checkpoint,
                                      device=self.device)
    else:
        raise ModuleNotFoundError(
            f'Unsupported detector_type: {self.detector_type!r}')

    self.tracker = Sort()
def keypointrcnn_resnet50_fpn(input_size=None, output_size=None):
    """Build torchvision's Keypoint R-CNN (with pretrained_backbone).

    Args:
        input_size: shape whose element product is used as the number of
            classes when ``output_size`` is not an int.
        output_size: number of classes; used directly when it is an int.

    Returns:
        The model returned by ``detection.keypointrcnn_resnet50_fpn``.

    Raises:
        The stored ``import_error`` if the optional dependency failed to
        import.
    """
    if import_error is not None:
        raise import_error
    if not isinstance(output_size, int):
        # numpy.product was removed in NumPy 2.0; numpy.prod is the
        # supported spelling. Cast so the model receives a plain int.
        output_size = int(numpy.prod(input_size))
    return detection.keypointrcnn_resnet50_fpn(num_classes=output_size)
def __init__(self, pretrained=False):
    """Reuse the Keypoint R-CNN backbone and attach small conv heads.

    Args:
        pretrained: forwarded to ``keypointrcnn_resnet50_fpn``.
    """
    super().__init__()
    self.backbone = keypointrcnn_resnet50_fpn(pretrained=pretrained).backbone

    # (attribute name, square kernel size); registration order is kept so
    # parameter order and initialisation order match the original layout.
    head_specs = (
        ("head_conv0", 7),
        ("head_conv1", 5),
        ("head_conv2", 3),
        ("head_conv3", 3),
        ("head_conv_pool", 3),
    )
    for attr_name, kernel in head_specs:
        setattr(self, attr_name, nn.Conv2d(256, 64, (kernel, kernel)))

    # 64 channels times the summed areas of five square maps.
    # NOTE(review): presumably the spatial sizes of the five head outputs
    # for the expected input resolution — confirm against forward().
    map_sides = (50, 24, 12, 5, 2)
    flat_features = 64 * sum(side ** 2 for side in map_sides)
    self.fc = nn.Linear(flat_features, 1)
def get_keypoint_detection_model(num_classes=2, num_keypoints=6, device=None):
    """Return a pretrained Keypoint R-CNN with fresh prediction heads.

    Args:
        num_classes: output size of the replacement box predictor.
        num_keypoints: output size of the replacement keypoint predictor.
        device: passed to ``model.to``.

    Returns:
        The modified model.
    """
    model = keypointrcnn_resnet50_fpn(pretrained=True)
    heads = model.roi_heads

    # There are only two classes anyway.
    box_inputs = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_inputs, num_classes)

    keypoint_inputs = heads.keypoint_predictor.kps_score_lowres.in_channels
    heads.keypoint_predictor = KeypointRCNNPredictor(
        keypoint_inputs, num_keypoints)

    model.to(device)
    return model
def __init__(self, size=480, out_size=480):
    """Set up the preprocessing pipeline and the pretrained detector.

    Args:
        size: side passed to ``CenterCrop``.
        out_size: size passed to ``Resize`` after cropping.
    """
    pipeline = [
        transforms.ToPILImage(),
        transforms.CenterCrop(size),
        transforms.Resize(out_size),
        transforms.ToTensor(),
    ]
    self.transform = transforms.Compose(pipeline)

    detector = keypointrcnn_resnet50_fpn(pretrained=True)
    detector.eval()
    detector.to(DEVICE)
    self.model = detector
def __init__(self, score_threshold: float = 0.5, nms_threshold: float = 0.5):
    """Create a CUDA-resident pretrained Keypoint R-CNN in eval mode.

    Args:
        score_threshold: forwarded as ``box_score_thresh``.
        nms_threshold: forwarded as ``box_nms_thresh``.
    """
    self.score_threshold = score_threshold
    self.nms_threshold = nms_threshold

    detector_options = {
        "pretrained": True,
        "box_score_thresh": self.score_threshold,
        "box_nms_thresh": self.nms_threshold,
    }
    detector = keypointrcnn_resnet50_fpn(**detector_options)
    self.model = detector.cuda()
    self.model.eval()

    # Keypoint name -> position in COCO_PERSON_KEYPOINT_NAMES.
    lookup = {}
    for position, keypoint_name in enumerate(self.COCO_PERSON_KEYPOINT_NAMES):
        lookup[keypoint_name] = position
    self.name_to_index = lookup
def __init__(
    self,
    device=None,
    batch_size=12,
    display=False,
    detection_threshold=0.7,
    detector_type='yolo',
    yolo_img_size=608,
    output_format='list',
):
    '''
    Multi Person Tracker

    :param device (str, 'cuda' or 'cpu'): torch device for model and inputs;
        chosen from CUDA availability when None
    :param batch_size (int): batch size for detection model
    :param display (bool): display the results of multi person tracking
    :param detection_threshold (float): threshold to filter detector predictions
    :param detector_type (str, 'maskrcnn' or 'yolo'): detector architecture
    :param yolo_img_size (int): yolo detector input image size
    :param output_format (str, 'dict' or 'list'): result output format
    :raises ModuleNotFoundError: for any other detector_type
    '''
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = device

    self.batch_size = batch_size
    self.display = display
    self.detection_threshold = detection_threshold
    self.output_format = output_format

    if detector_type == 'maskrcnn':
        network = keypointrcnn_resnet50_fpn(pretrained=True)
        self.detector = network.to(self.device).eval()
    elif detector_type == 'yolo':
        yolo_options = dict(
            device=self.device,
            img_size=yolo_img_size,
            person_detector=True,
            video=True,
            return_dict=True,
        )
        self.detector = YOLOv3(**yolo_options)
    else:
        raise ModuleNotFoundError

    self.tracker = Sort()
def test_keypointrcnn_resnet50_fpn_frozen_layers(self):
    """Leading parameters must be frozen per trainable_backbone_layers."""
    # For each trainable_backbone_layers value, how many of the first
    # parameters of the Keypoint R-CNN are expected to be frozen:
    # all 53 for 0 trainable layers, the first 24 for 2, and so on.
    frozen_prefix_by_setting = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}

    for trainable_layers in frozen_prefix_by_setting:
        prefix_len = frozen_prefix_by_setting[trainable_layers]
        model = keypointrcnn_resnet50_fpn(
            pretrained=True,
            progress=False,
            num_classes=2,
            pretrained_backbone=False,
            trainable_backbone_layers=trainable_layers,
        )
        # Only the expected prefix of the parameter list is inspected.
        leading = list(model.named_parameters())[:prefix_len]
        self.assertTrue(
            all(not param.requires_grad for _, param in leading))
(0,1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10), (9, 18), (10, 19), (5, 7), (7, 9), (11, 13), (13, 15), (12, 14), (14, 16), (15, 22), (16, 23), (20, 21), (5, 6), (5, 11), (6, 12), (11, 12), (17, 20), (20, 21), ] image = cv2.imread('../keypoint-detection/train_imgs/002-1-1-01-Z17_C-0000011.jpg', cv2.COLOR_BGR2RGB) draw_keypoints(image, keypoints, edges, keypoint_names, boxes=False, dpi=400) image = cv2.imread('./keypoint-detection/train_imgs/001-1-1-01-Z17_A-0000001.jpg', cv2.COLOR_BGR2RGB) image = cv2.resize(image, (1333, 800)) image = image / 255.0 image = image.transpose(2, 0, 1) image = [torch.as_tensor(image, dtype=torch.float32)] model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False) model.eval() preds = model(image) preds[0].keys() keypoints = preds[0]['keypoints'].detach().numpy().copy()[0] image = cv2.imread('./keypoint-detection/train_imgs/001-1-1-01-Z17_A-0000001.jpg', cv2.COLOR_BGR2RGB) keypoints[:, 0] *= image.shape[1] / 1333 keypoints[:, 1] *= image.shape[0] / 800 keypoints = keypoints[:, 2] edges = [ (0, 1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10), (5, 7), (7, 9), (5, 11), (11, 13), (13, 15), (6, 12), (12, 14), (14, 16), (5, 6) ]
def preprocess_images(
    image_folder: str,
    exp_cfg,
    num_workers: int = 8,
    batch_size: int = 1,
    min_score: float = 0.5,
    scale_factor: float = 1.2,
    device: Optional[torch.device] = None
) -> dutils.DataLoader:
    """Detect boxes with Keypoint R-CNN and build the cropped-image loader.

    Every image in ``image_folder`` is run through a pretrained Keypoint
    R-CNN; each detection whose score is at least ``min_score`` contributes
    one (image path, box) pair to an ``ImageFolderWithBoxes`` dataset.

    Args:
        image_folder: folder handed to ``ImageFolder``.
        exp_cfg: experiment config; ``datasets.body.transforms`` and
            ``datasets.body.batch_size`` (default 64) are read from it.
        num_workers: worker count for both data loaders.
        batch_size: batch size of the detection pass only.
        min_score: detections scoring below this are dropped.
        scale_factor: forwarded to ``ImageFolderWithBoxes``.
        device: device for the detector; when None, CUDA is required and
            the process exits with status 3 if it is unavailable.

    Returns:
        A ``DataLoader`` over the detected boxes.
    """
    if device is None:
        device = torch.device('cuda')
        if not torch.cuda.is_available():
            logger.error('CUDA is not available!')
            sys.exit(3)

    rcnn_model = keypointrcnn_resnet50_fpn(pretrained=True)
    rcnn_model.eval()
    rcnn_model = rcnn_model.to(device=device)

    transform = Compose([
        ToTensor(),
    ])

    # Load the images
    dataset = ImageFolder(image_folder, transforms=transform)
    rcnn_dloader = dutils.DataLoader(dataset,
                                     batch_size=batch_size,
                                     num_workers=num_workers,
                                     collate_fn=collate_fn)

    # NOTE(review): this directory is only used by debug drawing code that
    # is no longer active; creation is kept to preserve side effects.
    out_dir = osp.expandvars('$HOME/Dropbox/boxes')
    os.makedirs(out_dir, exist_ok=True)

    img_paths = []
    bboxes = []
    for batch in tqdm(rcnn_dloader, desc='Processing with R-CNN'):
        batch['images'] = [x.to(device=device) for x in batch['images']]

        # Inference only: do not build an autograd graph for the detector.
        with torch.no_grad():
            output = rcnn_model(batch['images'])

        for img_path, detections in zip(batch['paths'], output):
            for bbox, score in zip(detections['boxes'],
                                   detections['scores']):
                if score.item() < min_score:
                    continue
                img_paths.append(img_path)
                bboxes.append(bbox.detach().cpu().numpy())

    dataset_cfg = exp_cfg.get('datasets', {})
    body_dsets_cfg = dataset_cfg.get('body', {})

    body_transfs_cfg = body_dsets_cfg.get('transforms', {})
    transforms = build_transforms(body_transfs_cfg, is_train=False)
    # Separate name so the detection batch size argument is not shadowed.
    body_batch_size = body_dsets_cfg.get('batch_size', 64)

    expose_dset = ImageFolderWithBoxes(img_paths,
                                       bboxes,
                                       scale_factor=scale_factor,
                                       transforms=transforms)

    expose_collate = functools.partial(collate_batch,
                                       use_shared_memory=num_workers > 0,
                                       return_full_imgs=True)
    expose_dloader = dutils.DataLoader(
        expose_dset,
        batch_size=body_batch_size,
        num_workers=num_workers,
        collate_fn=expose_collate,
        drop_last=False,
        pin_memory=True,
    )
    return expose_dloader
cudnn.enabled = True rpn_n = 4 """ from torchvision.models.detection import fasterrcnn_resnet50_fpn model = fasterrcnn_resnet50_fpn(pretrained=True, min_size=128, rpn_pre_nms_top_n_test=rpn_n, rpn_post_nms_top_n_test=max(1, rpn_n // 2), box_score_thresh=0.5, box_detections_per_img=5) """ model = keypointrcnn_resnet50_fpn( pretrained=True, min_size=128, rpn_pre_nms_top_n_test=rpn_n, rpn_post_nms_top_n_test=max(1, rpn_n // 2), box_score_thresh=0.5, box_detections_per_img=3, ) model.eval() # model.cuda() # Construct the network and move to GPU def get_preds(img_t: torch.Tensor, threshold=0.7): """ Make `img` a tensor, transfer to GPU and run inference. Returns bounding boxes and keypoints for each person. """ with torch.no_grad():
# We will first have a look at the output of the model.
#
# Note that the keypoint detection model does not need normalized images.
#

from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
from torchvision.io import read_image

person_int = read_image(str(Path("assets", "person1.jpg")))

weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

person_float = transforms(person_int)

model = keypointrcnn_resnet50_fpn(weights=weights, progress=False).eval()

outputs = model([person_float])
print(outputs)

#####################################
# As we can see, the output is a list of dictionaries.
# The list has length batch_size; since we passed a single image,
# its length here is 1.
# Each entry in the list corresponds to an input image,
# and it is a dict with keys `boxes`, `labels`, `scores`, `keypoints` and `keypoint_scores`.
# Each value associated with those keys has `num_instances` elements in it.
# In our case above there are 2 instances detected in the image.

kpts = outputs[0]['keypoints']
import numpy as np import torch import tqdm from deep_privacy.torch_utils import to_cuda, image_to_torch from torchvision.models.detection import keypointrcnn_resnet50_fpn model = keypointrcnn_resnet50_fpn(pretrained=True) model.eval() to_cuda(model) def detect_keypoints(img, keypoint_threshold=.3): img = image_to_torch(img, cuda=True)[0] with torch.no_grad(): outputs = model([img]) # Shape: [N persons, K keypoints, (x,y,visibility)] keypoints = outputs[0]["keypoints"] scores = outputs[0]["scores"] assert list(scores) == sorted(list(scores))[::-1] mask = scores > keypoint_threshold keypoints = keypoints[mask, :, :2] return keypoints.cpu().numpy() def batch_detect_keypoints(images, keypoint_threshold=.3): images = [image_to_torch(im, cuda=False)[0] for im in images] batch_size = 16 keypoints = [] scores = [] if len(images) > 0: