Example #1
0
    def __init__(
        self,
        segmenter: nn.Module = segmentation.deeplabv3_resnet101(
            pretrained=True),
        keypoint_estimator: nn.Module = detection.keypointrcnn_resnet50_fpn(
            pretrained=True),
        input_height: int = 600,
    ):
        """Store the models, set up the caches and put the models in eval mode.

        NOTE(review): both default models are constructed when this ``def``
        statement is evaluated, not per call, so every instance created
        without explicit arguments shares the same two model objects.

        :param segmenter: segmentation network; ``None`` is tolerated and
            simply skipped during device placement.
        :param keypoint_estimator: keypoint network; ``None`` is tolerated
            and simply skipped during device placement.
        :param input_height: stored as ``self.input_height``; not used here.
        """
        self.segmenter = segmenter
        self.keypoint_estimator = keypoint_estimator
        self.input_height = input_height

        # One sub-cache per kind of cached artefact.
        self.cache = {"keypoints": {}, "masks": {}, "images": {}}

        # Move to GPUs if available
        # BodyPoseEstimator handles this for itself
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Both the device move and eval() sit behind the None check: the
        # original called eval() unconditionally and crashed on a None model.
        for model in (self.segmenter, self.keypoint_estimator):
            if model is not None:
                model.to(self.device)
                model.eval()
Example #2
0
 def __init__(self, keypoint_treshold: float, batch_size: int):
     """Build the pretrained Keypoint R-CNN and remember the settings.

     :param keypoint_treshold: stored as ``self.keypoint_threshold``.
     :param batch_size: stored as ``self.batch_size``.
     """
     super().__init__()
     detector = keypointrcnn_resnet50_fpn(pretrained=True)
     detector.eval()
     # to_cuda comes from elsewhere; presumably it moves the model to the
     # GPU in place (its return value is ignored) -- TODO confirm.
     to_cuda(detector)
     self.batch_size = batch_size
     self.keypoint_threshold = keypoint_treshold
     self.model = detector
Example #3
0
    def __init__(self,
                 device=None,
                 batch_size=12,
                 display=False,
                 detection_threshold=0.7,
                 detector_type='yolo',
                 yolo_img_size=608,
                 output_format='list',
                 detector_checkpoint=None,
                 detector_config=None):
        '''
        Multi Person Tracker

        :param device (str, 'cuda' or 'cpu'): torch device for model and inputs
        :param batch_size (int): batch size for detection model
        :param display (bool): display the results of multi person tracking
        :param detection_threshold (float): threshold to filter detector predictions
        :param detector_type (str, 'maskrcnn', 'yolo' or 'retina'): detector architecture
        :param yolo_img_size (int): yolo detector input image size (unused
            while the YOLO branch is disabled)
        :param output_format (str, 'dict' or 'list'): result output format
        :param detector_checkpoint: checkpoint handed to ``init_detector`` for
            the 'retina' detector; a tuple is unwrapped to its first element
        :param detector_config: config handed to ``init_detector`` for the
            'retina' detector; a tuple is unwrapped to its first element
        :raises ModuleNotFoundError: if ``detector_type`` is not recognised
        '''

        if device is not None:
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.batch_size = batch_size
        self.display = display
        self.detection_threshold = detection_threshold
        self.output_format = output_format
        self.detector_type = detector_type

        checkpoint = (detector_checkpoint[0]
                      if isinstance(detector_checkpoint, tuple)
                      else detector_checkpoint)
        # NOTE(review): the original assignment ended with a stray trailing
        # comma, so this attribute has always been a 1-tuple. The shape is
        # kept (now explicitly) because code outside this method may index
        # it with [0]; confirm before flattening it to the plain value.
        self.detector_checkpoint = (checkpoint,)
        self.detector_config = (detector_config[0]
                                if isinstance(detector_config, tuple)
                                else detector_config)

        if self.detector_type == 'maskrcnn':
            self.detector = keypointrcnn_resnet50_fpn(pretrained=True).to(
                self.device).eval()
        elif self.detector_type == 'yolo':
            # The YOLOv3 detector is currently disabled, so self.detector is
            # NOT set on this path. Previously:
            # self.detector = YOLOv3(
            #     device=self.device, img_size=yolo_img_size, person_detector=True, video=True, return_dict=True
            # )
            pass
        elif self.detector_type == 'retina':
            # Honour the configured device instead of a hard-coded 'cuda:0'.
            self.detector = init_detector(self.detector_config,
                                          checkpoint,
                                          device=self.device)
        else:
            raise ModuleNotFoundError(
                f'Unknown detector_type: {self.detector_type!r}')

        self.tracker = Sort()
Example #4
0
def keypointrcnn_resnet50_fpn(input_size=None, output_size=None):
    """Build a torchvision Keypoint R-CNN with ``num_classes=output_size``.

    No pretrained-related arguments are passed, so torchvision's own
    defaults apply (the original note on this function read
    "with pretrained_backbone").

    :param input_size: used only when ``output_size`` is not an ``int``; the
        product of its entries then becomes the number of classes.
    :param output_size: number of classes when given as an ``int``.
    :raises: re-raises the module-level ``import_error`` if it is set.
    """
    if import_error is not None:
        raise import_error

    if not isinstance(output_size, int):
        # numpy.product was a deprecated alias and is gone in NumPy 2.0;
        # numpy.prod is the supported spelling and returns the same value.
        output_size = numpy.prod(input_size)

    return detection.keypointrcnn_resnet50_fpn(num_classes=output_size)
Example #5
0
 def __init__(self, pretrained=False):
     """Reuse the Keypoint R-CNN backbone and add conv heads plus one FC.

     :param pretrained: forwarded to ``keypointrcnn_resnet50_fpn``.
     """
     super().__init__()
     self.backbone = keypointrcnn_resnet50_fpn(pretrained=pretrained).backbone

     # Layers are created in this exact order so that parameter
     # registration and random initialisation stay unchanged.
     self.head_conv0 = nn.Conv2d(256, 64, kernel_size=(7, 7))
     self.head_conv1 = nn.Conv2d(256, 64, kernel_size=(5, 5))
     self.head_conv2 = nn.Conv2d(256, 64, kernel_size=(3, 3))
     self.head_conv3 = nn.Conv2d(256, 64, kernel_size=(3, 3))
     self.head_conv_pool = nn.Conv2d(256, 64, kernel_size=(3, 3))

     # Presumably the spatial sides of the five head outputs once
     # flattened -- TODO confirm against forward().
     flattened_cells = sum(side ** 2 for side in (50, 24, 12, 5, 2))
     self.fc = nn.Linear(64 * flattened_cells, 1)
Example #6
0
def get_keypoint_detection_model(num_classes=2, num_keypoints=6, device=None):
    """Return a pretrained Keypoint R-CNN with fresh box and keypoint heads.

    :param num_classes: output classes of the replacement box predictor.
    :param num_keypoints: keypoints of the replacement keypoint predictor.
    :param device: passed to ``model.to`` before the model is returned.
    """
    model = keypointrcnn_resnet50_fpn(pretrained=True)
    heads = model.roi_heads

    # Swap the box head; there were only two classes to begin with.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Swap the keypoint head, keeping its input channel count.
    keypoint_in_channels = (
        heads.keypoint_predictor.kps_score_lowres.in_channels)
    heads.keypoint_predictor = KeypointRCNNPredictor(keypoint_in_channels,
                                                     num_keypoints)

    model.to(device)
    return model
Example #7
0
 def __init__(self, size=480, out_size=480):
     """Set up the preprocessing pipeline and the pretrained detector.

     :param size: side length given to ``CenterCrop``.
     :param out_size: size given to ``Resize`` after cropping.
     """
     preprocessing_steps = [
         transforms.ToPILImage(),
         transforms.CenterCrop(size),
         transforms.Resize(out_size),
         transforms.ToTensor(),
     ]
     self.transform = transforms.Compose(preprocessing_steps)

     # torch_tensor_to_img(img, display=True)
     detector = keypointrcnn_resnet50_fpn(pretrained=True)
     detector.eval()
     detector.to(DEVICE)
     self.model = detector
Example #8
0
    def __init__(self,
                 score_threshold: float = 0.5,
                 nms_threshold: float = 0.5):
        """Create the CUDA Keypoint R-CNN and the keypoint-name lookup.

        :param score_threshold: forwarded as ``box_score_thresh``.
        :param nms_threshold: forwarded as ``box_nms_thresh``.
        """
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold

        detector = keypointrcnn_resnet50_fpn(
            pretrained=True,
            box_score_thresh=self.score_threshold,
            box_nms_thresh=self.nms_threshold)
        # NOTE: requires a CUDA device; there is no CPU fallback here.
        self.model = detector.cuda()
        self.model.eval()

        # Map each keypoint name to its position in the class-level list.
        lookup = {}
        for position, keypoint_name in enumerate(
                self.COCO_PERSON_KEYPOINT_NAMES):
            lookup[keypoint_name] = position
        self.name_to_index = lookup
Example #9
0
    def __init__(
        self,
        device=None,
        batch_size=12,
        display=False,
        detection_threshold=0.7,
        detector_type='yolo',
        yolo_img_size=608,
        output_format='list',
    ):
        '''
        Multi Person Tracker

        :param device (str, 'cuda' or 'cpu'): torch device for model and inputs;
            picked automatically when None
        :param batch_size (int): batch size for detection model
        :param display (bool): display the results of multi person tracking
        :param detection_threshold (float): threshold to filter detector predictions
        :param detector_type (str, 'maskrcnn' or 'yolo'): detector architecture
        :param yolo_img_size (int): yolo detector input image size
        :param output_format (str, 'dict' or 'list'): result output format
        :raises ModuleNotFoundError: for any other detector_type
        '''

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device

        self.batch_size = batch_size
        self.display = display
        self.detection_threshold = detection_threshold
        self.output_format = output_format

        # Reject unknown detector names before building anything.
        if detector_type not in ('maskrcnn', 'yolo'):
            raise ModuleNotFoundError

        if detector_type == 'maskrcnn':
            keypoint_rcnn = keypointrcnn_resnet50_fpn(pretrained=True)
            self.detector = keypoint_rcnn.to(self.device).eval()
        else:
            self.detector = YOLOv3(
                device=self.device,
                img_size=yolo_img_size,
                person_detector=True,
                video=True,
                return_dict=True,
            )

        self.tracker = Sort()
Example #10
0
 def test_keypointrcnn_resnet50_fpn_frozen_layers(self):
     """Check the leading frozen parameters per trainable_backbone_layers."""
     # For every trainable_backbone_layers value we know how many of the
     # first parameters of the keypointrcnn must be frozen, e.g. all 53
     # params for trainable_backbone_layers=0 and the first 24 params for
     # trainable_backbone_layers=2.
     frozen_count_by_trainable_layers = {
         0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
     for trainable_layers, frozen_count in (
             frozen_count_by_trainable_layers.items()):
         model = keypointrcnn_resnet50_fpn(
             pretrained=True,
             progress=False,
             num_classes=2,
             pretrained_backbone=False,
             trainable_backbone_layers=trainable_layers)
         # Only the first `frozen_count` parameters are inspected.
         leading_params = list(model.parameters())[:frozen_count]
         # Every one of them must have gradients switched off.
         self.assertTrue(
             all(not param.requires_grad for param in leading_params))
Example #11
0
    (0,1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10), (9, 18),
    (10, 19), (5, 7), (7, 9), (11, 13), (13, 15), (12, 14),
    (14, 16), (15, 22), (16, 23), (20, 21), (5, 6), (5, 11),
    (6, 12), (11, 12), (17, 20), (20, 21),
]

# cv2.imread's second argument is a read flag, not a colour conversion, so
# the BGR -> RGB conversion has to be a separate cvtColor call.
image = cv2.cvtColor(
    cv2.imread('../keypoint-detection/train_imgs/002-1-1-01-Z17_C-0000011.jpg'),
    cv2.COLOR_BGR2RGB)
draw_keypoints(image, keypoints, edges, keypoint_names, boxes=False, dpi=400)

# Prepare one image as a float CHW tensor in [0, 1] for the detector.
image = cv2.cvtColor(
    cv2.imread('./keypoint-detection/train_imgs/001-1-1-01-Z17_A-0000001.jpg'),
    cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (1333, 800))
image = image / 255.0
image = image.transpose(2, 0, 1)
image = [torch.as_tensor(image, dtype=torch.float32)]

model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False)
model.eval()
preds = model(image)
preds[0].keys()

# Keypoints of the first detection; columns 0 and 1 are rescaled below from
# the 1333x800 network input back to the original image size.
keypoints = preds[0]['keypoints'].detach().numpy().copy()[0]
image = cv2.cvtColor(
    cv2.imread('./keypoint-detection/train_imgs/001-1-1-01-Z17_A-0000001.jpg'),
    cv2.COLOR_BGR2RGB)
keypoints[:, 0] *= image.shape[1] / 1333
keypoints[:, 1] *= image.shape[0] / 800
# Keep the rescaled (x, y) columns. The original `[:, 2]` kept only the
# third column and threw away the coordinates that had just been rescaled.
keypoints = keypoints[:, :2]

edges = [
    (0, 1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10),
    (5, 7), (7, 9), (5, 11), (11, 13), (13, 15), (6, 12),
    (12, 14), (14, 16), (5, 6)
]
Example #12
0
def preprocess_images(
        image_folder: str,
        exp_cfg,
        num_workers: int = 8,
        batch_size: int = 1,
        min_score: float = 0.5,
        scale_factor: float = 1.2,
        device: Optional[torch.device] = None) -> dutils.DataLoader:
    """Detect boxes with Keypoint R-CNN and build a loader over the crops.

    Every image in ``image_folder`` is run through a pretrained Keypoint
    R-CNN. Each detected box whose score is at least ``min_score`` is
    recorded together with its image path, and the result is wrapped in an
    ``ImageFolderWithBoxes`` data loader.

    :param image_folder: folder handed to ``ImageFolder``.
    :param exp_cfg: experiment config; ``datasets.body.transforms`` and
        ``datasets.body.batch_size`` (default 64) are read from it.
    :param num_workers: worker count for both data loaders.
    :param batch_size: batch size of the detection loader only; the returned
        loader takes its batch size from ``exp_cfg``.
    :param min_score: detections scoring below this are dropped.
    :param scale_factor: forwarded to ``ImageFolderWithBoxes``.
    :param device: device for detection; defaults to CUDA, and the process
        exits with status 3 when CUDA is unavailable.
    :return: data loader over the detected boxes.
    """
    if device is None:
        device = torch.device('cuda')
        if not torch.cuda.is_available():
            logger.error('CUDA is not available!')
            sys.exit(3)

    rcnn_model = keypointrcnn_resnet50_fpn(pretrained=True)
    rcnn_model.eval()
    rcnn_model = rcnn_model.to(device=device)

    transform = Compose([
        ToTensor(),
    ])

    # Load the images
    dataset = ImageFolder(image_folder, transforms=transform)
    rcnn_dloader = dutils.DataLoader(dataset,
                                     batch_size=batch_size,
                                     num_workers=num_workers,
                                     collate_fn=collate_fn)

    # NOTE(review): this directory was only needed by debug visualisation
    # that is now disabled; it is still created so the function's
    # filesystem side effect stays the same.
    out_dir = osp.expandvars('$HOME/Dropbox/boxes')
    os.makedirs(out_dir, exist_ok=True)

    img_paths = []
    bboxes = []
    # Pure inference: no autograd graph is needed, which saves memory.
    with torch.no_grad():
        for batch in tqdm(rcnn_dloader, desc='Processing with R-CNN'):
            images = [x.to(device=device) for x in batch['images']]
            output = rcnn_model(images)

            for img_path, detections in zip(batch['paths'], output):
                for bbox, score in zip(detections['boxes'],
                                       detections['scores']):
                    if score.item() < min_score:
                        continue
                    img_paths.append(img_path)
                    # Copy to CPU only for boxes that pass the filter.
                    bboxes.append(bbox.detach().cpu().numpy())

    dataset_cfg = exp_cfg.get('datasets', {})
    body_dsets_cfg = dataset_cfg.get('body', {})

    body_transfs_cfg = body_dsets_cfg.get('transforms', {})
    transforms = build_transforms(body_transfs_cfg, is_train=False)
    batch_size = body_dsets_cfg.get('batch_size', 64)

    expose_dset = ImageFolderWithBoxes(img_paths,
                                       bboxes,
                                       scale_factor=scale_factor,
                                       transforms=transforms)

    expose_collate = functools.partial(collate_batch,
                                       use_shared_memory=num_workers > 0,
                                       return_full_imgs=True)
    expose_dloader = dutils.DataLoader(
        expose_dset,
        batch_size=batch_size,
        num_workers=num_workers,
        collate_fn=expose_collate,
        drop_last=False,
        pin_memory=True,
    )
    return expose_dloader
cudnn.enabled = True

# Number of RPN proposals kept before NMS at test time; half of it (at
# least one) is kept after NMS.
rpn_n = 4
"""
from torchvision.models.detection import fasterrcnn_resnet50_fpn
model = fasterrcnn_resnet50_fpn(pretrained=True,
                                  min_size=128,
                                  rpn_pre_nms_top_n_test=rpn_n,
                                  rpn_post_nms_top_n_test=max(1, rpn_n // 2),
                                  box_score_thresh=0.5,
                                  box_detections_per_img=5)
"""
# Pretrained Keypoint R-CNN, built and switched to inference mode in one
# expression (eval() returns the module itself).
model = keypointrcnn_resnet50_fpn(
    pretrained=True,
    min_size=128,
    rpn_pre_nms_top_n_test=rpn_n,
    rpn_post_nms_top_n_test=max(rpn_n // 2, 1),
    box_score_thresh=0.5,
    box_detections_per_img=3,
).eval()


# model.cuda() # Construct the network and move to GPU


def get_preds(img_t: torch.Tensor, threshold=0.7):
    """
Make `img` a tensor, transfer to GPU and run inference.
Returns bounding boxes and keypoints for each person.
"""
    with torch.no_grad():
Example #14
0
# We will first have a look at output of the model.
#
# Note that the keypoint detection model does not need normalized images.
#

from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
from torchvision.io import read_image

# Read the example image from the assets folder.
person_int = read_image(str(Path("assets", "person1.jpg")))

# The default weights carry the matching preprocessing transforms.
weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

person_float = transforms(person_int)

# Build the model and switch it to inference mode (eval() returns the model).
model = keypointrcnn_resnet50_fpn(weights=weights, progress=False).eval()

outputs = model([person_float])
print(outputs)

#####################################
# As we can see, the output is a list of dictionaries.
# The list has one entry per input image, i.e. its length is batch_size.
# We passed a single image, so the list has length 1.
# Each entry is a dict with the keys `boxes`, `labels`, `scores`,
# `keypoints` and `keypoint_scores`.
# Every value under those keys holds `num_instances` elements.
# In our case above there are 2 instances detected in the image.

kpts = outputs[0]["keypoints"]
Example #15
0
import numpy as np
import torch
import tqdm
from deep_privacy.torch_utils import to_cuda, image_to_torch
from torchvision.models.detection import keypointrcnn_resnet50_fpn

# Module-level detector shared by the functions below; eval() returns the
# model itself, so it can be chained onto construction.
model = keypointrcnn_resnet50_fpn(pretrained=True).eval()
to_cuda(model)


def detect_keypoints(img, keypoint_threshold=.3):
    """Return the (x, y) keypoints of every sufficiently confident detection.

    :param img: image accepted by ``image_to_torch``.
    :param keypoint_threshold: detections whose score is not strictly above
        this value are dropped. Despite the name it is compared against the
        per-detection ``scores`` output, not against keypoint scores.
    :return: numpy array holding the first two keypoint channels of the
        detections that were kept.
    """
    img = image_to_torch(img, cuda=True)[0]
    with torch.no_grad():
        outputs = model([img])

    # Shape: [N persons, K keypoints, (x,y,visibility)]
    keypoints = outputs[0]["keypoints"]
    scores = outputs[0]["scores"]
    # Sanity check that detections arrive sorted by descending score. One
    # vectorized comparison replaces the original Python-level sort of
    # individual 0-d tensors; it also holds for empty results.
    assert bool(torch.all(scores[:-1] >= scores[1:])), \
        "detection scores are expected in descending order"
    mask = scores > keypoint_threshold
    keypoints = keypoints[mask, :, :2]
    return keypoints.cpu().numpy()


def batch_detect_keypoints(images, keypoint_threshold=.3):
    images = [image_to_torch(im, cuda=False)[0] for im in images]
    batch_size = 16
    keypoints = []
    scores = []
    if len(images) > 0: