Example #1
 def __call__(self, image, target):
     image = F.normalize(image, mean=self.mean, std=self.std)
     return image, target
Example #2
 def forward(self, img):
     img = torch.stack([F.normalize(i, mean, std) for i in img])
     x = self.base(img)  # x is a dict
     return x
Example #3
 def __call__(self, img, mask):
     return normalize(img, self.mean, self.std, False), mask
Example #4
 def __call__(self, image, target):
     if self.to_bgr255:
         image = image[[2, 1, 0]] * 255
     image = F.normalize(image, mean=self.mean, std=self.std)
     return image, target
Example #5
 def to_tensor_norm(self, img):
     img = Image.fromarray(img)
     img_t = F.to_tensor(img).float()
     img_t = F.normalize(img_t, self.mean, self.std)  # normalize with the given mean and std
     return img_t
Example #6
 def __call__(self, results):
     results['img'] = F.normalize(
         results['img'], mean=self.mean, std=self.std)
     return results
Example #7
 def __call__(self, sample):
     image, label = sample['image'], sample['label']
     image = F.normalize(image, self.mean, self.std)
     return {'image': image, 'label': label}
Example #8
 def __call__(self, images):
     normalized = np.stack([
         F.normalize(x, self.mean, self.std, self.inplace) for x in images
     ])
     return normalized
Example #9
 def transform_image(self, image):
     return tvisf.normalize(image, self.mean, self.std, self.inplace)
Example #10
 def __call__(self, image, cropped_image, target, **kwargs):
     output_image = F.normalize(image, mean=self.mean, std=self.std)
     output_cropped_image = F.normalize(cropped_image,
                                        mean=self.mean,
                                        std=self.std)
     return output_image, output_cropped_image, target
Example #11
                # get starting error
                degradation = np.array([
                    2, 2, 4, 0.01
                ]) * 0  # should roughly equal localizer error covariance
                skew = np.random.normal(0, degradation, (len(batch), 4))
                gt_skew = gt + skew
                skewed_iou.append(iou(gt_skew, gt))

                # ims are collated by frame, then batch index
                relevant_ims = ims[frame_idx]
                frames = []
                for idx, item in enumerate(relevant_ims):
                    with Image.open(item) as im:
                        im = F.to_tensor(im)
                        frame = F.normalize(im,
                                            mean=[0.3721, 0.3880, 0.3763],
                                            std=[0.0555, 0.0584, 0.0658])
                        # correct smaller frames
                        if frame.shape[1] < 375:
                            new_frame = torch.zeros([3, 375, frame.shape[2]])
                            new_frame[:, :frame.shape[1], :] = frame
                            frame = new_frame
                        if frame.shape[2] < 1242:
                            new_frame = torch.zeros([3, 375, 1242])
                            new_frame[:, :, :frame.shape[2]] = frame
                            frame = new_frame

                        MASK = False
                        if MASK:

                            other_objs = dataset.frame_objs[item]
Example #12
    def paste_faces_to_input_image(self, save_path=None, upsample_img=None):
        h, w, _ = self.input_img.shape
        h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)

        if upsample_img is None:
            # simply resize the background
            upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
        else:
            upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)

        assert len(self.restored_faces) == len(
            self.inverse_affine_matrices), ('lengths of restored_faces and inverse_affine_matrices differ.')
        for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
            # Add an offset to inverse affine matrix, for more precise back alignment
            if self.upscale_factor > 1:
                extra_offset = 0.5 * self.upscale_factor
            else:
                extra_offset = 0
            inverse_affine[:, 2] += extra_offset
            inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))

            if self.use_parse:
                # inference
                face_input = cv2.resize(restored_face, (512, 512), interpolation=cv2.INTER_LINEAR)
                face_input = img2tensor(face_input.astype('float32') / 255., bgr2rgb=True, float32=True)
                normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
                face_input = torch.unsqueeze(face_input, 0).to(self.device)
                with torch.no_grad():
                    out = self.face_parse(face_input)[0]
                out = out.argmax(dim=1).squeeze().cpu().numpy()

                mask = np.zeros(out.shape)
                MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
                for idx, color in enumerate(MASK_COLORMAP):
                    mask[out == idx] = color
                #  blur the mask
                mask = cv2.GaussianBlur(mask, (101, 101), 11)
                mask = cv2.GaussianBlur(mask, (101, 101), 11)
                # remove the black borders
                thres = 10
                mask[:thres, :] = 0
                mask[-thres:, :] = 0
                mask[:, :thres] = 0
                mask[:, -thres:] = 0
                mask = mask / 255.

                mask = cv2.resize(mask, restored_face.shape[:2])
                mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up), flags=3)
                inv_soft_mask = mask[:, :, None]
                pasted_face = inv_restored

            else:  # use square parse maps
                mask = np.ones(self.face_size, dtype=np.float32)
                inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
                # remove the black borders
                inv_mask_erosion = cv2.erode(
                    inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
                pasted_face = inv_mask_erosion[:, :, None] * inv_restored
                total_face_area = np.sum(inv_mask_erosion)  # // 3
                # compute the fusion edge based on the area of face
                w_edge = int(total_face_area**0.5) // 20
                erosion_radius = w_edge * 2
                inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
                blur_size = w_edge * 2
                inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
                if len(upsample_img.shape) == 2:  # upsample_img is gray image
                    upsample_img = upsample_img[:, :, None]
                inv_soft_mask = inv_soft_mask[:, :, None]

            if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4:  # alpha channel
                alpha = upsample_img[:, :, 3:]
                upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
                upsample_img = np.concatenate((upsample_img, alpha), axis=2)
            else:
                upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img

        if np.max(upsample_img) > 256:  # 16-bit image
            upsample_img = upsample_img.astype(np.uint16)
        else:
            upsample_img = upsample_img.astype(np.uint8)
        if save_path is not None:
            path = os.path.splitext(save_path)[0]
            save_path = f'{path}.{self.save_ext}'
            imwrite(upsample_img, save_path)
        return upsample_img
Example #13
    def __init__(
        self,
        root: str = default_dataset_path("voc-detection"),
        train: bool = True,
        rand_trans: bool = False,
        download: bool = True,
        year: str = "2012",
        image_size: int = 300,
        preprocessing_type: str = None,
        default_boxes: DefaultBoxes = None,
    ):
        if torchvision_import_error is not None:
            raise torchvision_import_error
        if VOCDetection == object:
            raise ValueError(
                "VOC is unsupported on this torchvision version, please upgrade to use"
            )
        if preprocessing_type not in [None, "yolo", "ssd"]:
            raise ValueError(
                "preprocessing type {} not supported, valid values are: {}".
                format(preprocessing_type, [None, "yolo", "ssd"]))

        root = os.path.abspath(os.path.expanduser(root))
        trans = [
            # process annotations
            lambda img, ann: (img, _extract_bounding_box_and_labels(img, ann)),
        ]
        if rand_trans:
            # add random crop, flip, and jitter to pipeline
            jitter_fn = ColorJitter(brightness=0.125,
                                    contrast=0.5,
                                    saturation=0.5,
                                    hue=0.05)
            trans.extend([
                # Random cropping as implemented in SSD paper
                ssd_random_crop_image_and_annotations,
                # random horizontal flip
                random_horizontal_flip_image_and_annotations,
                # image color jitter
                lambda img, ann: (jitter_fn(img), ann),
            ])
        trans.extend([
            # resize image
            lambda img, ann: (F.resize(img, (image_size, image_size)), ann),
            # Convert image to tensor
            lambda img, ann: (F.to_tensor(img), ann),
        ])
        # Normalize image except for yolo preprocessing
        if preprocessing_type != "yolo":
            trans.append(lambda img, ann: (
                F.normalize(img, IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS),
                ann,
            ))

        if preprocessing_type == "ssd":
            default_boxes = default_boxes or get_default_boxes_300(voc=True)
            # encode the bounding boxes and labels with the default boxes
            trans.append(lambda img, ann: (
                img,
                (
                    *default_boxes.encode_image_box_labels(*ann),
                    ann,
                ),  # encoded_boxes, encoded_labels, original_annotations
            ))
        elif preprocessing_type == "yolo":
            trans.append(lambda img, ann: (
                img,
                (bounding_box_and_labels_to_yolo_fmt(ann), ann),
            ))
        super().__init__(
            root,
            year=year,
            image_set="train" if train else "val",
            download=download,
            transforms=AnnotatedImageTransforms(trans),
        )
        self._default_boxes = default_boxes
Example #14
 def __call__(self, sample):
     # print(type(img), type(target), type(bbox_target))
     sample['image'] = F.normalize(sample['image'], self.mean, self.std)
     return sample
Example #15
 def __call__(self, tensor):
     # (B, C, H, W) tensor
     dtype = tensor.dtype
     mean = torch.as_tensor(self.mean, dtype=dtype, device=tensor.device)
     std = torch.as_tensor(self.std, dtype=dtype, device=tensor.device)
     return F.normalize(tensor, mean=mean, std=std, inplace=False)
Example #16
    def track(self, image, info: dict = None) -> dict:
        w_x = self.size[0] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        h_x = self.size[1] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
        s_x = math.ceil(math.sqrt(w_x * h_x))

        x_crop = self.get_subwindow(image,
                                    self.center_pos, cfg.TRACK.INSTANCE_SIZE,
                                    round(s_x), self.channel_average)
        x_crop = x_crop.float().mul(1.0 / 255.0).clamp(0.0, 1.0)
        x_crop[0] = tvisf.normalize(x_crop[0], self.mean, self.std,
                                    self.inplace)
        with torch.no_grad():
            outputs = self.net.track(x_crop, info)
        score = self._convert_score(outputs['pred_logits'])
        pred_bbox = self._convert_bbox(outputs['pred_boxes'])

        # def change(r):
        #     return np.maximum(r, 1. / r)
        #
        # def sz(w, h):
        #     pad = (w + h) * 0.5
        #     return np.sqrt((w + pad) * (h + pad))
        # # pred_box:cx,cy,w,h
        # # scale penalty
        # s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
        #              (sz(self.size[0]/s_x, self.size[1]/s_x)))
        #
        # # aspect ratio penalty
        # r_c = change((self.size[0]/self.size[1]) /
        #              (pred_bbox[2, :]/pred_bbox[3, :]))
        # penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
        # pscore = penalty * score

        # window penalty
        pscore = score * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
                 self.window * cfg.TRACK.WINDOW_INFLUENCE
        # pscore = score
        best_idx = np.argmax(pscore)

        bbox = pred_bbox[:, best_idx]

        bbox = bbox * s_x
        cx = bbox[0] + self.center_pos[0] - s_x / 2
        cy = bbox[1] + self.center_pos[1] - s_x / 2

        # smooth bbox
        # no penalty
        width = bbox[2]
        height = bbox[3]

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width, height,
                                                image.shape[:2])

        # update state
        self.center_pos = np.array([cx, cy])
        self.size = np.array([width, height])

        bbox = [cx - width / 2, cy - height / 2, width, height]

        out = {'target_bbox': bbox, 'best_score': pscore}
        return out
Example #17
 def __call__(self, sample):
     tensor = sample['image']
     sample['image'] = functional.normalize(
         tensor, self.mean, self.std, self.inplace)
     return sample
Example #18
 def __call__(self, tensor, lbl):
     return F.normalize(tensor, self.mean, self.std), lbl
Example #19
 def __call__(self, image, boxes, masks, im_info):
     if self.to_bgr255:
         image = image[[2, 1, 0]] * 255
     image = F.normalize(image, mean=self.mean, std=self.std)
     return image, boxes, masks, im_info
Example #20
 def image_to_tensor(self, img):
     tensor = FT.to_tensor(img)
     tensor = FT.normalize(tensor, [0.485, 0.456, 0.406],
                           [0.229, 0.224, 0.225])
     return tensor
Example #21
def load_to_queue(image_queue, files, device, queue_size, downsample):
    """
    Description
    -----------
    Whenever necessary, loads images, moves them to the target device, and adds
    them to a shared multiprocessing queue, with the goal of keeping the queue
    at a certain size. Intended to run as a worker process for a FrameLoader
    object.

    Parameters
    ----------
    image_queue : multiprocessing.Queue
        Shared queue in which preprocessed images are put.
    files : list of str
        Each str is the path to one file in the track directory.
    device : torch.device
        Specifies whether images should be put on CPU or GPU.
    queue_size : int
        Goal size of the queue; whenever the actual size is smaller, additional
        images are processed and added.
    downsample : int
        Factor by which each frame's height and width are divided before the
        frame is converted to a tensor.
    """

    frame_idx = 0
    while frame_idx < len(files):

        if image_queue.qsize() < queue_size:

            # load next image
            with Image.open(files[frame_idx]) as im:

                # if frame_idx % det_step.value < init_frames:
                #     # convert to CV2 style image
                #     open_cv_image = np.array(im)
                #     im = open_cv_image.copy()
                #     original_im = im[:,:,[2,1,0]].copy()
                #     # new stuff
                #     dim = (im.shape[1], im.shape[0])
                #     im = cv2.resize(im, (1920,1080))
                #     im = im.transpose((2,0,1)).copy()
                #     im = torch.from_numpy(im).float().div(255.0).unsqueeze(0)
                #     dim = torch.FloatTensor(dim).repeat(1,2)
                #     dim = dim.to(device,non_blocking = True)
                # else:
                # keep as tensor
                original_im = np.array(im)[:, :, [2, 1, 0]].copy()
                im = F.resize(im, (int(
                    im.size[1] // downsample), int(im.size[0] // downsample)))
                im = F.to_tensor(im)
                im = F.normalize(im,
                                 mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                dim = None

                # store preprocessed image, dimensions and original image
                im = im.to(device)
                frame = (frame_idx, im, dim, original_im)

                # append to queue
                image_queue.put(frame)

            frame_idx += 1

    # neverending loop, because if the process ends, the tensors originally
    # initialized in this function will be deleted, causing issues. Thus, this
    # function runs until a call to self.next() returns -1, indicating end of track
    # has been reached
    while True:
        time.sleep(5)
Example #22
 def __call__(self, sample):
     image, keypoints = sample['image'], sample['keypoints']
     image = F.normalize(image, self.mean, self.std)
     return {'image': image, 'keypoints': keypoints}
Example #23
# :func:`~torchvision.models.segmentation.fcn_resnet50`.  You can also try using
# DeepLabv3 (:func:`~torchvision.models.segmentation.deeplabv3_resnet50`) or
# lraspp mobilenet models
# (:func:`~torchvision.models.segmentation.lraspp_mobilenet_v3_large`).
#
# Let's start by looking at the output of the model. Remember that in general,
# images must be normalized before they're passed to a semantic segmentation
# model.

from torchvision.models.segmentation import fcn_resnet50

model = fcn_resnet50(pretrained=True, progress=False)
model = model.eval()

normalized_batch = F.normalize(batch,
                               mean=(0.485, 0.456, 0.406),
                               std=(0.229, 0.224, 0.225))
output = model(normalized_batch)['out']
print(output.shape, output.min().item(), output.max().item())

#####################################
# As we can see above, the output of the segmentation model is a tensor of shape
# ``(batch_size, num_classes, H, W)``. Each value is a non-normalized score, and
# we can normalize them into ``[0, 1]`` by using a softmax. After the softmax,
# we can interpret each value as a probability indicating how likely a given
# pixel is to belong to a given class.
#
# Let's plot the masks that have been detected for the dog class and for the
# boat class:
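
# A minimal sketch of the softmax step described above (assuming ``torch`` is
# imported earlier in the tutorial and ``output`` is the tensor computed above;
# the variable name ``normalized_masks`` is ours): softmax over the class
# dimension turns the raw scores into per-pixel class probabilities.
normalized_masks = torch.nn.functional.softmax(output, dim=1)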

sem_classes = [
Example #24
 def __call__(self, image, bbox):
     image = F.normalize(image, mean=self.mean, std=self.std)
     bbox /= 128
     return image, bbox
Example #25
def transform(image,
              label,
              logits=None,
              crop_size=(512, 512),
              scale_size=(0.8, 1.0),
              augmentation=True):
    # Random rescale image
    raw_w, raw_h = image.size
    scale_ratio = random.uniform(scale_size[0], scale_size[1])

    resized_size = (int(raw_h * scale_ratio), int(raw_w * scale_ratio))
    image = transforms_f.resize(image, resized_size, Image.BILINEAR)
    label = transforms_f.resize(label, resized_size, Image.NEAREST)
    if logits is not None:
        logits = transforms_f.resize(logits, resized_size, Image.NEAREST)

    # Add padding if rescaled image size is less than crop size
    if crop_size == -1:  # use original im size without crop or padding
        crop_size = (raw_h, raw_w)

    if crop_size[0] > resized_size[0] or crop_size[1] > resized_size[1]:
        right_pad, bottom_pad = max(crop_size[1] - resized_size[1],
                                    0), max(crop_size[0] - resized_size[0], 0)
        image = transforms_f.pad(image,
                                 padding=(0, 0, right_pad, bottom_pad),
                                 padding_mode='reflect')
        label = transforms_f.pad(label,
                                 padding=(0, 0, right_pad, bottom_pad),
                                 fill=255,
                                 padding_mode='constant')
        if logits is not None:
            logits = transforms_f.pad(logits,
                                      padding=(0, 0, right_pad, bottom_pad),
                                      fill=0,
                                      padding_mode='constant')

    # Random Cropping
    i, j, h, w = transforms.RandomCrop.get_params(image, output_size=crop_size)
    image = transforms_f.crop(image, i, j, h, w)
    label = transforms_f.crop(label, i, j, h, w)
    if logits is not None:
        logits = transforms_f.crop(logits, i, j, h, w)

    if augmentation:
        # Random color jitter
        if torch.rand(1) > 0.2:
            #  color_transform = transforms.ColorJitter((0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))  For PyTorch 1.9/TorchVision 0.10 users
            color_transform = transforms.ColorJitter.get_params(
                (0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))
            image = color_transform(image)

        # Random Gaussian filter
        if torch.rand(1) > 0.5:
            sigma = random.uniform(0.15, 1.15)
            image = image.filter(ImageFilter.GaussianBlur(radius=sigma))

        # Random horizontal flipping
        if torch.rand(1) > 0.5:
            image = transforms_f.hflip(image)
            label = transforms_f.hflip(label)
            if logits is not None:
                logits = transforms_f.hflip(logits)

    # Transform to tensor
    image = transforms_f.to_tensor(image)
    label = (transforms_f.to_tensor(label) * 255).long()
    label[label == 255] = -1  # invalid pixels are re-mapped to index -1
    if logits is not None:
        logits = transforms_f.to_tensor(logits)

    # Apply (ImageNet) normalisation
    image = transforms_f.normalize(image,
                                   mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225])
    if logits is not None:
        return image, label, logits
    else:
        return image, label
Example #26
 def __call__(self, tensor):
     if isinstance(tensor, np.ndarray):
         return (tensor - self._mean.reshape(-1, 1, 1)) / self._std.reshape(
             -1, 1, 1)
     return normalize(tensor, self._mean, self._std)
Example #27
 def __call__(self, tensor):
     return (F.normalize(tensor[0], self.mean, self.std, self.inplace),
             F.normalize(tensor[1], self.mean, self.std, self.inplace),
             F.normalize(tensor[2], self.mean, self.std, self.inplace))
Example #28
 def __call__(self, sample):
     uv_map, origin = sample['uv_map'], sample['origin']
     origin = F.normalize(origin, self.mean, self.std, self.inplace)
     return {'uv_map': uv_map, 'origin': origin}
Example #29
 def __getitem__(self, index):
     img = torch.rand(*self.shape)
     target = 0  # Dummy target value
     return F.normalize(img, normalizing_mean, normalizing_std), target
Example #30
def img_to_tensor(im, normalize=None):
    tensor = torch.from_numpy(np.moveaxis(im / (255. if im.dtype == np.uint8 else 1), -1, 0).astype(np.float32))
    if normalize is not None:
        return F.normalize(tensor, **normalize)
    return tensor
Example #31
    img2_path = os.path.join(test_dir_str, name2)
    img1m_path = os.path.join(test_dir_str, name1m)
    img2m_path = os.path.join(test_dir_str, name2m)

    '''
    img1 = ttf.to_tensor(ttf.resize(Image.open(img1_path), 112))
    img2 = ttf.to_tensor(ttf.resize(Image.open(img2_path), 112))
    img1m = ttf.to_tensor(ttf.resize(Image.open(img1m_path), 112))
    img2m = ttf.to_tensor(ttf.resize(Image.open(img2m_path), 112))
    '''
    img1 = ttf.to_tensor(Image.open(img1_path))
    img2 = ttf.to_tensor(Image.open(img2_path))
    img1m = ttf.to_tensor(Image.open(img1m_path))
    img2m = ttf.to_tensor(Image.open(img2m_path))

    img1 = ttf.normalize(img1, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
    img2 = ttf.normalize(img2, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
    img1m = ttf.normalize(img1m, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
    img2m = ttf.normalize(img2m, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])

    img1 = Variable(img1.cuda())
    img2 = Variable(img2.cuda())
    img1m = Variable(img1m.cuda())
    img2m = Variable(img2m.cuda())

    imgs = torch.stack([img1, img2, img1m, img2m], dim=0)

    # print(imgs)  torch.cuda.FloatTensor of size 2x3x160x160 (GPU 0)

    output = net(imgs)
    f = output.data