def __call__(self, image, target):
    image = F.normalize(image, mean=self.mean, std=self.std)
    return image, target
def forward(self, img):
    # normalize each frame, then re-stack into a batch
    # (note: assumes mean/std are stored on the module; the bare names in the
    # original would otherwise be undefined)
    img = torch.stack([F.normalize(i, self.mean, self.std) for i in img])
    x = self.base(img)  # x is a dict
    return x
def __call__(self, img, mask):
    return normalize(img, self.mean, self.std, inplace=False), mask
def __call__(self, image, target):
    if self.to_bgr255:
        # convert RGB [0, 1] to BGR [0, 255] before normalizing
        image = image[[2, 1, 0]] * 255
    image = F.normalize(image, mean=self.mean, std=self.std)
    return image, target
def to_tensor_norm(self, img):
    img = Image.fromarray(img)
    img_t = F.to_tensor(img).float()
    img_t = F.normalize(img_t, self.mean, self.std)  # normalize with the given mean and std
    return img_t
def __call__(self, results):
    results['img'] = F.normalize(
        results['img'], mean=self.mean, std=self.std)
    return results
def __call__(self, sample):
    image, label = sample['image'], sample['label']
    image = F.normalize(image, self.mean, self.std)
    return {'image': image, 'label': label}
def __call__(self, images):
    normalized = np.stack([
        F.normalize(x, self.mean, self.std, self.inplace) for x in images
    ])
    return normalized
def transform_image(self, image):
    return tvisf.normalize(image, self.mean, self.std, self.inplace)
def __call__(self, image, cropped_image, target, **kwargs):
    output_image = F.normalize(image, mean=self.mean, std=self.std)
    output_cropped_image = F.normalize(cropped_image, mean=self.mean, std=self.std)
    return output_image, output_cropped_image, target
# get starting error
degradation = np.array([2, 2, 4, 0.01]) * 0  # should roughly equal localizer error covariance
# (multiplied by 0, so no skew is actually applied here)
skew = np.random.normal(0, degradation, (len(batch), 4))
gt_skew = gt + skew
skewed_iou.append(iou(gt_skew, gt))

# ims are collated by frame, then batch index
relevant_ims = ims[frame_idx]
frames = []
for idx, item in enumerate(relevant_ims):
    with Image.open(item) as im:
        im = F.to_tensor(im)
        frame = F.normalize(im,
                            mean=[0.3721, 0.3880, 0.3763],
                            std=[0.0555, 0.0584, 0.0658])

    # correct smaller frames by zero-padding up to 375 x 1242
    if frame.shape[1] < 375:
        new_frame = torch.zeros([3, 375, frame.shape[2]])
        new_frame[:, :frame.shape[1], :] = frame
        frame = new_frame
    if frame.shape[2] < 1242:
        new_frame = torch.zeros([3, 375, 1242])
        new_frame[:, :, :frame.shape[2]] = frame
        frame = new_frame

    MASK = False
    if MASK:
        other_objs = dataset.frame_objs[item]
def paste_faces_to_input_image(self, save_path=None, upsample_img=None):
    h, w, _ = self.input_img.shape
    h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)

    if upsample_img is None:
        # simply resize the background
        upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
    else:
        upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)

    assert len(self.restored_faces) == len(
        self.inverse_affine_matrices), ('length of restored_faces and affine_matrices are different.')
    for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
        # add an offset to the inverse affine matrix, for more precise back alignment
        if self.upscale_factor > 1:
            extra_offset = 0.5 * self.upscale_factor
        else:
            extra_offset = 0
        inverse_affine[:, 2] += extra_offset
        inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))

        if self.use_parse:
            # inference
            face_input = cv2.resize(restored_face, (512, 512), interpolation=cv2.INTER_LINEAR)
            face_input = img2tensor(face_input.astype('float32') / 255., bgr2rgb=True, float32=True)
            normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
            face_input = torch.unsqueeze(face_input, 0).to(self.device)
            with torch.no_grad():
                out = self.face_parse(face_input)[0]
            out = out.argmax(dim=1).squeeze().cpu().numpy()

            mask = np.zeros(out.shape)
            MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
            for idx, color in enumerate(MASK_COLORMAP):
                mask[out == idx] = color
            # blur the mask
            mask = cv2.GaussianBlur(mask, (101, 101), 11)
            mask = cv2.GaussianBlur(mask, (101, 101), 11)
            # remove the black borders
            thres = 10
            mask[:thres, :] = 0
            mask[-thres:, :] = 0
            mask[:, :thres] = 0
            mask[:, -thres:] = 0
            mask = mask / 255.

            mask = cv2.resize(mask, restored_face.shape[:2])
            mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up), flags=3)
            inv_soft_mask = mask[:, :, None]
            pasted_face = inv_restored
        else:  # use square parse maps
            mask = np.ones(self.face_size, dtype=np.float32)
            inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
            # remove the black borders
            inv_mask_erosion = cv2.erode(
                inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
            pasted_face = inv_mask_erosion[:, :, None] * inv_restored
            total_face_area = np.sum(inv_mask_erosion)  # // 3
            # compute the fusion edge based on the area of the face
            w_edge = int(total_face_area**0.5) // 20
            erosion_radius = w_edge * 2
            inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
            blur_size = w_edge * 2
            inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
            if len(upsample_img.shape) == 2:  # upsample_img is a gray image
                upsample_img = upsample_img[:, :, None]
            inv_soft_mask = inv_soft_mask[:, :, None]

        if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4:  # alpha channel
            alpha = upsample_img[:, :, 3:]
            upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
            upsample_img = np.concatenate((upsample_img, alpha), axis=2)
        else:
            upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img

    if np.max(upsample_img) > 256:  # 16-bit image
        upsample_img = upsample_img.astype(np.uint16)
    else:
        upsample_img = upsample_img.astype(np.uint8)
    if save_path is not None:
        path = os.path.splitext(save_path)[0]
        save_path = f'{path}.{self.save_ext}'
        imwrite(upsample_img, save_path)
    return upsample_img
def __init__(
    self,
    root: str = default_dataset_path("voc-detection"),
    train: bool = True,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2012",
    image_size: int = 300,
    preprocessing_type: str = None,
    default_boxes: DefaultBoxes = None,
):
    if torchvision_import_error is not None:
        raise torchvision_import_error
    if VOCDetection == object:
        raise ValueError(
            "VOC is unsupported on this torchvision version, please upgrade to use"
        )
    if preprocessing_type not in [None, "yolo", "ssd"]:
        raise ValueError(
            "preprocessing type {} not supported, valid values are: {}".format(
                preprocessing_type, [None, "yolo", "ssd"]
            )
        )
    root = os.path.abspath(os.path.expanduser(root))
    trans = [
        # process annotations
        lambda img, ann: (img, _extract_bounding_box_and_labels(img, ann)),
    ]
    if rand_trans:
        # add random crop, flip, and jitter to the pipeline
        jitter_fn = ColorJitter(
            brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05
        )
        trans.extend([
            # random cropping as implemented in the SSD paper
            ssd_random_crop_image_and_annotations,
            # random horizontal flip
            random_horizontal_flip_image_and_annotations,
            # image color jitter
            lambda img, ann: (jitter_fn(img), ann),
        ])
    trans.extend([
        # resize image
        lambda img, ann: (F.resize(img, (image_size, image_size)), ann),
        # convert image to tensor
        lambda img, ann: (F.to_tensor(img), ann),
    ])
    # normalize image except for yolo preprocessing
    if preprocessing_type != "yolo":
        trans.append(lambda img, ann: (
            F.normalize(img, IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS),
            ann,
        ))
    if preprocessing_type == "ssd":
        default_boxes = default_boxes or get_default_boxes_300(voc=True)
        # encode the bounding boxes and labels with the default boxes
        trans.append(lambda img, ann: (
            img,
            (
                *default_boxes.encode_image_box_labels(*ann),
                ann,
            ),  # encoded_boxes, encoded_labels, original_annotations
        ))
    elif preprocessing_type == "yolo":
        trans.append(lambda img, ann: (
            img,
            (bounding_box_and_labels_to_yolo_fmt(ann), ann),
        ))
    super().__init__(
        root,
        year=year,
        image_set="train" if train else "val",
        download=download,
        transforms=AnnotatedImageTransforms(trans),
    )
    self._default_boxes = default_boxes
def __call__(self, sample):
    sample['image'] = F.normalize(sample['image'], self.mean, self.std)
    return sample
def __call__(self, tensor):  # (B, C, H, W) tensor
    dtype = tensor.dtype
    mean = torch.as_tensor(self.mean, dtype=dtype, device=tensor.device)
    std = torch.as_tensor(self.std, dtype=dtype, device=tensor.device)
    return F.normalize(tensor, mean=mean, std=std, inplace=False)
def track(self, image, info: dict = None) -> dict:
    # search region: target size plus a context margin of half the perimeter
    w_x = self.size[0] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
    h_x = self.size[1] + (4 - 1) * ((self.size[0] + self.size[1]) * 0.5)
    s_x = math.ceil(math.sqrt(w_x * h_x))

    x_crop = self.get_subwindow(image, self.center_pos,
                                cfg.TRACK.INSTANCE_SIZE,
                                round(s_x), self.channel_average)
    x_crop = x_crop.float().mul(1.0 / 255.0).clamp(0.0, 1.0)
    x_crop[0] = tvisf.normalize(x_crop[0], self.mean, self.std, self.inplace)
    with torch.no_grad():
        outputs = self.net.track(x_crop, info)

    score = self._convert_score(outputs['pred_logits'])
    pred_bbox = self._convert_bbox(outputs['pred_boxes'])

    # the SiamRPN-style scale and aspect-ratio penalties are disabled here;
    # only the cosine window penalty is applied
    pscore = score * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
        self.window * cfg.TRACK.WINDOW_INFLUENCE
    best_idx = np.argmax(pscore)

    # pred_bbox is cx, cy, w, h in crop coordinates; map back to the image
    bbox = pred_bbox[:, best_idx]
    bbox = bbox * s_x
    cx = bbox[0] + self.center_pos[0] - s_x / 2
    cy = bbox[1] + self.center_pos[1] - s_x / 2

    # smooth bbox: no penalty applied to width and height
    width = bbox[2]
    height = bbox[3]

    # clip boundary
    cx, cy, width, height = self._bbox_clip(cx, cy, width, height,
                                            image.shape[:2])

    # update state
    self.center_pos = np.array([cx, cy])
    self.size = np.array([width, height])

    bbox = [cx - width / 2, cy - height / 2, width, height]
    out = {'target_bbox': bbox, 'best_score': pscore}
    return out
def __call__(self, sample):
    tensor = sample['image']
    sample['image'] = functional.normalize(
        tensor, self.mean, self.std, self.inplace)
    return sample
def __call__(self, tensor, lbl):
    return F.normalize(tensor, self.mean, self.std), lbl
def __call__(self, image, boxes, masks, im_info):
    if self.to_bgr255:
        # convert RGB [0, 1] to BGR [0, 255] before normalizing
        image = image[[2, 1, 0]] * 255
    image = F.normalize(image, mean=self.mean, std=self.std)
    return image, boxes, masks, im_info
def image_to_tensor(self, img):
    tensor = FT.to_tensor(img)
    # ImageNet mean and std
    tensor = FT.normalize(tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    return tensor
def load_to_queue(image_queue, files, device, queue_size, downsample):
    """
    Loads images, moves them to the target device, and adds them to a shared
    multiprocessing queue, with the goal of the queue always having a certain
    size. Meant to be called as a worker process by a FrameLoader object.

    Parameters
    ----------
    image_queue : multiprocessing.Queue
        Shared queue in which preprocessed images are put.
    files : list of str
        Each str is the path to one file in the track directory.
    device : torch.device
        Specifies whether images should be put on CPU or GPU.
    queue_size : int
        Goal size of the queue; whenever the actual size is less, additional
        images are processed and added.
    downsample : int
        Factor by which each image is downscaled before being queued.
    """
    frame_idx = 0
    while frame_idx < len(files):
        if image_queue.qsize() < queue_size:
            # load next image, keeping a BGR copy of the original
            with Image.open(files[frame_idx]) as im:
                original_im = np.array(im)[:, :, [2, 1, 0]].copy()
                im = F.resize(im, (int(im.size[1] // downsample),
                                   int(im.size[0] // downsample)))
                im = F.to_tensor(im)
                im = F.normalize(im,
                                 mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                dim = None

            # store preprocessed image, dimensions and original image
            im = im.to(device)
            frame = (frame_idx, im, dim, original_im)

            # append to queue
            image_queue.put(frame)
            frame_idx += 1

    # neverending loop, because if the process ends, the tensors originally
    # initialized in this function will be deleted, causing issues. Thus, this
    # function runs until a call to self.next() returns -1, indicating the end
    # of the track has been reached
    while True:
        time.sleep(5)
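# A minimal sketch of how a worker like load_to_queue above might be launched.
# This driver is hypothetical, not from the source: torch.multiprocessing is
# used so CUDA tensors can be shared through the queue, the frame glob pattern
# is assumed, and the queue size (5) and downsample factor (2) are
# illustrative values.

import glob

import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn', force=True)  # required for CUDA in subprocesses
    files = sorted(glob.glob('frames/*.png'))  # assumed frame layout
    image_queue = mp.Queue()
    worker = mp.Process(target=load_to_queue,
                        args=(image_queue, files, torch.device('cuda:0'), 5, 2))
    worker.daemon = True  # the worker's keep-alive loop then dies with the parent
    worker.start()
    frame_idx, im, dim, original_im = image_queue.get()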
def __call__(self, sample):
    image, keypoints = sample['image'], sample['keypoints']
    image = F.normalize(image, self.mean, self.std)
    return {'image': image, 'keypoints': keypoints}
# :func:`~torchvision.models.segmentation.fcn_resnet50`. You can also try using
# DeepLabv3 (:func:`~torchvision.models.segmentation.deeplabv3_resnet50`) or
# LR-ASPP MobileNetV3 models
# (:func:`~torchvision.models.segmentation.lraspp_mobilenet_v3_large`).
#
# Let's start by looking at the output of the model. Remember that in general,
# images must be normalized before they're passed to a semantic segmentation
# model.

from torchvision.models.segmentation import fcn_resnet50

model = fcn_resnet50(pretrained=True, progress=False)
model = model.eval()

normalized_batch = F.normalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
output = model(normalized_batch)['out']
print(output.shape, output.min().item(), output.max().item())

#####################################
# As we can see above, the output of the segmentation model is a tensor of shape
# ``(batch_size, num_classes, H, W)``. Each value is a non-normalized score, and
# we can normalize them into ``[0, 1]`` by using a softmax. After the softmax,
# we can interpret each value as a probability indicating how likely a given
# pixel is to belong to a given class.
#
# Let's plot the masks that have been detected for the dog class and for the
# boat class:

sem_classes = [
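# A minimal sketch of the softmax step described above, not part of the
# original listing: ``output`` is assumed to be the
# ``(batch_size, num_classes, H, W)`` tensor produced by the model call, with
# ``torch`` imported as in the surrounding tutorial.

normalized_masks = torch.nn.functional.softmax(output, dim=1)
# each pixel's scores now sum to 1 across the class dimension, so they can be
# read as per-class probabilities
print(normalized_masks.shape, normalized_masks.min().item(), normalized_masks.max().item())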
def __call__(self, image, bbox):
    image = F.normalize(image, mean=self.mean, std=self.std)
    bbox /= 128  # scale bbox coordinates down by 128
    return image, bbox
def transform(image, label, logits=None, crop_size=(512, 512), scale_size=(0.8, 1.0), augmentation=True):
    # Random rescale image
    raw_w, raw_h = image.size
    scale_ratio = random.uniform(scale_size[0], scale_size[1])
    resized_size = (int(raw_h * scale_ratio), int(raw_w * scale_ratio))
    image = transforms_f.resize(image, resized_size, Image.BILINEAR)
    label = transforms_f.resize(label, resized_size, Image.NEAREST)
    if logits is not None:
        logits = transforms_f.resize(logits, resized_size, Image.NEAREST)

    # Add padding if rescaled image size is less than crop size
    if crop_size == -1:  # use original image size without crop or padding
        crop_size = (raw_h, raw_w)
    if crop_size[0] > resized_size[0] or crop_size[1] > resized_size[1]:
        right_pad = max(crop_size[1] - resized_size[1], 0)
        bottom_pad = max(crop_size[0] - resized_size[0], 0)
        image = transforms_f.pad(image, padding=(0, 0, right_pad, bottom_pad), padding_mode='reflect')
        label = transforms_f.pad(label, padding=(0, 0, right_pad, bottom_pad), fill=255, padding_mode='constant')
        if logits is not None:
            logits = transforms_f.pad(logits, padding=(0, 0, right_pad, bottom_pad), fill=0, padding_mode='constant')

    # Random cropping
    i, j, h, w = transforms.RandomCrop.get_params(image, output_size=crop_size)
    image = transforms_f.crop(image, i, j, h, w)
    label = transforms_f.crop(label, i, j, h, w)
    if logits is not None:
        logits = transforms_f.crop(logits, i, j, h, w)

    if augmentation:
        # Random color jitter
        if torch.rand(1) > 0.2:
            # For PyTorch 1.9 / torchvision 0.10 users:
            # color_transform = transforms.ColorJitter((0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))
            color_transform = transforms.ColorJitter.get_params(
                (0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))
            image = color_transform(image)

        # Random Gaussian filter
        if torch.rand(1) > 0.5:
            sigma = random.uniform(0.15, 1.15)
            image = image.filter(ImageFilter.GaussianBlur(radius=sigma))

        # Random horizontal flipping
        if torch.rand(1) > 0.5:
            image = transforms_f.hflip(image)
            label = transforms_f.hflip(label)
            if logits is not None:
                logits = transforms_f.hflip(logits)

    # Transform to tensor
    image = transforms_f.to_tensor(image)
    label = (transforms_f.to_tensor(label) * 255).long()
    label[label == 255] = -1  # invalid pixels are re-mapped to index -1
    if logits is not None:
        logits = transforms_f.to_tensor(logits)

    # Apply (ImageNet) normalisation
    image = transforms_f.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    if logits is not None:
        return image, label, logits
    return image, label
def __call__(self, tensor):
    if isinstance(tensor, np.ndarray):
        # broadcast per-channel mean/std over H and W
        return (tensor - self._mean.reshape(-1, 1, 1)) / self._std.reshape(-1, 1, 1)
    return normalize(tensor, self._mean, self._std)
def __call__(self, tensor):
    # normalize each of the three tensors in the tuple
    return (F.normalize(tensor[0], self.mean, self.std, self.inplace),
            F.normalize(tensor[1], self.mean, self.std, self.inplace),
            F.normalize(tensor[2], self.mean, self.std, self.inplace))
def __call__(self, sample):
    uv_map, origin = sample['uv_map'], sample['origin']
    origin = F.normalize(origin, self.mean, self.std, self.inplace)
    return {'uv_map': uv_map, 'origin': origin}
def __getitem__(self, index):
    img = torch.rand(*self.shape)
    target = 0  # dummy target value
    return F.normalize(img, normalizing_mean, normalizing_std), target
def img_to_tensor(im, normalize=None):
    # HWC -> CHW; scale uint8 images to [0, 1] first
    tensor = torch.from_numpy(
        np.moveaxis(im / (255. if im.dtype == np.uint8 else 1), -1, 0).astype(np.float32))
    if normalize is not None:
        return F.normalize(tensor, **normalize)
    return tensor
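# Hypothetical usage of img_to_tensor above: an HWC uint8 image becomes a
# normalized CHW float tensor. The mean/std values are the usual ImageNet
# statistics, assumed here purely for illustration.

import numpy as np

dummy = np.zeros((224, 224, 3), dtype=np.uint8)  # dummy HWC uint8 image
tensor = img_to_tensor(dummy, normalize={'mean': [0.485, 0.456, 0.406],
                                         'std': [0.229, 0.224, 0.225]})
assert tensor.shape == (3, 224, 224)  # CHW float32, normalized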
img2_path = os.path.join(test_dir_str, name2)
img1m_path = os.path.join(test_dir_str, name1m)
img2m_path = os.path.join(test_dir_str, name2m)
'''
img1 = ttf.to_tensor(ttf.resize(Image.open(img1_path), 112))
img2 = ttf.to_tensor(ttf.resize(Image.open(img2_path), 112))
img1m = ttf.to_tensor(ttf.resize(Image.open(img1m_path), 112))
img2m = ttf.to_tensor(ttf.resize(Image.open(img2m_path), 112))
'''
img1 = ttf.to_tensor(Image.open(img1_path))
img2 = ttf.to_tensor(Image.open(img2_path))
img1m = ttf.to_tensor(Image.open(img1m_path))
img2m = ttf.to_tensor(Image.open(img2m_path))

img1 = ttf.normalize(img1, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
img2 = ttf.normalize(img2, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
img1m = ttf.normalize(img1m, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])
img2m = ttf.normalize(img2m, [0.5, 0.5, 0.5], [0.2, 0.2, 0.2])

# torch.autograd.Variable is deprecated; moving the tensors to the GPU is enough
img1 = img1.cuda()
img2 = img2.cuda()
img1m = img1m.cuda()
img2m = img2m.cuda()

imgs = torch.stack([img1, img2, img1m, img2m], dim=0)  # 4 x 3 x H x W batch
output = net(imgs)
f = output.data