def predict_image(image):
    """Run `model` on a single image and return all of its outputs."""
    image_tensor = transforms(image).float().unsqueeze(0)
    image_tensor = image_tensor.to(device)
    # Variable is deprecated; tensors can be fed to the model directly.
    with torch.no_grad():
        final, residual_img, upscaled_image, com_img, orig_im = model(image_tensor)
    # The original bare `return` discarded every computed output.
    return final, residual_img, upscaled_image, com_img, orig_im
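# A minimal usage sketch for predict_image, assuming `model`, `transforms`,
# and `device` are defined at module level as above; the image path is
# hypothetical.
def _example_predict():
    from PIL import Image
    img = Image.open('example.jpg').convert('RGB')
    final, residual, upscaled, com, orig = predict_image(img)
    return final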
def classify_eye(eye_image: Image.Image) -> Tuple[str, float]:
    """Classify the eye colour in an image of an eye.

    The classes used are: amber, blue, brown, gray, grayscale, green,
    hazel, and red.

    Returns the predicted class as a string and the confidence of the
    prediction.
    """
    tensor = transforms(eye_image).unsqueeze(0).to(device)
    with torch.no_grad():  # inference only, no gradient bookkeeping
        class_outputs = classifier(tensor)
    class_prediction = torch.argmax(class_outputs).item()
    confidence = torch.nn.functional.softmax(class_outputs.squeeze(), dim=0).cpu().numpy()
    return classes[class_prediction], confidence[class_prediction]
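# Hedged usage sketch for classify_eye: assumes `classifier`, `classes`,
# `transforms`, and `device` exist at module level as above; the file name
# is hypothetical.
def _example_classify():
    from PIL import Image
    eye = Image.open('eye.jpg').convert('RGB')
    label, score = classify_eye(eye)
    print(f'{label}: {score:.2%}')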
def __getitem__(self, index: int) -> Tuple[Any, Any]:
    coco = self.coco
    img_id = self.ids[index]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    # COCO annotations store each box as [x, y, width, height]; pull the
    # boxes out of the annotation dicts rather than hard-coding them.
    bboxes = [ann['bbox'] for ann in coco.loadAnns(ann_ids)]
    path = coco.loadImgs(img_id)[0]['file_name']
    image = np.array(Image.open(os.path.join(self.root, path)).convert('RGB'))

    # These imports and the Compose pipeline would normally live at module
    # level; rebuilding the pipeline on every call is wasteful.
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    import cv2

    IMAGE_SIZE = 416
    transforms = A.Compose(
        [
            A.LongestMaxSize(max_size=IMAGE_SIZE),
            A.PadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE,
                          border_mode=cv2.BORDER_CONSTANT),
            A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
            A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6,
                          hue=0.6, p=0.6),
            A.ShiftScaleRotate(rotate_limit=10, p=0.4,
                               border_mode=cv2.BORDER_CONSTANT),
            A.HorizontalFlip(p=0.5),
            A.Blur(p=0.2),
            A.CLAHE(p=0.2),
            A.Posterize(p=0.2),
            A.ToGray(p=0.1),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format="coco", min_visibility=0.4,
                                 label_fields=[]),
    )
    augmentations = transforms(image=image, bboxes=bboxes)
    image = augmentations['image']
    # Boxes may be dropped by min_visibility; the reshape keeps the empty
    # case well-formed as a (0, 4) tensor.
    target = torch.tensor(augmentations['bboxes'], dtype=torch.float32).reshape(-1, 4)
    return image, target
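# Hedged usage sketch: assumes this __getitem__ belongs to a CocoDetection-
# style dataset class, here hypothetically named CocoDataset(root, annFile);
# the paths are placeholders.
def _example_coco_loader():
    from torch.utils.data import DataLoader
    dataset = CocoDataset(root='coco/train2017',
                          annFile='coco/annotations/instances_train2017.json')
    # The number of boxes varies per image, so collate batches as plain lists.
    loader = DataLoader(dataset, batch_size=4,
                        collate_fn=lambda batch: tuple(zip(*batch)))
    images, targets = next(iter(loader))
    return images, targets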
def __call__(self, sample, consistent_flip=False):
    if consistent_flip and torch.rand(1).item() < 0.5:
        # Decide the flip once and apply it with the functional API so that
        # every transformed output shares the same orientation; applying a
        # fresh RandomHorizontalFlip per branch would flip each output
        # independently.
        sample = torchvision.transforms.functional.hflip(sample)
    output = [transform(sample) for transform in self.transforms]
    return output
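# Hedged usage sketch: assumes the __call__ above belongs to a small
# multi-output transform class, here hypothetically named MultiTransform
# with a `transforms` attribute holding the per-branch pipelines.
def _example_multi_transform():
    import torchvision.transforms as T
    from PIL import Image
    multi = MultiTransform()  # hypothetical constructor
    multi.transforms = [T.ToTensor(), T.Compose([T.Resize(128), T.ToTensor()])]
    img = Image.open('example.jpg').convert('RGB')
    full, resized = multi(img, consistent_flip=True)
    return full, resized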
def get_batch(self, img_pil, mask_pil, transforms, idx, class_pil=None,
              void=255, inst_pil=None, name=None, points=None):
    # Prefer the instance map when available, otherwise fall back to the
    # semantic mask; void pixels are zeroed out in either case.
    if inst_pil is not None:
        mask = np.array(inst_pil)
    else:
        mask = np.array(mask_pil)
    mask_void = mask == void
    mask[mask_void] = 0

    # instances are encoded as different colors
    obj_ids = np.unique(mask)
    # first id is the background, so remove it
    obj_ids = obj_ids[1:]

    # split the color-encoded mask into a set of binary masks
    masks = mask == obj_ids[:, None, None]

    # get bounding box coordinates for each mask
    num_objs = len(obj_ids)
    # if num_objs == 0:
    #     raise ValueError('should have car')
    boxes = []
    for i in range(num_objs):
        pos = np.where(masks[i])
        xmin = np.min(pos[1])
        xmax = np.max(pos[1])
        ymin = np.min(pos[0])
        ymax = np.max(pos[0])
        boxes.append([xmin, ymin, xmax, ymax])
    boxes = torch.as_tensor(boxes, dtype=torch.float32)

    if inst_pil is not None:
        # Recover each instance's class id from the semantic mask.
        mask_color = np.array(mask_pil)
        labels = []
        for m in masks:
            labels += [(mask_color * m).max()]
        labels = torch.LongTensor(labels)
    else:
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

    masks = torch.as_tensor(masks, dtype=torch.uint8)
    image_id = torch.tensor([idx])
    if num_objs == 0:
        area = torch.tensor([])
    else:
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

    target = {}
    target["boxes"] = boxes
    target["labels"] = labels
    target["masks"] = masks
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd

    if transforms is not None and len(boxes):
        img, target = transforms(img_pil, target)
    else:
        import torchvision  # would normally be a module-level import
        img = torchvision.transforms.ToTensor()(img_pil)
        target = None

    # Multi-hot class vector over the foreground classes.
    labels_tensor = torch.zeros(self.n_classes - 1)
    labels_tensor[labels - 1] = 1

    batch_dict = {'images': img,
                  'mask_pil': mask_pil,
                  'image_pil': img_pil,
                  'original': np.array(img_pil),
                  'inst_pil': inst_pil,
                  'label': labels_tensor,
                  'points': points,
                  'mask_void': torch.FloatTensor(mask_void),
                  'targets': target,
                  'meta': {'index': idx, 'name': name}}
    return batch_dict
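# Hedged usage sketch for get_batch, assuming it belongs to a segmentation
# dataset object with an `n_classes` attribute; the file names are
# hypothetical. With transforms=None, batch['targets'] comes back as None
# and only the ToTensor image and multi-hot label are populated.
def _example_get_batch(dataset):
    from PIL import Image
    img = Image.open('image.png').convert('RGB')
    mask = Image.open('mask.png')
    batch = dataset.get_batch(img, mask, transforms=None, idx=0)
    print(batch['images'].shape, batch['label'].shape)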
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torchvision.models.vgg16(pretrained=True).to(device)

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
preprocess = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

img_path = 'cat.jpg'
img = cv2.imread(img_path)
# OpenCV loads BGR; torchvision's ImageNet models expect RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_transform = preprocess(img).unsqueeze(0).to(device)
print(img.shape)

# Split VGG16 into a feature extractor (conv blocks) and a classifier head
# (avgpool -> flatten -> fully connected layers) for Grad-CAM.
feature_fn = torch.nn.Sequential(*list(model.children())[:-2]).to(device)
classifier_fn = torch.nn.Sequential(
    *(list(model.children())[-2:-1] + [Flatten()] + list(model.children())[-1:])
).to(device)
middle = feature_fn(img_transform)
out = classifier_fn(middle)  # forward-pass sanity check: logits for the input

sal = GradCam(img_transform, feature_fn, classifier_fn)
sal_norm = sal / np.max(sal)
heatmap = toHeatmap(cv2.resize(sal_norm, (224, 224)))
heatmap = (heatmap / np.max(heatmap)) * 255
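# Hedged follow-up sketch: blend the Grad-CAM heatmap with the input image,
# assuming `heatmap` is a 224x224 RGB array in the 0-255 range as produced
# above; the output file name is hypothetical.
resized = cv2.resize(img, (224, 224))
overlay = cv2.addWeighted(resized.astype(np.uint8), 0.5,
                          heatmap.astype(np.uint8), 0.5, 0)
# Convert back to BGR because cv2.imwrite expects BGR channel order.
cv2.imwrite('gradcam_overlay.jpg', cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))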