def __getitem__(self, index: int):
    """Obtain the sample with the given index.

    The label file for the sample (``self.labels[index]``) is read as a
    space-delimited file with one box per row:
    ``left top right bottom [class]``. A missing label file, or one with
    no rows, produces an empty target (zero boxes).

    :param index: Index of dataset.
    :type index: int

    :returns: RGB image tensor and dictionary of target description in
        COCO format (keys ``boxes``, ``image_id``, ``area``, ``labels``,
        ``iscrowd``)
    :rtype: torch.Tensor, dict
    """
    # Output of Dataset must be a tensor. Force RGB so the result is always
    # (3 x H x W); the context manager releases the file handle right away.
    with Image.open(self.images[index]) as raw_image:
        image = ToTensor()(raw_image.convert("RGB"))

    box_path = self.labels[index]
    bbox_list = []
    class_list = []
    if os.path.exists(box_path):
        with open(box_path, "r") as csv_file:
            for row in csv.reader(csv_file, delimiter=' '):
                if not row:
                    # Blank line (e.g. trailing newline): nothing to parse.
                    continue
                left = int(row[0])    # x0
                top = int(row[1])     # y0
                right = int(row[2])   # x1
                bottom = int(row[3])  # y1
                try:
                    cls = int(row[4])
                except (IndexError, ValueError):
                    # Assume binary class if the class column is missing
                    # or is not an integer (e.g. empty trailing field).
                    cls = 1
                bbox_list.append([left, top, right, bottom])
                class_list.append(cls)

    # Explicit (N, 4) shape so that a sample without boxes gives (0, 4)
    # instead of (0,). reshape(-1, 4) cannot be used here: torch rejects
    # an inferred dimension when the tensor has zero elements.
    boxes = torch.as_tensor(bbox_list, dtype=torch.float32).reshape(
        len(bbox_list), 4
    )
    labels = torch.as_tensor(class_list, dtype=torch.int64)
    image_id = torch.tensor([index])

    # Assume all instances are not crowd
    iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)

    # Area is delegated to ObliqueDataset.calculate_area. NOTE(review): the
    # original comment says 1 is added to account for pixels starting at
    # 0,0 and not 1,1 -- confirm against that helper.
    area = ObliqueDataset.calculate_area(boxes)

    target = {
        "boxes": boxes,
        "image_id": image_id,
        "area": area,
        "labels": labels,
        "iscrowd": iscrowd,
    }
    return image, target
def sr(im, scale):
    """Super-resolve a PIL image by passing its luma channel through ``espcn``.

    The image is converted to YCbCr. Only the Y channel is fed to the
    module-level ``espcn`` model (on the CUDA device); Cb and Cr are
    resized with bicubic interpolation to the size of the network output.
    The three channels are then merged and converted back to RGB.

    :param im: Input PIL image.
    :param scale: Not used by this function; the output size is whatever
        ``espcn`` produces. Kept so existing callers keep working.
    :returns: The upscaled image in RGB mode.
    """
    luma, cb, cr = im.convert('YCbCr').split()

    # PIL reports size as (width, height); the network input is laid out as
    # (batch, channel, height, width).
    width, height = luma.size
    batch = ToTensor()(luma).view(1, -1, height, width).cuda()

    with torch.no_grad():
        upscaled = torch.clamp(espcn(batch), 0., 1.)

    # Drop the batch axis and move to CPU before converting back to PIL.
    luma_sr = ToPILImage()(upscaled.cpu()[0])

    # Chroma is not processed by the network: bicubic-resize it to match.
    cb = cb.resize(luma_sr.size, Image.BICUBIC)
    cr = cr.resize(luma_sr.size, Image.BICUBIC)

    return Image.merge('YCbCr', [luma_sr, cb, cr]).convert('RGB')
if prior: loss, _ = trainer.run_validation(original, synthesis, semantic, label) else: loss, _ = trainer.run_validation(original, synthesis, semantic, label, entropy, mae, distance) val_loss += loss outputs = softmax(outputs) (softmax_pred, predictions) = torch.max(outputs, dim=1) # post processing for semantic, label and prediction semantic_post = torch.zeros([original.shape[0], 3, 256, 512]) for idx, semantic_ in enumerate(semantic): (_, semantic_) = torch.max(semantic_, dim = 0) semantic_ = 256 - np.asarray(ToPILImage()(semantic_.type(torch.FloatTensor).cpu())) semantic_[semantic_ == 256] = 0 semantic_ = visualization.colorize_mask(semantic_) semantic_ = ToTensor()(semantic_.convert('RGB')) semantic_post[idx, :, :, :] = semantic_ label_post = torch.zeros([original.shape[0], 3, 256, 512]) for idx, label_ in enumerate(label): label_ = 256 - np.asarray(ToPILImage()(label_.type(torch.FloatTensor).cpu())) # There must be a better way... label_[label_ == 256] = 0 label_[label_ == 255] = 100 label_[label_ == 1] = 255 label_ = ToTensor()(Image.fromarray(label_).convert('RGB')) label_post[idx, :, :, :] = label_ predictions_post = torch.zeros([original.shape[0], 3, 256, 512]) for idx, predictions_ in enumerate(predictions): predictions_ = np.asarray(ToPILImage()(predictions_.type(torch.FloatTensor).cpu()))