def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    #index = None
    #img_path = self.img_path.rstrip()
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))  # unpack the spatial dims; a nested tuple here raises a TypeError

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img = horisontal_flip(img)

    return img_path, img
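# Most snippets in this collection shift box coordinates by pad[0]/pad[1]
# (left/right) and pad[2]/pad[3] (top/bottom). For reference, here is a
# minimal pad_to_square consistent with that indexing. It mirrors the widely
# copied PyTorch-YOLOv3 helper (the padding formula is quoted verbatim in a
# comment in one of the snippets below); individual forks may differ, e.g.
# one numpy-based variant further down uses numpy-style nested pad tuples.
import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img is a (C, H, W) tensor; pad the shorter spatial side to a square
    c, h, w = img.shape
    dim_diff = abs(h - w)
    # Split the difference between the two sides (they can differ by one pixel)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad takes (left, right, top, bottom) for the last two dimensions,
    # which is exactly the pad[0..3] indexing used by the snippets
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad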
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    #print(os.path.abspath(__file__))
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    #print(os.path.abspath(img_path))
    img_path = os.path.join("../data/vortox-annotation", img_path[1:])
    print("img_path=" + img_path)

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    label_path = os.path.join('../data/vortox-annotation', label_path[1:])
    print("label_path = {}".format(label_path))

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 9))
        # Append four columns to hold the derived (x, y, w, h)
        a = torch.zeros((len(boxes), 4))
        boxes = torch.cat((boxes.float(), a), 1).double()
        # Extract coordinates for unpadded + unscaled image
        for i in [1, 3, 5, 7]:
            boxes[:, i] = (boxes[:, i] + pad[0]) / padded_w
        tmp = boxes[:, 1:8:2]
        x_max = torch.max(tmp, 1)[0]
        x_min = torch.min(tmp, 1)[0]
        boxes[:, 9] = (x_max + x_min) / 2
        boxes[:, 11] = x_max - x_min
        for i in [2, 4, 6, 8]:
            boxes[:, i] = (boxes[:, i] + pad[2]) / padded_h
        tmp = boxes[:, 2:9:2]
        y_max = torch.max(tmp, 1)[0]
        y_min = torch.min(tmp, 1)[0]
        boxes[:, 10] = (y_max + y_min) / 2
        boxes[:, 12] = y_max - y_min

        targets = torch.zeros((len(boxes), 14))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    # If the labels are not normalized, the scale factors are (1, 1)
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 11))
        # Extract coordinates for unpadded + unscaled image
        # x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        # y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        # x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        # y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # # Adjust for added padding
        # x1 += pad[0]
        # y1 += pad[2]
        # x2 += pad[1]
        # y2 += pad[3]
        # # Returns (x, y, w, h)
        # boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        # boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        # boxes[:, 3] *= w_factor / padded_w
        # boxes[:, 4] *= h_factor / padded_h

        # Add one column to index the sample within the batch
        targets = torch.zeros((len(boxes), 12))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        # boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        boxes = np.loadtxt(label_path).reshape(-1, 5)
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        # Column 0 of targets is the image index within the batch; it marks
        # which image each anchor box belongs to when computing the loss
        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = torch.FloatTensor(boxes)

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
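# A quick numeric sanity check of the label transform above, with assumed
# (hypothetical) values: a 480x640 image (h=480, w=640) is padded to 640x640,
# so pad = (0, 0, 80, 80); a normalized box (xc, yc, bw, bh) = (0.5, 0.5,
# 0.25, 0.5) keeps its center and width but has its height rescaled by 480/640.
h_factor, w_factor, padded = 480, 640, 640
xc, yc, bw, bh = 0.5, 0.5, 0.25, 0.5
x1, x2 = w_factor * (xc - bw / 2), w_factor * (xc + bw / 2)              # 240, 400
y1, y2 = h_factor * (yc - bh / 2) + 80, h_factor * (yc + bh / 2) + 80    # 200, 440
assert ((x1 + x2) / 2 / padded, (y1 + y2) / 2 / padded) == (0.5, 0.5)
assert (bw * w_factor / padded, bh * h_factor / padded) == (0.25, 0.375)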
def __getitem__(self, index):
    img, anno = super(JSONDataset, self).__getitem__(index)

    # ---------
    #  Image
    # ---------
    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(img)

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    # np.float64 rather than np.float, which was removed from NumPy
    bboxes = torch.from_numpy(
        np.array([ann['bbox'] for ann in anno]).astype(np.float64))

    # Extract coordinates for unpadded + unscaled image
    x1 = bboxes[:, 0].clone()
    y1 = bboxes[:, 1].clone()
    x2 = bboxes[:, 0] + bboxes[:, 2]
    y2 = bboxes[:, 1] + bboxes[:, 3]
    # Adjust for added padding
    x1 += pad[0]
    y1 += pad[2]
    x2 += pad[1]
    y2 += pad[3]
    # Returns (x, y, w, h)
    bboxes[:, 0] = ((x1 + x2) / 2.) / padded_w
    bboxes[:, 1] = ((y1 + y2) / 2.) / padded_h
    bboxes[:, 2] = bboxes[:, 2] * 1. / padded_w
    bboxes[:, 3] = bboxes[:, 3] * 1. / padded_h

    targets = torch.zeros((len(bboxes), 6))
    targets[:, 1] = torch.from_numpy(
        np.array([
            self.json_category_id_to_contiguous_id[ann['category_id']]
            for ann in anno
        ]))
    targets[:, 2:] = bboxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    img_id = self.ids[index]
    return img_id, img, targets
def __getitem__(self, index):
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = transforms.ToTensor()(img)

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # img_path, img, targets = self.load_mosaic(index)
    img_path, img = self.load_image(index)
    targets = self.load_targets(index, img.numpy())
    img = resize(img, self.img_size)

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def get_image(self, img_path, label_path):
    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path, dtype=np.float32).reshape(-1, 5))
        #print('boxes ', boxes)
        # Skip empty label files
        if boxes.size(0) != 0:
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= float(w_factor) / float(padded_w)
            boxes[:, 4] *= float(h_factor) / float(padded_h)

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(self.load_images(index))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    boxes = torch.from_numpy(self.load_labels(index))
    targets = torch.zeros((len(boxes), 6))
    # Adjust for added padding
    x1, y1, x2, y2 = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
    x1 += pad[0]
    y1 += pad[2]
    x2 += pad[1]
    y2 += pad[3]
    # Returns (x, y, w, h)
    targets[:, 1] = boxes[:, 0]
    targets[:, 2] = ((x1 + x2) / 2) / padded_w
    targets[:, 3] = ((y1 + y2) / 2) / padded_h
    targets[:, 4] = (x2 - x1) / padded_w
    targets[:, 5] = (y2 - y1) / padded_h

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    # This variant tracks no image path; the original returned the leftover
    # `_` (the channel count from unpacking img.shape), almost certainly
    # unintended, so return None in its place
    return None, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    diff_mode = self.diff_mode
    if diff_mode != 0:
        diff_img_path = self.diff_img_files[index % len(self.diff_img_files)].rstrip()
        print("Path check", img_path, diff_img_path)
        diff_img = transforms.ToTensor()(Image.open(diff_img_path).convert('L'))
        print(img.dtype, diff_img.dtype)
        # concat diff img to img
        if diff_mode == 1:
            img = torch.cat([img, diff_img], axis=0)
            print(img.shape)
        # mode2 (calculate to use diff)
        if diff_mode == 2:
            img = img * diff_img * 2 + img

    # Handle images with less than three channels
    # if len(img.shape) != 3:
    #     img = img.unsqueeze(0)
    #     img = img.expand((3, img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = lycon.load(img_path)

    # Handles images with less than three channels
    if len(img.shape) != 3:
        img = np.expand_dims(img, -1)
        img = np.repeat(img, 3, -1)

    h, w, _ = img.shape
    img, pad = pad_to_square(img, 127.5)
    padded_h, padded_w, _ = img.shape

    # Resize to target shape
    img = lycon.resize(img, height=self.img_size, width=self.img_size)
    # Channels-first and normalize
    img = torch.from_numpy(img).float().permute((2, 0, 1)) / 255.0

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    labels = None
    if os.path.exists(label_path):
        labels = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w * (labels[:, 1] - labels[:, 3] / 2)
        y1 = h * (labels[:, 2] - labels[:, 4] / 2)
        x2 = w * (labels[:, 1] + labels[:, 3] / 2)
        y2 = h * (labels[:, 2] + labels[:, 4] / 2)
        # Adjust for added padding (numpy-style pad: pad[0] is the (top,
        # bottom) pair for the y axis, pad[1] the (left, right) pair for x)
        x1 += pad[1][0]
        y1 += pad[0][0]
        x2 += pad[1][1]
        y2 += pad[0][1]
        if self.is_training:
            # Returns (x, y, w, h)
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        else:
            # Returns (x1, y1, x2, y2)
            labels[:, 1] = x1 * (self.img_size / padded_w)
            labels[:, 2] = y1 * (self.img_size / padded_h)
            labels[:, 3] = x2 * (self.img_size / padded_w)
            labels[:, 4] = y2 * (self.img_size / padded_h)

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, labels = horisontal_flip(img, labels)

    # Add dummy label if there are none
    num_labels = 1 if labels is None else len(labels)
    boxes = torch.zeros((num_labels, 6))
    if labels is not None:
        boxes[:, 1:] = labels

    return img_path, img, boxes
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    # mosaic = True and self.augment  # 4 images
    # if mosaic:
    #     img, labels = load_mosaic(self, index)
    #     h, w, _ = img.shape
    # else:
    #     img = load_image(self, index)
    #     h, w, _ = img.shape

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # boxes[:, 0] => class id
        # Returns (xc, yc, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w  # new center x
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h  # new center y
        boxes[:, 3] *= w_factor / padded_w        # new width
        boxes[:, 4] *= h_factor / padded_h        # new height

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            # targets here is an N x 6 array: column 0 (the batch-index
            # column) is left untouched; columns 1-5 hold the class and the
            # box, which at this point is the padded center coordinates plus
            # width and height
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    img_path = self.img_files[index % len(self.img_files)]
    labels = self.label_files[index % len(self.label_files)]
    class_label = labels[4]
    x_min = labels[0]
    y_min = labels[1]
    x_max = labels[2]
    y_max = labels[3]

    # Adjust for added padding
    # pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    x_min += pad[0]
    y_min += pad[2]
    x_max += pad[1]
    y_max += pad[3]

    x = ((x_min + x_max) / 2) / padded_w
    if x_max >= padded_w:
        return None, None, None
    # x = ((x_min + x_max - 1) / 2) / padded_w
    # print("X: ", x)
    # print("Width of image: ", (x_max - x_min))
    y = ((y_min + y_max) / 2) / padded_h
    if y_max >= padded_h:
        return None, None, None
    # y = ((y_min + y_max - 1) / 2) / padded_h
    # print("Y: ", y)
    # print("Height of image: ", (y_max - y_min))
    w = (x_max - x_min) / padded_w
    h = (y_max - y_min) / padded_h

    # Numerical stability
    # if x + w/2 >= 1:
    #     w -= (x + w/2) - 1 + 1e-16
    # if y + h/2 >= 1:
    #     h -= (y + h/2) - 1 + 1e-16

    targets = torch.tensor([[0, class_label, x, y, w, h]], dtype=torch.float)

    # print("X: ", x)
    # print("Y: ", y)
    # print("W: ", w)
    # print("H: ", h)
    # print("Far X: ", x + w/2)
    # print("Far y: ", y + h/2)
    # assert x + w/2 < 1
    # assert x - w/2 > 0
    # assert y + h/2 < 1
    # assert y - h/2 > 0

    # Importing Image class from PIL module
    # Cropped image of above dimension
    # (It will not change original image)
    # img = (img.numpy() * 255)[0]
    # img = Image.fromarray(img)
    # cropped_image = img.crop((x*padded_w - (w*padded_w) / 2, y*padded_h - (h*padded_h) / 2,
    #                           x*padded_w + (w*padded_w) / 2, y*padded_h + (h*padded_h) / 2))
    # # Shows the image in image viewer
    # cropped_image.show()
    # assert False

    # targets = None
    # if os.path.exists(label_path):
    #     boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
    #     # Extract coordinates for unpadded + unscaled image
    #     x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
    #     y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
    #     x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
    #     y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
    #     # Adjust for added padding
    #     x1 += pad[0]
    #     y1 += pad[2]
    #     x2 += pad[1]
    #     y2 += pad[3]
    #     # Returns (x, y, w, h)
    #     boxes[:, 1] = ((x1 + x2) / 2) / padded_w
    #     boxes[:, 2] = ((y1 + y2) / 2) / padded_h
    #     boxes[:, 3] *= w_factor / padded_w
    #     boxes[:, 4] *= h_factor / padded_h
    #     targets = torch.zeros((len(boxes), 6))
    #     targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img_path += '.jpg'
    img_path = self.img_path + img_path

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
    _, height, width = img.shape

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        with open(label_path, 'r') as f:
            lines = f.readlines()

        label_list = []
        for line in lines:
            fields = line.split(' ')
            label = fields[1]
            x_left = int(fields[2])
            y_left = int(fields[3])
            x_right = int(fields[4])
            y_right = int(fields[5])
            olabel, x_mid, y_mid, x_length, y_length = self.get_arguments(
                height, width, label, x_left, y_left, x_right, y_right)
            # Round each coordinate to five decimal places
            label_list.append([
                float(olabel),
                float('%.5f' % x_mid),
                float('%.5f' % y_mid),
                float('%.5f' % x_length),
                float('%.5f' % y_length)
            ])
        boxes = torch.from_numpy(np.array(label_list).reshape(-1, 5))

        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    #img_path = os.path.join('/home/users/visionintelligence/rohit/proj/yolo/PyTorch-YOLOv3/data', img_path)
    #img_path = img_path.replace('./train_sets/train_set_1', 'data/custom')

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
    #resize images here itself
    img = resize(img, 512)

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    #print(f'image shape is {img.shape}\n')
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()  # path of the index-th image

    # Extract image as PyTorch tensor (read in RGB format)
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))  # expand to three channels

    _, h, w = img.shape
    self.orl_size.append([h, w])
    # print(self.orl_size)
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)  # height and width of the original image

    # Pad to square resolution (add padding pixels to the image)
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape  # h, w after padding

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()  # path of the label file

    targets = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))  # boxes holds the list of labels
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)  # x1, y1, x2, y2 from the original annotations
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]  # x1, y1, x2, y2 after the padding is added
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w  # w, h relative to the padded image
        boxes[:, 4] *= h_factor / padded_h

        targets = torch.zeros((len(boxes), 6))  # one target row per label
        targets[:, 1:] = boxes  # the five label columns are copied unchanged

    # Apply augmentations
    if self.augment:  # no noise is added, only a random horizontal flip
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets  # return the image path, the padded image, and targets
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()

    targets = None
    boxes = None
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        # The number of boxes equals the number of objects annotated in this
        # image; prepend an index column to the five label columns, giving
        # [idx, class, x, y, w, h]
        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            # While training on the VisDrone dataset this step occasionally
            # raised the following error (resolved):
            #     targets[:, 2] = 1 - targets[:, 2]
            #     TypeError: 'NoneType' object is not subscriptable
            try:
                img, targets = horisontal_flip(img, targets)
            except TypeError:
                print(f"datasets.py: img_path={img_path}")
                print(f"targets={targets}")
                print(f"boxes={boxes}")

    return img_path, img, targets
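# For reference: the horisontal_flip most of these forks call is the
# two-liner below, as in the widely copied PyTorch-YOLOv3 utilities. It also
# explains the TypeError noted in the previous snippet: when no label file
# exists, targets is None and targets[:, 2] is not subscriptable. The None
# guard sketched here is an assumed addition, not part of the upstream helper.
import torch

def horisontal_flip(images, targets):
    # Flip the image along its last (width) axis
    images = torch.flip(images, [-1])
    # Mirror the normalized x-center, stored in column 2 of the 6-column targets
    if targets is not None:
        targets[:, 2] = 1 - targets[:, 2]
    return images, targets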
def __getitem__(self, index):
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img_path = 'data/coco' + img_path

    # Extract image as PyTorch tensor
    img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

    # Handle images with less than three channels
    if len(img.shape) != 3:
        img = img.unsqueeze(0)
        img = img.expand((3, *img.shape[1:]))

    _, h, w = img.shape
    # If the labels are normalized, use the original image scale as the factor
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    _, padded_h, padded_w = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    label_path = 'data/coco' + label_path

    targets = None
    # Convert labels normalized to the original image into labels
    # normalized to the padded image
    if os.path.exists(label_path):
        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        # Extract coordinates for unpadded + unscaled image
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h): convert the ground-truth label into
        # normalized (x, y, w, h), where x, y are the box center
        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] *= w_factor / padded_w
        boxes[:, 4] *= h_factor / padded_h

        # Columns 1-5 of targets hold columns 0-4 of boxes; column 0 of
        # targets is filled with the sample index in collate_fn below
        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

    # Apply augmentations (data augmentation)
    if self.augment:
        if np.random.random() < 0.5:
            img, targets = horisontal_flip(img, targets)

    return img_path, img, targets
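# Several comments above refer to a collate_fn that fills column 0 of targets
# with the sample index, so boxes from every image in a batch can be
# concatenated into one tensor and still be attributed to their image when
# computing the loss. A minimal sketch, following the common PyTorch-YOLOv3
# implementation; the multiscale resizing step is omitted, so this assumes
# all images were already resized to a common size.
import torch

def collate_fn(batch):
    paths, imgs, targets = list(zip(*batch))
    # Drop entries whose label file was missing (targets is None)
    targets = [boxes for boxes in targets if boxes is not None]
    # Write the within-batch sample index into column 0
    for i, boxes in enumerate(targets):
        boxes[:, 0] = i
    targets = torch.cat(targets, 0)
    imgs = torch.stack(imgs)
    return paths, imgs, targets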