def __getitem__(self, index): if self.image_weights: index = self.indices[index] img_path = self.img_files[index] label_path = self.label_files[index] hyp = self.hyp mosaic = True and self.augment # load 4 images at a time into a mosaic (only during training) if mosaic: # Load mosaic img, labels = load_mosaic(self, index) shapes = None else: # Load image img, (h0, w0), (h, w) = load_image(self, index) # Letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling # Load labels labels = [] if os.path.isfile(label_path): x = self.labels[index] if x is None: # labels not preloaded with open(label_path, 'r') as f: x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) if x.size > 0: # Normalized xywh to pixel xyxy format labels = x.copy() labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] if self.augment: # Augment imagespace if not mosaic: img, labels = random_affine(img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear']) # Augment colorspace augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) # Apply cutouts # if random.random() < 0.9: # labels = cutout(img, labels) nL = len(labels) # number of labels if nL: # convert xyxy to xywh labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # Normalize coordinates 0 - 1 labels[:, [2, 4]] /= img.shape[0] # height labels[:, [1, 3]] /= img.shape[1] # width if self.augment: # random left-right flip lr_flip = True if lr_flip and random.random() < 0.5: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] # random up-down flip ud_flip = False if ud_flip and random.random() < 0.5: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] labels_out = torch.zeros((nL, 6)) if nL: labels_out[:, 1:] = torch.from_numpy(labels) # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) return torch.from_numpy(img), labels_out, img_path, shapes
def __getitem__(self, index): if self.image_weights: index = self.indices[index] img_path = self.img_files[index] label_path = self.label_files[index] # Load image img = self.imgs[index] if img is None: img = cv2.imread(img_path) # BGR assert img is not None, 'File Not Found ' + img_path if self.n < 1001: self.imgs[index] = img # cache image into memory # Augment colorspace augment_hsv = True if self.augment and augment_hsv: # SV augmentation by 50% fraction = 0.50 # must be < 1.0 img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val S = img_hsv[:, :, 1].astype(np.float32) # saturation V = img_hsv[:, :, 2].astype(np.float32) # value a = (random.random() * 2 - 1) * fraction + 1 b = (random.random() * 2 - 1) * fraction + 1 S *= a V *= b img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # Letterbox h, w, _ = img.shape if self.rect: shape = self.batch_shapes[self.batch[index]] img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='rect') else: shape = self.img_size img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='square') # Load labels labels = [] if os.path.isfile(label_path): x = self.labels[index] if x is None: # labels not preloaded with open(label_path, 'r') as f: x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) self.labels[index] = x # save for next time if x.size > 0: # Normalized xywh to pixel xyxy format labels = x.copy() labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh # Augment image and labels if self.augment: img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10)) nL = len(labels) # number of labels if nL: # convert xyxy to xywh labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # Normalize coordinates 0 - 1 labels[:, [2, 4]] /= img.shape[0] # height labels[:, [1, 3]] /= img.shape[1] # width if self.augment: # random left-right flip lr_flip = True if lr_flip and random.random() > 0.5: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] # random up-down flip ud_flip = False if ud_flip and random.random() > 0.5: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] labels_out = torch.zeros((nL, 6)) if nL: labels_out[:, 1:] = torch.from_numpy(labels) # Normalize img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 return torch.from_numpy(img), labels_out, img_path, (h, w)
def __next__(self): self.count += 1 if self.count == self.nB: raise StopIteration ia = self.count * self.batch_size ib = min((self.count + 1) * self.batch_size, self.nF) height = self.height img_all = [] labels_all = [] for index, files_index in enumerate(range(ia, ib)): img_path = self.__inputs.traindir + str( self.shuffled_vector[files_index]) + '.bmp' img0 = cv2.imread(img_path) if img0 is None: continue augment_hsv = False if augment_hsv: augmentHSV(img0) # Load labels chip = img_path.rsplit('/')[-1] chip = chip.rsplit('_')[-1] i = (self.targetIDs == float( chip.replace('.tif', '').replace('.bmp', ''))).nonzero()[0] labels1 = self.targets[i] img1, labels1, M = random_affine(img0, targets=labels1, degrees=(-20, 20), translate=(0.01, 0.01), scale=(0.70, 1.30)) # RGB self.labels1 = labels1 self.r = pickRandomPoints(100, img0, height, M, img1) self.nL1 = len(labels1) if self.nL1 > 0: self.pickRandom8Points() h, w, _ = img1.shape for j in range(8): self.labels = np.array([], dtype=np.float32) pad_x, pad_y = self.eliminateBadLabels(j) img = img1[pad_y:pad_y + self.height, pad_x:pad_x + self.height] self.nL = len(self.labels) if self.nL > 0: # convert labels to xywh self.labels[:, 1:5] = xyxy2xywh( self.labels[:, 1:5].copy()) / self.height # random lr flip if random.random() > 0.5: img = np.fliplr(img) if self.nL > 0: self.labels[:, 1] = 1 - self.labels[:, 1] # random ud flip if random.random() > 0.5: img = np.flipud(img) if self.nL > 0: self.labels[:, 2] = 1 - self.labels[:, 2] img_all.append(img) labels_all.append(torch.from_numpy(self.labels).float()) # Randomize i = np.random.permutation(len(labels_all)) img_all = [img_all[j] for j in i] labels_all = [labels_all[j] for j in i] # Normalize img_all = np.stack(img_all)[:, :, :, ::-1].transpose( 0, 3, 1, 2) # BGR to RGB and cv2 to pytorch img_all = np.ascontiguousarray(img_all, dtype=np.float32) img_all -= self.rgb_mean img_all /= self.rgb_std return torch.from_numpy(img_all), labels_all
def __getitem__(self, index): if self.image_weights: index = self.indices[index] img_path = self.img_files[index] label_path = self.label_files[index] hyp = self.hyp # Load image img = self.imgs[index] if img is None: img = cv2.imread(img_path) # BGR assert img is not None, 'Image Not Found ' + img_path r = self.img_size / max(img.shape) # size ratio if self.augment and r < 1: # if training (NOT testing), downsize to inference shape h, w, _ = img.shape img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # INTER_LINEAR fastest # Augment colorspace augment_hsv = True if self.augment and augment_hsv: # SV augmentation by 50% img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val S = img_hsv[:, :, 1].astype(np.float32) # saturation V = img_hsv[:, :, 2].astype(np.float32) # value a = random.uniform(-1, 1) * hyp['hsv_s'] + 1 b = random.uniform(-1, 1) * hyp['hsv_v'] + 1 S *= a V *= b img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # Letterbox h, w, _ = img.shape if self.rect: shape = self.batch_shapes[self.batch[index]] img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='rect') else: shape = self.img_size img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='square') # Load labels labels = [] if os.path.isfile(label_path): x = self.labels[index] if x is None: # labels not preloaded boxes = [] root = ET.parse(label_path).getroot() im_w = int(root.find('size/width').text) im_h = int(root.find('size/height').text) for obj in root.findall('object'): c = obj.find('name').text if c not in self.id2idx: continue cls = self.id2idx[c] x1 = int(obj.find('bndbox/xmin').text) y1 = int(obj.find('bndbox/ymin').text) x2 = int(obj.find('bndbox/xmax').text) y2 = int(obj.find('bndbox/ymax').text) x = 0.5 * (x1 + x2) / im_w y = 0.5 * (y1 + y2) / im_h ww = (x2 - x1) / im_w hh = (y2 - y1) / im_h boxes.append([cls, x, y, ww, hh]) x = np.array(boxes, dtype=np.float32) if x.size > 0: # Normalized xywh to pixel xyxy format labels = x.copy() labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh # Augment image and labels if self.augment: img, labels = random_affine(img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear']) nL = len(labels) # number of labels if nL: # convert xyxy to xywh labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # Normalize coordinates 0 - 1 labels[:, [2, 4]] /= img.shape[0] # height labels[:, [1, 3]] /= img.shape[1] # width if self.augment: # random left-right flip lr_flip = True if lr_flip and random.random() > 0.5: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] # random up-down flip ud_flip = False if ud_flip and random.random() > 0.5: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] labels_out = torch.zeros((nL, 6)) if nL: labels_out[:, 1:] = torch.from_numpy(labels) # Normalize img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 return torch.from_numpy(img), labels_out, img_path, (h, w)