def __next__(self):
    """Build and return the next batch of images and labels.

    Every image of the batch is read from disk, optionally SV-augmented,
    resized to a square of side ``height`` with ``resize_square``, and its
    labels are converted to ``xywh`` divided by ``height``.  When
    ``self.augment`` is set a random affine transform and a random
    left-right flip are applied as well.

    Returns:
        tuple: ``(imgs, labels)`` where ``imgs`` is a float32 tensor of the
        stacked images (channels first, channel order reversed, scaled to
        0-1) and ``labels`` is a list holding one tensor per loaded image.

    Raises:
        StopIteration: when the batch counter reaches ``self.nB``.
    """
    self.count += 1
    if self.count == self.nB:
        raise StopIteration

    ia = self.count * self.batch_size
    # The upper bound was commented out in the original, which made
    # range(ia, ib) fail with NameError; restored to match the sibling
    # loaders (clamped to the number of files).
    ib = min((self.count + 1) * self.batch_size, self.nF)

    if self.multi_scale:
        # Multi-scale training: 320 - 608 pixels in steps of 32
        height = random.choice(range(10, 20)) * 32
    else:
        # Fixed-scale training
        height = self.height

    img_all = []
    labels_all = []
    for files_index in range(ia, ib):
        sample = self.shuffled_vector[files_index]
        img_path = self.img_files[sample]
        # Only the file name of the listed label path is used; the
        # directory is hard-coded.
        label_path = ('D:/01-ComputerVisionEntries/10-HumanDetection/'
                      'PyTorch-YOLOv3-master/labels/train/'
                      + self.label_files[sample].split('/')[-1])

        img = cv2.imread(img_path)  # BGR
        if img is None:
            # Unreadable image: report it and leave it out of the batch
            print(img_path)
            continue

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # saturation, then value
                plane = img_hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    np.clip(plane, a_min=0, a_max=255, out=plane)
                img_hsv[:, :, channel] = plane.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = resize_square(
            img, height=height, color=(127.5, 127.5, 127.5))

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path,
                                 dtype=np.float32).reshape(-1, 5)
            # Shift the stored x/y by half the box size (box centre)
            labels0[:, 1] = labels0[:, 1] + labels0[:, 3] / 2  # center x
            labels0[:, 2] = labels0[:, 2] + labels0[:, 4] / 2  # center y

            # Normalized xywh -> pixel xyxy in the resized image
            labels = labels0.copy()
            labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
            labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
            labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
            labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, _ = random_affine(img, labels,
                                           degrees=(-5, 5),
                                           translate=(0.10, 0.10),
                                           scale=(0.90, 1.10))

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, relative to the square side
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled; `&` still draws one random
            # number, which keeps the RNG stream unchanged)
            ud_flip = False
            if ud_flip & (random.random() > 0.5):
                img = np.flipud(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img_all.append(img)
        labels_all.append(torch.from_numpy(labels))

    # Normalize: reverse channel order, channels first, 0 - 1 range
    img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)
    img_all = np.ascontiguousarray(img_all, dtype=np.float32)
    img_all /= 255.0

    return torch.from_numpy(img_all), labels_all
def __getitem__(self, index):
    """Load, augment and letterbox one sample.

    Args:
        index: sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, img_path, (h, w))`` where ``img`` is a
        float32 tensor (channels first, channel order reversed, scaled to
        0-1), ``labels_out`` is an ``(nL, 6)`` tensor whose column 0 is
        left at zero and whose remaining columns hold the class and the
        normalized xywh box, and ``(h, w)`` is the image shape right
        before letterboxing.

    Raises:
        AssertionError: if the image cannot be read from ``img_path``.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]
    hyp = self.hyp

    # Load image (from the in-memory cache when available)
    img = self.imgs[index]
    if img is None:
        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'Image Not Found ' + img_path
        r = self.img_size / max(img.shape)  # size ratio
        if self.augment and r < 1:
            # training only: downsize to inference shape
            h, w, _ = img.shape
            img = cv2.resize(img, (int(w * r), int(h * r)),
                             interpolation=cv2.INTER_LINEAR)

    # Augment colorspace
    augment_hsv = True
    if self.augment and augment_hsv:
        # SV augmentation, gains drawn from the hyper-parameters
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
        S = img_hsv[:, :, 1].astype(np.float32)  # saturation
        V = img_hsv[:, :, 2].astype(np.float32)  # value

        a = random.uniform(-1, 1) * hyp['hsv_s'] + 1
        b = random.uniform(-1, 1) * hyp['hsv_v'] + 1
        S *= a
        V *= b

        img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
        img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
        # Convert into a NEW array.  The original passed dst=img, which
        # wrote the augmented pixels back into the cached self.imgs[index]
        # array, so augmentation accumulated on cached images.
        img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

    # Letterbox
    h, w, _ = img.shape
    if self.rect:
        shape = self.batch_shapes[self.batch[index]]
        img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape,
                                                    mode='rect')
    else:
        shape = self.img_size
        img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape,
                                                    mode='square')

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        x = self.labels[index]
        if x is None:  # labels not preloaded
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f.read().splitlines()]
            x = np.array(rows, dtype=np.float32)

        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            labels = x.copy()
            labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

    # Augment image and labels
    if self.augment:
        img, labels = random_affine(img, labels,
                                    degrees=hyp['degrees'],
                                    translate=hyp['translate'],
                                    scale=hyp['scale'],
                                    shear=hyp['shear'])

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates 0 - 1
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width

    if self.augment:
        # random left-right flip
        lr_flip = True
        if lr_flip and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        ud_flip = False
        if ud_flip and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Normalize
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, channels first
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)
def __next__(self):
    """Build and return the next batch of images and labels.

    Images are read from the ``image_train`` directory and labels from the
    ``labels_txt`` directory (both relative to the working directory).
    Each image is optionally SV-augmented, letterboxed to ``height`` and,
    when ``self.augment`` is set, randomly affine-transformed and flipped.

    Returns:
        tuple: ``(imgs, labels)`` where ``imgs`` is a float32 tensor of the
        stacked images (channels first, channel order reversed, scaled to
        0-1) and ``labels`` is a list holding one tensor per loaded image.

    Raises:
        StopIteration: when the batch counter reaches ``self.nB``.
    """
    self.count += 1
    if self.count == self.nB:
        raise StopIteration

    ia = self.count * self.batch_size
    ib = min((self.count + 1) * self.batch_size, self.nF)

    if self.multi_scale:
        # Multi-scale training: 320 - 608 pixels in steps of 32
        height = random.choice(range(10, 20)) * 32
    else:
        # Fixed-scale training
        height = self.height

    img_all = []
    labels_all = []
    for files_index in range(ia, ib):
        sample = self.shuffled_vector[files_index]
        img_path = self.img_files[sample]
        # Build the label location once; it is tested, read and reported
        label_file = os.path.join("labels_txt", self.label_files[sample])

        img = cv2.imread(os.path.join("image_train", img_path))  # BGR
        if img is None:
            continue  # unreadable image: silently left out of the batch

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # saturation, then value
                plane = img_hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    np.clip(plane, a_min=0, a_max=255, out=plane)
                img_hsv[:, :, channel] = plane.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height)

        # Load labels
        if os.path.isfile(label_file):
            # np.loadtxt warns on empty files; those warnings are muted
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                labels0 = np.loadtxt(label_file,
                                     dtype=np.float32).reshape(-1, 5)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
            labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
            labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
            labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
        else:
            # Report the path that was actually checked; the original
            # printed a "data_train/..." path that was never tested.
            print(label_file)
            print("st wrong")
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, _ = random_affine(img, labels,
                                           degrees=(-5, 5),
                                           translate=(0.10, 0.10),
                                           scale=(0.90, 1.10))

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, relative to the letterbox size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled; `&` still draws one random
            # number, which keeps the RNG stream unchanged)
            ud_flip = False
            if ud_flip & (random.random() > 0.5):
                img = np.flipud(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img_all.append(img)
        labels_all.append(torch.from_numpy(labels))

    # Normalize: BGR to RGB, channels first, 0 - 1 range
    img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)
    img_all = np.ascontiguousarray(img_all, dtype=np.float32)
    img_all /= 255.0

    return torch.from_numpy(img_all), labels_all
def __next__(self):
    """Build and return the next batch with a single concatenated label tensor.

    Each label row is prefixed with the position of its image inside the
    batch so that all labels can be returned in one tensor.

    Returns:
        tuple: ``(imgs, labels, img_paths, img_shapes)`` where ``imgs`` is a
        float32 tensor (channels first, channel order reversed, scaled to
        0-1), ``labels`` is an ``(N, 6)`` tensor of
        ``[batch_index, class, x, y, w, h]`` rows (``(0, 6)`` when the
        batch has no labels at all), ``img_paths`` lists the image paths
        and ``img_shapes`` the ``(h, w)`` of each image before letterboxing.

    Raises:
        StopIteration: when the batch counter reaches ``self.nB``.
        AssertionError: if an image cannot be read.
    """
    self.count += 1
    if self.count == self.nB:
        raise StopIteration

    ia = self.count * self.batch_size
    ib = min((self.count + 1) * self.batch_size, self.nF)

    img_all, labels_all, img_paths, img_shapes = [], [], [], []
    for index, files_index in enumerate(range(ia, ib)):
        sample = self.shuffled_vector[files_index]
        img_path = self.img_files[sample]
        label_path = self.label_files[sample]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # saturation, then value
                plane = img_hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    np.clip(plane, a_min=0, a_max=255, out=plane)
                img_hsv[:, :, channel] = plane.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path,
                                 dtype=np.float32).reshape(-1, 5)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
            labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
            labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
            labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, _ = random_affine(img, labels,
                                           degrees=(-5, 5),
                                           translate=(0.10, 0.10),
                                           scale=(0.90, 1.10))

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, relative to the letterbox size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / self.img_size

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled; `&` still draws one random
            # number, which keeps the RNG stream unchanged)
            ud_flip = False
            if ud_flip & (random.random() > 0.5):
                img = np.flipud(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        if nL > 0:
            # Prefix every row with the image position inside the batch
            batch_col = np.zeros((nL, 1), dtype='float32') + index
            labels_all.append(np.concatenate((batch_col, labels), 1))

        img_all.append(img)
        img_paths.append(img_path)
        img_shapes.append((h, w))

    # Normalize: BGR to RGB, channels first, 0 - 1 range
    img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)
    img_all = np.ascontiguousarray(img_all, dtype=np.float32)
    img_all /= 255.0

    if labels_all:
        labels_all = torch.from_numpy(np.concatenate(labels_all, 0))
    else:
        # np.concatenate raises ValueError on an empty list, which made a
        # batch without any labelled image crash; return an empty tensor.
        labels_all = torch.zeros((0, 6))

    return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
def __getitem__(self, index):
    """Read one image with its labels and prepare both for training.

    Returns:
        tuple: ``(img, labels_out, img_path, (h, w))``; ``img`` is a
        float32 tensor (channels first, channel order reversed, 0-1),
        ``labels_out`` is ``(nL, 6)`` with column 0 left at zero, and
        ``(h, w)`` is the image shape as read from disk.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    img_path = self.img_files[index]
    label_path = self.label_files[index]

    img = cv2.imread(img_path)  # BGR
    assert img is not None, 'File Not Found ' + img_path

    augment_hsv = True
    if self.augment and augment_hsv:
        # Scale saturation and value by independent random gains (+-50%)
        fraction = 0.50  # must be < 1.0
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        for channel in (1, 2):  # saturation first, then value
            plane = hsv[:, :, channel].astype(np.float32)
            gain = (random.random() * 2 - 1) * fraction + 1
            plane *= gain
            if gain > 1:
                np.clip(plane, None, 255, out=plane)
            hsv[:, :, channel] = plane
        cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img)

    h, w, _ = img.shape
    img, ratio, padw, padh = letterbox(img, height=self.img_size)

    # Labels: one "class x y w h" row per line, coordinates normalized
    labels = []
    if os.path.isfile(label_path):
        with open(label_path, 'r') as file:
            rows = [line.split() for line in file.read().splitlines()]
        raw = np.array(rows, dtype=np.float32)

        if raw.size > 0:
            # normalized xywh -> pixel xyxy inside the letterboxed image
            half_w = raw[:, 3] / 2
            half_h = raw[:, 4] / 2
            labels = raw.copy()
            labels[:, 1] = ratio * w * (raw[:, 1] - half_w) + padw
            labels[:, 2] = ratio * h * (raw[:, 2] - half_h) + padh
            labels[:, 3] = ratio * w * (raw[:, 1] + half_w) + padw
            labels[:, 4] = ratio * h * (raw[:, 2] + half_h) + padh

    if self.augment:
        img, labels = random_affine(img, labels,
                                    degrees=(-10, 10),
                                    translate=(0.10, 0.10),
                                    scale=(0.80, 1.20))

    nL = len(labels)  # number of labels
    if nL:
        # back to xywh, relative to the letterbox size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

    if self.augment:
        # horizontal flip with probability 0.5
        lr_flip = True
        if lr_flip and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # vertical flip is switched off
        ud_flip = False
        if ud_flip and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR -> RGB, HWC -> CHW, uint8 -> float32 in 0.0 - 1.0
    chw = img[:, :, ::-1].transpose(2, 0, 1)
    chw = np.ascontiguousarray(chw, dtype=np.float32)
    chw /= 255.0

    return torch.from_numpy(chw), labels_out, img_path, (h, w)
def __getitem__(self, index):
    """Load one sample, letterbox it and apply the training augmentations.

    Returns:
        tuple: ``(img, labels_out, img_path, shapes)``; ``img`` is the
        contiguous channels-first image tensor (channel order reversed,
        not rescaled), ``labels_out`` is ``(nL, 6)`` with column 0 left at
        zero, and ``shapes`` is ``(h0, w0), ((h / h0, w / w0), pad)`` or
        ``None`` on the mosaic path.
    """
    # Remap the index when sampling by image weights
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]
    hyp = self.hyp

    # Mosaic augmentation is hard-disabled here (always evaluates to False)
    mosaic = False and self.augment
    if mosaic:
        # Load a four-image mosaic
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        # Load the single image
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox to the batch shape (rect mode) or the square size
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)

        # Load the annotation rows
        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:
                # Labels were not preloaded: read them from label_path
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                # Normalized xywh -> pixel top-left / bottom-right corners
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = ratio[0] * w * (raw[:, 1] - half_w) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (raw[:, 2] - half_h) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = ratio[1] * h * (raw[:, 2] + half_h) + pad[1]

    if self.augment:
        # Image-space augmentation: random affine only without mosaic
        if not mosaic:
            img, labels = random_affine(img, labels,
                                        degrees=hyp['degrees'],
                                        translate=hyp['translate'],
                                        scale=hyp['scale'],
                                        shear=hyp['shear'])

        # HSV colour-space augmentation
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'],
                    vgain=hyp['hsv_v'])

    nL = len(labels)  # number of label rows
    if nL:
        # xyxy -> xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize to 0 - 1
        labels[:, [2, 4]] /= img.shape[0]  # y and h by image height
        labels[:, [1, 3]] /= img.shape[1]  # x and w by image width

    if self.augment:
        # Random left-right flip
        lr_flip = True
        if lr_flip and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # Random up-down flip (switched off)
        ud_flip = False
        if ud_flip and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Reorder dimensions: BGR to RGB, HWC to CHW
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(chw), labels_out, img_path, shapes
def __getitem__(self, index):
    """Load one image with its 3D annotations and build the 2D label rows.

    Resize, horizontal flip and affine augmentation are performed here
    because computing the targets w.r.t. the transform afterwards is
    complicated.

    Returns:
        tuple: ``(img, labels, path, shapes)`` where ``img`` is the
        contiguous channels-first image tensor (channel order reversed),
        ``labels`` holds the first six columns of the label tensor
        (column 0 left at zero, then class and normalized xywh), ``path``
        is ``self.files[index]`` and ``shapes`` bundles the original
        ``(h, w)`` with the scale factors and padding.
    """
    img = self.load_image(index)
    anns, K = self.load_annotations(index)

    # (w, h) of the image as loaded, i.e. before any resize
    size = np.array(img.shape[:-1], np.float32)[::-1]

    flipped = False
    if self.is_train and np.random.rand() < self.flip_prob:
        flipped = True
        img = cv2.flip(img, 1)
        K[0, 2] = size[0] - K[0, 2] - 1  # mirror the principal point

    if self.is_train and np.random.rand() < self.aug_prob:
        img, _, _ = random_affine(img, degrees=0, translate=.1, scale=.1)
        # TODO: the labels are not transformed with the affine matrix yet:
        #   point = affine_transform(point, trans_mat)
        #   box2d[:2] = affine_transform(box2d[:2], trans_mat)
        #   box2d[2:] = affine_transform(box2d[2:], trans_mat)
        # TODO: there is something wrong when clipping after resize:
        #   box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.input_width - 1)
        #   box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.input_height - 1)

    # Identity values so that `ratio`/`pad` exist when no resize is needed
    # (the original raised NameError on `pad` in that case).
    resize = False
    ratio, pad = (1.0, 1.0), (0.0, 0.0)
    # `or`, not `|`: with `|` the test parsed as the chained comparison
    # h != (input_height | w) != input_width.
    if img.shape[0] != self.input_height or img.shape[1] != self.input_width:
        img, ratio, pad = resize_image_with_pad(
            img, (self.input_height, self.input_width))
        resize = True

    labels = np.zeros((len(anns), 9))
    for i, a in enumerate(anns):
        locs = np.array(a["locations"])
        rot_y = np.array(a["rot_y"])
        if flipped:
            locs[0] *= -1
            rot_y *= -1

        # The 2D box could be computed from the camera and the 3D box, but
        # that box leaves the image when the object is truncated, so the
        # annotated box is used instead.  The call is kept although its
        # result is currently unused.
        encode_label(K, rot_y, a["dimensions"], locs)

        labels[i, 0] = a["label"]
        labels[i, 1:5] = np.array(a["bbox"])
        labels[i, 5:8] = np.array(a["dimensions"])
        labels[i, 8] = rot_y

    nL = len(labels)
    if nL > 0:
        if flipped:
            # The image was mirrored, so mirror the annotated xyxy boxes
            # too (same x -> width - x - 1 mapping as the principal point);
            # the original left them in un-flipped coordinates.
            x1 = labels[:, 1].copy()
            labels[:, 1] = size[0] - labels[:, 3] - 1
            labels[:, 3] = size[0] - x1 - 1

        if resize:
            labels[:, 1] = ratio[0] * labels[:, 1] + pad[0]  # pad width
            labels[:, 2] = ratio[1] * labels[:, 2] + pad[1]  # pad height
            labels[:, 3] = ratio[0] * labels[:, 3] + pad[0]
            labels[:, 4] = ratio[1] * labels[:, 4] + pad[1]

        # convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates 0 - 1
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width

    label_out = torch.zeros((nL, self.out_parms + 1))
    if nL > 0:
        label_out[:, 1:] = torch.from_numpy(labels)

    # Convert: BGR to RGB, HWC to CHW
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)

    # NOTE(review): after the transpose img.shape is (C, H, W), so these
    # factors divide the original height by the new width and the original
    # width by the new height - kept as-is, confirm against the consumer.
    shapes = (size[1], size[0]), (size[1] / img.shape[2],
                                  size[0] / img.shape[1], pad)
    return torch.from_numpy(img), label_out[:, :6], self.files[index], shapes
def __next__(self):
    """Return the next batch of square chips cut from the large source images.

    For each source image of the batch: apply SV augmentation and a random
    affine transform, pick 8 chip centres (weighted by the class weights
    of nearby labels when the image has labels), crop a ``height`` x
    ``height`` chip around each centre, clip/filter its labels and apply
    random flips.  The chips are shuffled before being stacked.

    Returns:
        tuple: ``(imgs, labels)``; ``imgs`` is a float32 tensor (channels
        first, channel order reversed, standardized with ``self.rgb_mean``
        and ``self.rgb_std``) and ``labels`` a list of per-chip tensors.

    Raises:
        StopIteration: when the batch counter reaches ``self.nB``.
    """
    self.count += 1
    if self.count == self.nB:
        raise StopIteration

    first = self.count * self.batch_size
    last = min((self.count + 1) * self.batch_size, self.nF)

    height = self.height
    half = height / 2

    img_all, labels_all = [], []
    for files_index in range(first, last):
        img_path = '%s/%g.bmp' % (self.path, self.shuffled_vector[files_index])
        img0 = cv2.imread(img_path)
        if img0 is None:
            continue

        augment_hsv = True
        if augment_hsv:
            # SV augmentation by 50%, written back into img0
            fraction = 0.50
            hsv = cv2.cvtColor(img0, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # saturation first, then value
                plane = hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    np.clip(plane, a_min=0, a_max=255, out=plane)
                hsv[:, :, channel] = plane.astype(np.uint8)
            cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img0)

        # Labels of this chip, looked up by the numeric file name
        chip = img_path.rsplit('/')[-1]
        chip_id = float(chip.replace('.tif', '').replace('.bmp', ''))
        rows = (self.mat['id'] == chip_id).nonzero()[0]
        labels1 = self.mat['targets'][rows]

        img1, labels1, M = random_affine(img0, targets=labels1,
                                         degrees=(-20, 20),
                                         translate=(0.01, 0.01),
                                         scale=(0.70, 1.30))
        nL1 = len(labels1)
        border = half + 1

        # 100 random points inside the image, mapped through the affine
        # matrix and kept only if a full chip fits around them
        r = np.ones((100, 3))
        r[:, :2] = np.random.rand(100, 2) * (
            np.array(img0.shape)[[1, 0]] - border * 2) + border
        r = (r @ M.T)[:, :2]
        inside = np.all(r > border, 1) & np.all(r < img1.shape[0] - border, 1)
        r = r[inside]

        if nL1 > 0:
            # Weight each candidate by the class weights of the labels
            # whose centre falls inside the chip around it
            cx = (labels1[:, 1] + labels1[:, 3]) / 2
            cy = (labels1[:, 2] + labels1[:, 4]) / 2
            weights = []
            for px, py in r:
                near = (abs(px - cx) < half) & (abs(py - cy) < half)
                c = labels1[near, 0]
                if len(c) == 0:
                    weights.append(1e-16)
                else:
                    weights.append(
                        self.class_weights[c.astype(np.int8)].sum())

            weights = np.array(weights)
            weights /= weights.sum()
            picked = np.random.choice(len(r), size=8, p=weights,
                                      replace=False)
            r = r[picked]

            area0 = (labels1[:, 3] - labels1[:, 1]) * (
                labels1[:, 4] - labels1[:, 2])

        h, w, _ = img1.shape
        for j in range(8):
            labels = np.array([], dtype=np.float32)

            pad_x = int(r[j, 0] - half)
            pad_y = int(r[j, 1] - half)

            if nL1 > 0:
                # Shift labels into chip coordinates and clip to the chip
                labels = labels1.copy()
                labels[:, [1, 3]] -= pad_x
                labels[:, [2, 4]] -= pad_y
                np.clip(labels[:, 1:5], 0, height, out=labels[:, 1:5])

                lw = labels[:, 3] - labels[:, 1]
                lh = labels[:, 4] - labels[:, 2]
                area = lw * lh
                ar = np.maximum(lw / (lh + 1e-16), lh / (lw + 1e-16))

                # objects must have width and height > 4 pixels
                keep = ((lw > 4) & (lh > 4) & (area > 20)
                        & (area / area0 > 0.1) & (ar < 10))
                labels = labels[keep]

            img = img1[pad_y:pad_y + height, pad_x:pad_x + height]

            nL = len(labels)
            if nL > 0:
                # convert labels to xywh, relative to the chip size
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            # random lr flip
            if random.random() > 0.5:
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 1] = 1 - labels[:, 1]

            # random ud flip
            if random.random() > 0.5:
                img = np.flipud(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

    # Randomize the chip order
    order = np.random.permutation(len(labels_all))
    img_all = [img_all[k] for k in order]
    labels_all = [labels_all[k] for k in order]

    # Normalize: BGR to RGB, channels first, then standardize
    batch = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)
    batch = np.ascontiguousarray(batch, dtype=np.float32)
    batch -= self.rgb_mean
    batch /= self.rgb_std

    return torch.from_numpy(batch), labels_all
def detect(input_image, save_img=True):
    """Run the detector on one frame and publish the onion centroids.

    The frame is letterboxed, passed through the global ``model``, filtered
    with NMS and the resulting boxes are drawn on the image, saved to
    ``opt.output`` and collected per class.  Centroids of 'blemished' and
    'unblemished' detections with x > 250 are published on the module-level
    publishers, both per class and ordered by x.

    Args:
        input_image: frame as an array; it is converted to float32.
        save_img: whether to draw boxes for the frame.  NOTE(review):
            saving is forced on after the boxes are drawn (as in the
            original), so the image is always written to disk.

    Returns:
        list: one dict per processed image mapping class name to a list of
        ``[x, y, w, h]`` integer boxes.

    Raises:
        StopIteration: if 'q' is pressed in an OpenCV window.
    """
    global frame_num, model
    global pubCentBlem, pubCentUnBlem

    out, view_img, save_txt, imgsz = \
        opt.output, opt.view_img, opt.save_txt, opt.img_size

    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    print("class names array ", names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        model(img.half() if half else img)  # warm-up run

    bounding_boxes_all_images = []

    img0 = input_image.astype('float32')
    img = letterbox(img0, new_shape=imgsz)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, channels first
    img = np.ascontiguousarray(img)
    dataset = [("frame_num" + str(frame_num) + '.jpg', img, img0, None)]

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = torch_utils.time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t3 = torch_utils.time_synchronized()

        # Per-image centroid bookkeeping
        Loc2arrCent = {}    # centroid x -> (x, y)
        Loc2Cls = {}        # centroid x -> 0 (blemished) / 1 (unblemished)
        arrCentBlem = []    # flat [x0, y0, x1, y1, ...]
        arrCentUnBlem = []  # flat [x0, y0, x1, y1, ...]

        # Process detections
        for det in pred:  # detections per image
            p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            bounding_boxes = {}
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    cls_name = names[int(cls)]
                    tlx, tly, brx, bry = (int(xyxy[0]), int(xyxy[1]),
                                          int(xyxy[2]), int(xyxy[3]))
                    bounding_boxes.setdefault(cls_name, []).append(
                        [tlx, tly, abs(brx - tlx), abs(bry - tly)])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') %
                                       (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (cls_name, conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=1)

                    # Record the onion centroid
                    if cls_name == 'blemished' or cls_name == 'unblemished':
                        centx = (tlx + brx) / 2
                        centy = (tly + bry) / 2
                        if centx > 250:  # camera looking from the front: centre is on the belt
                            blemished = cls_name == 'blemished'
                            Loc2Cls[centx] = 0 if blemished else 1
                            Loc2arrCent[centx] = (centx, centy)
                            # Choose the list by class first.  The original
                            # sent a blemished onion whose x was already
                            # recorded into the unblemished list.  The
                            # duplicate check looks at x entries only
                            # (even positions of the flat x, y list).
                            target = arrCentBlem if blemished else arrCentUnBlem
                            if centx not in target[0::2]:
                                target.append(centx)
                                target.append(centy)

            # All bounding boxes for this frame
            bounding_boxes_all_images.append(bounding_boxes)
            print("Frame number = ", frame_num, "Bounding boxes: ",
                  bounding_boxes)
            frame_num += 1

            # Print time (inference + NMS)
            print('%s Inference. (%.3fs)' % (s, t2 - t1))
            print('%s NMS. (%.3fs)' % (s, t3 - t2))

            # Stream results (window display itself is disabled; only the
            # key poll remains)
            view_img = True
            if view_img:
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections); comment out the
            # imwrite to avoid saving images
            save_img = True
            if save_img:
                cv2.imwrite(save_path, im0)

        # Publish centroids for the current image
        print("publishing arrCentBlem ", arrCentBlem)
        print("publishing arrCentUnBlem ", arrCentUnBlem)
        pubCentBlem.publish(Float32MultiArray(data=arrCentBlem))
        pubCentUnBlem.publish(Float32MultiArray(data=arrCentUnBlem))

        # Classes and centroids ordered by centroid x
        ordered_x = sorted(Loc2Cls)
        LocOrderedPreds = [Loc2Cls[x] for x in ordered_x]
        print("publishing LocOrderedPreds ", LocOrderedPreds)
        pubLocOrderedPreds.publish(Int32MultiArray(data=LocOrderedPreds))

        arrCentxyOrd = [Loc2arrCent[x] for x in sorted(Loc2arrCent)]
        print("arrCentxyOrd ", arrCentxyOrd)
        arrCentLocOrd = [coord for cent in arrCentxyOrd for coord in cent]
        print("publishing arrCentLocOrd ", arrCentLocOrd)
        pubCentLocOrd.publish(Float32MultiArray(data=arrCentLocOrd))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return bounding_boxes_all_images
def detect(self, outfile=None):
    """Run detection and tracking over the video and optionally log tracks.

    Frames are grabbed from ``self.vdo``; every frame whose index is a
    multiple of 3 is skipped.  Each remaining frame goes through
    ``self.yolo3.predict`` and ``self.deepsort.update``; tracked boxes are
    drawn on the frame, which is shown and/or written according to
    ``self.args``.

    Args:
        outfile: optional path of a text file receiving one
            ``frame,id,x,y,w,h,1,-1,-1,-1`` line per tracked box.
    """
    frame_cnt = -1
    f = open(outfile, 'w') if outfile is not None else None
    print("begin....")
    try:
        while self.vdo.grab():
            frame_cnt += 1
            if frame_cnt % 3 == 0:
                continue

            start = time.time()
            _, ori_im = self.vdo.retrieve()
            im = ori_im

            t1_begin = time.time()
            bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
            t1_end = time.time()

            t2_begin = time.time()
            if bbox_xxyy is not None:
                bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    # draw the tracked boxes
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

                    # frame, id, tlwh, 1, -1, -1, -1
                    if f is not None:
                        for i, box in enumerate(xyxy2tlwh(bbox_xyxy)):
                            f.write("%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                frame_cnt + 1, outputs[i, -1],
                                int(box[0]), int(box[1]),
                                int(box[2]), int(box[3])))
            t2_end = time.time()

            end = time.time()
            total = end - start
            det = t1_end - t1_begin
            # A coarse clock can report zero elapsed time; avoid dividing
            # by zero in the statistics line.
            det_pct = det * 100 / total if total > 0 else 0.0
            fps = 1 / total if total > 0 else float('inf')
            print(
                "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                % (frame_cnt, det, (t2_end - t2_begin), total, det_pct, fps))

            if self.args.display:
                cv2.imshow("test", ori_im)
                cv2.waitKey(1)

            if self.args.save_path:
                self.output.write(ori_im)
    finally:
        # Close the log even when a frame raises, so written lines are
        # flushed and the handle is not leaked.
        if f is not None:
            f.close()
def __getitem__(self, index):
    """Return the letterboxed image and its labels for ``index``.

    Returns:
        ``(img, labels_out, img_path, shapes)`` where ``img`` is a tensor
        built from the channel-reversed, channel-first image,
        ``labels_out`` is an ``(nL, 6)`` tensor whose columns 1: hold the
        label rows (column 0 stays zero), and ``shapes`` is
        ``(h0, w0), ((h / h0, w / w0), pad)``.
    """
    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # Load image
    img, (h0, w0), (h, w) = load_image(self, index)

    # Letterbox to the final shape
    if self.rect:
        shape = self.batch_shapes[self.batch[index]]
    else:
        shape = self.img_size
    img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
    shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        raw = self.labels[index]
        if raw is None:  # labels not preloaded
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f.read().splitlines()]
            raw = np.array(rows, dtype=np.float32)

        if raw.size > 0:
            # Normalized xywh to pixel xyxy format
            half_w = raw[:, 3] / 2
            half_h = raw[:, 4] / 2
            labels = raw.copy()
            labels[:, 1] = ratio[0] * w * (raw[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = ratio[1] * h * (raw[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = ratio[0] * w * (raw[:, 1] + half_w) + pad[0]
            labels[:, 4] = ratio[1] * h * (raw[:, 2] + half_h) + pad[1]

    nL = len(labels)  # number of labels
    if nL:
        # xyxy -> xywh, then normalize to 0 - 1 by the letterboxed size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h
        labels[:, [1, 3]] /= img_w

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        flip_ud = False
        if flip_ud and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR to RGB, HWC to CHW, made contiguous for torch.from_numpy
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
    return torch.from_numpy(chw), labels_out, img_path, shapes
def __getitem__(self, index):
    """Return ``(img_uri, img, labels)`` for the sample at ``index``.

    The image is either scaled, padded and cut into a random patch
    (``self.ts``) or padded and resized to ``(self.height, self.width)``.
    Labels follow the same geometry, then the enabled augmentations are
    applied (colour jitter, affine, grayscale, left-right flip, blur, noise,
    contrast, sharpen). Finally boxes are converted with ``xyxy2xywh``,
    divided by ``self.width`` / ``self.height`` and zero-padded to
    ``self.num_targets_per_image`` rows.

    When ``self.vis_batch`` is set, intermediate images are saved under
    ``visualization_tmp_path`` and the process exits after
    ``self.vis_batch`` samples.

    Raises:
        Exception: if the image is empty or any final label value is
            negative.
    """
    img_uri = self.img_files[index]
    img_labels = self.labels[index]
    # don't download, since it was already downloaded in the init
    img_path = img_uri
    img_name = "_".join(map(str, img_path.split("_")[-5:]))
    orig_img = PIL.Image.open(img_path).convert('RGB')
    if orig_img is None:
        raise Exception(
            "Empty image: {img_path}".format(img_path=img_path))

    if self.vis_batch and len(img_labels) > 0:
        vis_orig_img = copy.deepcopy(orig_img)
        labels = add_class_dimension_to_labels(img_labels)
        labels = xyhw2xyxy_corner(labels, skip_class_dimension=True)
        tmp_path = os.path.join(visualization_tmp_path,
                                img_name[:-4] + ".jpg")
        visualize_and_save_to_local(vis_orig_img, labels, tmp_path,
                                    box_color="green")
        print(f'new image uploaded to {tmp_path}')

    # First, handle image re-shaping
    if self.ts:
        scale = self.scales[index]
        scaled_img = scale_image(orig_img, scale)
        scaled_img_width, scaled_img_height = scaled_img.size
        patch_width, patch_height = self.width, self.height

        vert_pad, horiz_pad = pre_tile_padding(scaled_img_width,
                                               scaled_img_height,
                                               patch_width, patch_height)
        padded_img = torchvision.transforms.functional.pad(
            scaled_img,
            padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
            fill=(127, 127, 127),
            padding_mode="constant")
        padded_img_width, padded_img_height = padded_img.size

        _, _, n_patches, _, _ = get_patch_spacings(padded_img_width,
                                                   padded_img_height,
                                                   patch_width,
                                                   patch_height)

        patch_index = random.randint(0, n_patches - 1)
        if self.debug_mode:
            patch_index = 0
        img, boundary = get_patch(padded_img, patch_width, patch_height,
                                  patch_index)
    else:
        orig_img_width, orig_img_height = orig_img.size
        vert_pad, horiz_pad, ratio = calculate_padding(
            orig_img_height, orig_img_width, self.height, self.width)
        img = torchvision.transforms.functional.pad(
            orig_img,
            padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
            fill=(127, 127, 127),
            padding_mode="constant")
        img = torchvision.transforms.functional.resize(
            img, (self.height, self.width))

    # If no labels, no need to do augmentation (this should change in the
    # future), so immediately return with the padded image and empty labels
    if len(img_labels) == 0:
        labels = torch.zeros((len(img_labels), 5))
        img = torchvision.transforms.functional.to_tensor(img)
        labels = F.pad(
            labels,
            pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
            mode="constant")
        return img_uri, img, labels

    # Next, handle label re-shaping
    labels = add_class_dimension_to_labels(img_labels)
    labels = xyhw2xyxy_corner(labels)
    if self.ts:
        labels = scale_labels(labels, self.scales[index])
        labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
        if self.vis_batch:
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_scaled.jpg")
            visualize_and_save_to_local(padded_img, labels, tmp_path,
                                        box_color="red")

        # Labels for the patch that was actually selected above.
        labels_temp = filter_and_offset_labels(labels, boundary)

        # NOTE(review): `labels` is replaced by `labels_temp` after this
        # branch, so the per-patch relabelling below only feeds the
        # visualisation / upload side effects.
        if self.vis_batch:
            pre_vis_labels = copy.deepcopy(labels)
            for i in range(n_patches):
                vis_patch_img, boundary = get_patch(
                    padded_img, patch_width, patch_height, i)
                labels = filter_and_offset_labels(pre_vis_labels, boundary)
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] +
                                        "_patch_{}.jpg".format(i))
                visualize_and_save_to_local(vis_patch_img, labels, tmp_path,
                                            box_color="blue")
        if self.upload_dataset:
            pre_vis_labels = copy.deepcopy(labels)
            for i in range(n_patches):
                vis_patch_img, boundary = get_patch(
                    padded_img, patch_width, patch_height, i)
                labels = filter_and_offset_labels(pre_vis_labels, boundary)
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] +
                                        "_patch_{}.jpg".format(i))
                upload_label_and_image_to_gcloud(vis_patch_img, labels,
                                                 tmp_path)
        else:
            labels = filter_and_offset_labels(labels, boundary)
    else:
        labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
        labels = scale_labels(labels, ratio)
        labels_temp = labels

        if self.vis_batch:
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_pad_resized.jpg")
            visualize_and_save_to_local(img, labels, tmp_path,
                                        box_color="blue")

    labels = labels_temp
    if self.vis_batch and self.data_aug:
        vis_aug_img = copy.deepcopy(img)
        tmp_path = os.path.join(visualization_tmp_path,
                                img_name[:-4] + "_before_aug.jpg")
        visualize_and_save_to_local(vis_aug_img, labels, tmp_path,
                                    box_color="red")

    if self.augment_hsv or self.data_aug:
        if random.random() > 0.5:
            img = self.jitter(img)
            # no transformation on labels

    # Augment image and labels
    img_width, img_height = img.size
    if self.augment_affine or self.data_aug:
        if random.random() > 0:
            angle = random.uniform(-10, 10)
            translate = (random.uniform(-40, 40),
                         random.uniform(-40, 40))  # WORKS
            scale = random.uniform(0.9, 1.1)
            shear = random.uniform(-3, 3)
            img = torchvision.transforms.functional.affine(
                img, angle, translate, scale, shear, 2,
                fillcolor=(127, 127, 127))
            labels = affine_labels(img_height, img_width, labels, -angle,
                                   translate, scale, (-shear, 0))

    if self.bw:
        img = torchvision.transforms.functional.to_grayscale(
            img, num_output_channels=1)

    # random left-right flip
    if self.lr_flip:
        if random.random() > 0.5:
            img = torchvision.transforms.functional.hflip(img)
            # Mirroring swaps the left and right edges: the new left edge is
            # the mirrored old right edge. Both right-hand sides are fresh
            # arrays, so the two assignments do not alias each other.
            mirrored_left = img_width - labels[:, 3]
            mirrored_right = img_width - labels[:, 1]
            labels[:, 1] = mirrored_left
            labels[:, 3] = mirrored_right

    # GaussianBlur, needs further development
    if self.blur:
        if random.random() > 0.2:
            arr = np.asarray(img)
            # Unused value; the draw is kept so the random stream consumed
            # by this method stays the same.
            _ = random.uniform(40, -40)
            sigma = random.uniform(0, 3.00)
            seq = iaa.Sequential([iaa.GaussianBlur(sigma=sigma)])
            images_aug = seq.augment_images(arr)
            img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

    # AdditiveGaussianNoise
    if self.noise:
        if random.random() > 0.3:
            arr = np.asarray(img)
            scale = random.uniform(0, 0.03 * 255)
            seq = iaa.Sequential([
                iaa.AdditiveGaussianNoise(loc=0, scale=scale,
                                          per_channel=0.5)
            ])
            images_aug = seq.augment_images(arr)
            img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

    # SigmoidContrast, needs further development
    if self.contrast:
        if random.random() > 0.5:
            arr = np.asarray(img)
            cutoff = random.uniform(0.45, 0.75)
            gain = random.randint(5, 10)
            seq = iaa.Sequential(
                [iaa.SigmoidContrast(gain=gain, cutoff=cutoff)])
            images_aug = seq.augment_images(arr)
            img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

    # Sharpen, needs further development
    if self.sharpen:
        if random.random() > 0.3:
            arr = np.asarray(img)
            alpha = random.uniform(0, 0.5)
            seq = iaa.Sharpen(alpha=alpha)
            images_aug = seq.augment_images(arr)
            img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

    if self.vis_batch and self.data_aug:
        vis_post_aug_img = copy.deepcopy(img)
        tmp_path = os.path.join(visualization_tmp_path,
                                img_name[:-4] + "_post_augmentation.jpg")
        visualize_and_save_to_local(vis_post_aug_img, labels, tmp_path,
                                    box_color="green")

    if self.vis_batch:
        self.vis_counter += 1
        if self.vis_counter > (self.vis_batch - 1):
            sys.exit('Finished visualizing enough images. Exiting!')

    # Corner boxes -> xywh, normalised by the network input size.
    labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
    labels[:, (1, 3)] /= self.width
    labels[:, (2, 4)] /= self.height

    img = torchvision.transforms.functional.to_tensor(img)
    # Pad with zero rows so every sample has the same number of targets.
    labels = F.pad(labels,
                   pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                   mode="constant")
    if (labels < 0).sum() > 0:
        raise Exception(f"labels for image {img_uri} have negative values")
    return img_uri, img, labels
def __getitem__(self, index):
    """Load one image and its labels, letterboxed to ``self.img_size``.

    Augmentation (random affine, flips) is currently disabled in this
    loader.

    Returns:
        ``(img, labels_out, img_path, (h, w))`` where ``img`` is a float32
        tensor scaled to 0.0 - 1.0 in channel-first order, ``labels_out``
        is an ``(nL, 6)`` tensor whose columns 1: hold the label rows
        (column 0 stays zero), and ``(h, w)`` is the size of the image as
        read from disk.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    img_path = self.img_files[index]
    label_path = self.label_files[index]

    img = cv2.imread(img_path)  # BGR
    assert img is not None, 'File Not Found ' + img_path
    h, w, _ = img.shape
    # Resize every image to img_size.
    img, ratio, padw, padh = letterbox(img, height=self.img_size)

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        with open(label_path, 'r') as file:
            # One box per line: "class x_center y_center w h", for example
            # "4 0.43 0.36 0.06 0.24"; the coordinates are normalized.
            lines = file.read().splitlines()

        x = np.array([line.split() for line in lines],
                     dtype=np.float32)  # x: (box_num, 5)
        if x.size > 0:
            # Normalized xywh to pixel xyxy format. The image was resized
            # and padded, so the boxes are scaled and shifted the same way.
            labels = x.copy()
            labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh and normalize by the letterboxed size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

    # Column 0 is left at zero (possibly reserved for a batch index).
    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Normalize
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)
def __getitem__(self, index):
    """Return one letterboxed, optionally augmented sample.

    Returns:
        ``(img, labels_out, img_path, (h, w))`` where ``img`` is a float32
        tensor scaled to 0.0 - 1.0 in channel-first order, ``labels_out``
        is an ``(nL, 6)`` tensor whose columns 1: hold the label rows, and
        ``(h, w)`` is the image size before letterboxing.

    Raises:
        AssertionError: if the image has to be read from disk and the read
            fails.
    """
    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # Load image (from the preloaded list when there is one)
    if hasattr(self, 'imgs'):
        img = self.imgs[index]
    else:
        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

    # Letterbox
    h, w, *_ = img.shape
    if self.pad_rectangular:
        target_shape = self.batch_shapes[self.batch[index]]
        img, ratio, padw, padh = letterbox(img, new_shape=target_shape,
                                           mode='rect')
    else:
        img, ratio, padw, padh = letterbox(img, new_shape=self.img_size,
                                           mode='square')

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        boxes = self.labels[index]
        if boxes.size > 0:
            # Normalized xywh to pixel xyxy format
            half_w = boxes[:, 3] / 2
            half_h = boxes[:, 4] / 2
            labels = boxes.copy()
            labels[:, 1] = ratio * w * (boxes[:, 1] - half_w) + padw
            labels[:, 2] = ratio * h * (boxes[:, 2] - half_h) + padh
            labels[:, 3] = ratio * w * (boxes[:, 1] + half_w) + padw
            labels[:, 4] = ratio * h * (boxes[:, 2] + half_h) + padh

    # Augment image and labels
    if self.augment:
        img, labels = random_affine(img, labels,
                                    degrees=(-5, 5),
                                    translate=(0.10, 0.10),
                                    scale=(0.90, 1.10))

    nL = len(labels)  # number of labels
    if nL:
        # xyxy -> xywh, then normalize coordinates to 0 - 1
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h
        labels[:, [1, 3]] /= img_w

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        flip_ud = False
        if flip_ud and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR to RGB, HWC to CHW, uint8 to float32 in 0.0 - 1.0
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                               dtype=np.float32)
    chw /= 255.0

    return torch.from_numpy(chw), labels_out, img_path, (h, w)
def detect(self):
    """Run the detector and tracker over every frame grabbed from ``self.vidCap``.

    For each frame the detector (``self.yolov3``) and the tracker
    (``self.deepsort``) are timed and tracked boxes are drawn on the frame.
    A timing line is printed every 500 frames. The frame is optionally
    shown (``self.display``) and written to ``self.output``
    (``self.save_path``).

    When ``self.output_file`` is set, one line
    ``frame,id,x,y,w,h,1,-1,-1,-1`` is written per tracked box, where
    ``x, y`` is the top-left corner and ``w, h`` the box size.
    """
    frame_no = -1
    # skip_no = 2

    f = open(self.output_file, "w") if self.output_file else None
    try:
        while self.vidCap.grab():
            frame_no += 1

            # skip frames every n frames
            # if frame_no % skip_no != 0:
            #     continue

            # start time
            total_begin = time.time()
            _, img = self.vidCap.retrieve()

            # detector stage
            yolo_begin = time.time()
            bbox_xyxy, cls_conf, cls_ids = self.yolov3.predict(img)  # [x1,y1,x2,y2]
            yolo_end = time.time()

            # tracker stage
            ds_begin = time.time()
            if bbox_xyxy is not None:
                bbox_cxcywh = xyxy2xywh(bbox_xyxy)
                outputs = self.deepsort.update(bbox_cxcywh, cls_conf, img)

                if len(outputs) > 0:
                    # [x1,y1,x2,y2] id
                    bbox_xyxy = outputs[:, :4]
                    ids = outputs[:, -1]
                    img = draw_bboxes(img, bbox_xyxy, ids)

                    # frame,id,tlwh,1,-1,-1,-1
                    if f is not None:
                        for (x1, y1, x2, y2), track_id in zip(bbox_xyxy, ids):
                            # Top-left corner plus size, derived directly
                            # from the tracker's corner coordinates.
                            f.write("%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                frame_no + 1, track_id,
                                int(x1), int(y1),
                                int(x2 - x1), int(y2 - y1)))
            ds_end = time.time()

            total_end = time.time()

            if frame_no % 500 == 0:
                print("frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                      % (frame_no,
                         (yolo_end - yolo_begin),
                         (ds_end - ds_begin),
                         (total_end - total_begin),
                         ((yolo_end - yolo_begin) * 100 /
                          (total_end - total_begin)),
                         (1 / (total_end - total_begin))))

            if self.display is True:
                cv2.imshow("Test", img)
                cv2.waitKey(1)

            if self.save_path:
                self.output.write(img)
    finally:
        # Close the track log even if detection/tracking raised mid-video.
        if f is not None:
            f.close()
def __getitem__(self, index):
    """Return one cached-or-loaded, letterboxed and augmented sample.

    Images are cached in ``self.imgs`` when ``self.n < 1001``; labels read
    from disk are cached in ``self.labels``.

    Returns:
        ``(img, labels_out, img_path, (h, w))`` where ``img`` is a float32
        tensor scaled to 0.0 - 1.0 in channel-first order, ``labels_out``
        is an ``(nL, 6)`` tensor whose columns 1: hold the label rows, and
        ``(h, w)`` is the image size before letterboxing.

    Raises:
        AssertionError: if the image has to be read from disk and the read
            fails.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # Load image
    img = self.imgs[index]
    if img is None:
        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path
        if self.n < 1001:
            self.imgs[index] = img  # cache image into memory

    # Augment colorspace
    augment_hsv = True
    if self.augment and augment_hsv:
        # SV augmentation by 50%
        fraction = 0.50  # must be < 1.0
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
        S = img_hsv[:, :, 1].astype(np.float32)  # saturation
        V = img_hsv[:, :, 2].astype(np.float32)  # value

        a = (random.random() * 2 - 1) * fraction + 1
        b = (random.random() * 2 - 1) * fraction + 1
        S *= a
        V *= b

        img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
        img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
        # Convert into a new array rather than into `img`: `img` may be the
        # cached entry in self.imgs, and writing in place would make the
        # augmentation accumulate every time the sample is fetched.
        img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

    # Letterbox
    h, w, _ = img.shape
    if self.rect:
        shape = self.batch_shapes[self.batch[index]]
        img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape,
                                                    mode='rect')
    else:
        shape = self.img_size
        img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape,
                                                    mode='square')

    # Load labels
    labels = []
    if os.path.isfile(label_path):
        x = self.labels[index]
        if x is None:  # labels not preloaded
            with open(label_path, 'r') as f:
                rows = [line.split() for line in f.read().splitlines()]
            x = np.array(rows, dtype=np.float32)
            self.labels[index] = x  # save for next time

        if x.size > 0:
            # Normalized xywh to pixel xyxy format
            labels = x.copy()
            labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

    # Augment image and labels
    if self.augment:
        img, labels = random_affine(img, labels,
                                    degrees=(-5, 5),
                                    translate=(0.10, 0.10),
                                    scale=(0.90, 1.10))

    nL = len(labels)  # number of labels
    if nL:
        # convert xyxy to xywh
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates 0 - 1
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width

    if self.augment:
        # random left-right flip
        lr_flip = True
        if lr_flip and random.random() > 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        ud_flip = False
        if ud_flip and random.random() > 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Normalize
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)
def __next__(self):
    """Return the next batch as ``(images, labels_all)``.

    ``images`` is a float32 tensor scaled to 0.0 - 1.0 with channels first;
    ``labels_all`` is a list with one label tensor per image that could be
    read. Each label row has 7 columns: class, box (4 values), duration and
    pitch. Durations not listed in the duration class file become 10;
    pitches not listed in the pitch class file become 15 when above 15 and
    -5 otherwise.

    Raises:
        StopIteration: once ``self.nB`` batches have been produced.
        AssertionError: if no image of the batch could be read.
    """
    self.count += 1
    if self.count == self.nB:
        raise StopIteration

    ia = self.count * self.batch_size
    ib = min((self.count + 1) * self.batch_size, self.nF)

    multi_scale = False
    if multi_scale and self.augment:
        # Multi-Scale YOLO Training
        height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
    else:
        # Fixed-Scale YOLO Training
        height = self.height

    # The class lists are the same for every image, so read them once per
    # batch instead of once per image.
    name_classes = load_classes(
        '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/new_duration.names')
    pitch_classes = load_classes(
        '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/pitch.names')

    img_all = []
    labels_all = []
    for index, files_index in enumerate(range(ia, ib)):
        img_path = self.img_files[self.shuffled_vector[files_index]]
        label_path = self.label_files[self.shuffled_vector[files_index]]

        img = cv2.imread(img_path)  # BGR
        if img is None:
            print('nooooooooooimages')
            continue

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, a_min=0, a_max=255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, a_min=0, a_max=255, out=V)

            img_hsv[:, :, 1] = S.astype(np.uint8)
            img_hsv[:, :, 2] = V.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = resize_square(img, height=height,
                                               color=(127.5, 127.5, 127.5))

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path,
                                 dtype=np.float32).reshape(-1, 7)

            # Columns 1-4 are rescaled and shifted to follow the resized,
            # padded image (no normalized-xywh conversion is applied here).
            labels = labels0.copy()
            labels[:, 1] = ratio * labels0[:, 1] + padw
            labels[:, 2] = ratio * labels0[:, 2] + padh
            labels[:, 3] = ratio * labels0[:, 3] + padw
            labels[:, 4] = ratio * labels0[:, 4] + padh

            durations = [
                int(v) if str(int(v)) in name_classes else 10
                for v in labels0[:, 5]
            ]

            # Out of the pitch range, or no pitch at all.
            pitchs = []
            for v in labels0[:, 6]:
                if str(int(v)) in pitch_classes:
                    pitchs.append(int(v))
                elif int(v) > 15:
                    pitchs.append(15)
                else:
                    pitchs.append(-5)

            labels[:, 5] = durations
            labels[:, 6] = pitchs
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, M = random_affine(img, labels,
                                           degrees=(-3, 3),
                                           translate=(0.1, 0.1),
                                           scale=(0.9, 1.1))

        plotFlag = False
        if plotFlag:
            import matplotlib.pyplot as plt
            plt.figure(figsize=(10, 10)) if index == 0 else None
            plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
            plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                     labels[:, [2, 2, 4, 4, 2]].T, '.-')
            plt.axis('off')

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled; the random draw still happens)
            ud_flip = False
            if ud_flip & (random.random() > 0.5):
                img = np.flipud(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img_all.append(img)
        labels_all.append(torch.from_numpy(labels))

    # Normalize
    assert len(img_all) != 0
    img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
        0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
    img_all = np.ascontiguousarray(img_all, dtype=np.float32)
    # img_all -= self.rgb_mean
    # img_all /= self.rgb_std
    img_all /= 255.0

    return torch.from_numpy(img_all), labels_all
def __getitem__(self, index):
    """Return one sample, built either as a mosaic or from a single image.

    Returns:
        ``(img, labels_out, path, shapes)`` where ``img`` is a tensor built
        from the channel-reversed, channel-first image, ``labels_out`` is
        an ``(nL, 6)`` tensor laid out as [batch, cls, x, y, w, h] with
        column 0 left at zero, ``path`` is ``self.img_files[index]`` and
        ``shapes`` is ``None`` for mosaics, otherwise
        ``(h0, w0), ((h / h0, w / w0), pad)``.
    """
    # Original note: image_weights is False during training.
    if self.image_weights:
        index = self.indices[index]

    hyp = self.hyp
    if self.mosaic:
        # Load mosaic (index is a plain int here)
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        # Load image. Per the original note, (h, w) is the size after
        # resizing, with one side equal to img_size, and img is the
        # interpolated BGR image.
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox; shape holds the final height and width
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Load labels: self.labels[index] holds all ground-truth boxes of
        # this image
        labels = []
        boxes = self.labels[index]
        if boxes.size > 0:
            # Normalized xywh to pixel xyxy format
            half_w = boxes[:, 3] / 2
            half_h = boxes[:, 4] / 2
            labels = boxes.copy()
            labels[:, 1] = ratio[0] * w * (boxes[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = ratio[1] * h * (boxes[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = ratio[0] * w * (boxes[:, 1] + half_w) + pad[0]
            labels[:, 4] = ratio[1] * h * (boxes[:, 2] + half_h) + pad[1]

    if self.augment:
        # Augment imagespace (mosaics are left as they are)
        if not self.mosaic:
            affine_kwargs = {
                key: hyp[key]
                for key in ('degrees', 'translate', 'scale', 'shear')
            }
            img, labels = random_affine(img, labels, **affine_kwargs)

        # Augment colorspace
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'],
                    vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        # xyxy -> xywh; both are still in pixels at this point
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

        # Normalize coordinates 0 - 1
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h
        labels[:, [1, 3]] /= img_w

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        flip_ud = False
        if flip_ud and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    # [batch, cls, x, y, w, h]
    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR to RGB, HWC to CHW, made contiguous for torch.from_numpy
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
    return torch.from_numpy(chw), labels_out, self.img_files[index], shapes
def __getitem__(self, index):
    """Return one sample; a 4-image mosaic is used whenever augmenting.

    Returns:
        ``(img, labels_out, img_path, (h, w))`` where ``img`` is a float32
        tensor scaled to 0.0 - 1.0 in channel-first order, ``labels_out``
        is an ``(nL, 6)`` tensor whose columns 1: hold the label rows, and
        ``(h, w)`` is the size of the mosaic, or of the loaded image before
        letterboxing.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # load 4 images at a time into a mosaic (only during training)
    mosaic = True and self.augment
    if mosaic:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        h, w, _ = img.shape
    else:
        # Load image
        img = load_image(self, index)

        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            target_shape = self.batch_shapes[self.batch[index]]
            img, ratio, padw, padh = letterbox(img, target_shape,
                                               mode='rect')
        else:
            img, ratio, padw, padh = letterbox(img, self.img_size,
                                               mode='square')

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            boxes = self.labels[index]
            if boxes is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                boxes = np.array(rows, dtype=np.float32)

            if boxes.size > 0:
                # Normalized xywh to pixel xyxy format
                half_w = boxes[:, 3] / 2
                half_h = boxes[:, 4] / 2
                labels = boxes.copy()
                labels[:, 1] = ratio[0] * w * (boxes[:, 1] - half_w) + padw
                labels[:, 2] = ratio[1] * h * (boxes[:, 2] - half_h) + padh
                labels[:, 3] = ratio[0] * w * (boxes[:, 1] + half_w) + padw
                labels[:, 4] = ratio[1] * h * (boxes[:, 2] + half_h) + padh

    if self.augment:
        # Augment colorspace
        augment_hsv(img,
                    hgain=self.hyp['hsv_h'],
                    sgain=self.hyp['hsv_s'],
                    vgain=self.hyp['hsv_v'])

        # Augment imagespace; every range is scaled to zero for mosaics
        g = 0.0 if mosaic else 1.0
        hyp = self.hyp
        affine_kwargs = {
            key: hyp[key] * g
            for key in ('degrees', 'translate', 'scale', 'shear')
        }
        img, labels = random_affine(img, labels, **affine_kwargs)

    nL = len(labels)  # number of labels
    if nL:
        # xyxy -> xywh, then normalize coordinates to 0 - 1
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h
        labels[:, [1, 3]] /= img_w

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        flip_ud = False
        if flip_ud and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR to RGB, HWC to CHW, uint8 to float32 in 0.0 - 1.0
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                               dtype=np.float32)
    chw /= 255.0

    return torch.from_numpy(chw), labels_out, img_path, (h, w)
def get_data(self, img_path, label_path):
    """Load, augment and letterbox one image together with its labels.

    Args:
        img_path: Path of the image file.
        label_path: Path of the label file; rows are reshaped to 6 columns,
            of which columns 2-5 are treated as a normalized xywh box.

    Returns:
        ``(img, labels, img_path, (h, w))`` where ``img`` is the RGB image
        (after ``self.transforms`` when set), ``labels`` holds boxes as
        xywh divided by ``self.width`` / ``self.height`` (an empty array
        when there is no label file), and ``(h, w)`` is the size of the
        image as read from disk.

    Raises:
        ValueError: if the image cannot be read.
    """
    height, width = self.height, self.width

    img = cv2.imread(img_path)  # BGR
    if img is None:
        raise ValueError('File corrupt {}'.format(img_path))

    augment_hsv = True
    if self.augment and augment_hsv:
        # SV augmentation by 50%: saturation (1) first, then value (2)
        fraction = 0.50
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        for channel in (1, 2):
            plane = img_hsv[:, :, channel].astype(np.float32)
            gain = (random.random() * 2 - 1) * fraction + 1
            plane *= gain
            if gain > 1:
                np.clip(plane, a_min=0, a_max=255, out=plane)
            img_hsv[:, :, channel] = plane.astype(np.uint8)
        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

    h, w, _ = img.shape
    img, ratio, padw, padh = letterbox(img, height=height, width=width)

    # Load labels
    if not os.path.isfile(label_path):
        labels = np.array([])
    else:
        labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

        # Normalized xywh to pixel xyxy format
        half_w = labels0[:, 4] / 2
        half_h = labels0[:, 5] / 2
        labels = labels0.copy()
        labels[:, 2] = ratio * w * (labels0[:, 2] - half_w) + padw
        labels[:, 3] = ratio * h * (labels0[:, 3] - half_h) + padh
        labels[:, 4] = ratio * w * (labels0[:, 2] + half_w) + padw
        labels[:, 5] = ratio * h * (labels0[:, 3] + half_h) + padh

    # Augment image and labels
    if self.augment:
        img, labels, M = random_affine(img, labels,
                                       degrees=(-5, 5),
                                       translate=(0.10, 0.10),
                                       scale=(0.50, 1.20))

    # Debug plot, off by default
    plotFlag = False
    if plotFlag:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.figure(figsize=(50, 50))
        plt.imshow(img[:, :, ::-1])
        plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                 labels[:, [2, 2, 4, 4, 2]].T, '.-')
        plt.axis('off')
        plt.savefig('test.jpg')
        time.sleep(10)

    nL = len(labels)
    if nL > 0:
        # convert xyxy to xywh, then normalize x by width and y by height
        labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())
        labels[:, 2] /= width
        labels[:, 3] /= height
        labels[:, 4] /= width
        labels[:, 5] /= height

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr & (random.random() > 0.5):
            img = np.fliplr(img)
            if nL > 0:
                labels[:, 2] = 1 - labels[:, 2]

    img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB
    if self.transforms is not None:
        img = self.transforms(img)

    return img, labels, img_path, (h, w)
def __getitem__(self, index):
    """Return one sample; a 4-image mosaic is used whenever augmenting.

    Returns:
        ``(img, labels_out, img_path, (h, w))`` where ``img`` is a float32
        tensor scaled to 0.0 - 1.0 in channel-first order, ``labels_out``
        is an ``(nL, 6)`` tensor whose columns 1: hold the label rows, and
        ``(h, w)`` is the size of the mosaic, or of the loaded image before
        letterboxing.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # load 4 images at a time into a mosaic (only during training)
    mosaic = True and self.augment
    if mosaic:
        # Load mosaic
        img, labels = load_mosaic(self, index)
        h, w, _ = img.shape
    else:
        # Load image
        img = load_image(self, index)

        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            target_shape = self.batch_shapes[self.batch[index]]
            img, ratio, padw, padh = letterbox(img, target_shape,
                                               mode='rect')
        else:
            img, ratio, padw, padh = letterbox(img, self.img_size,
                                               mode='square')

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            boxes = self.labels[index]
            if boxes is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                boxes = np.array(rows, dtype=np.float32)

            if boxes.size > 0:
                # Normalized xywh to pixel xyxy format
                half_w = boxes[:, 3] / 2
                half_h = boxes[:, 4] / 2
                labels = boxes.copy()
                labels[:, 1] = ratio[0] * w * (boxes[:, 1] - half_w) + padw
                labels[:, 2] = ratio[1] * h * (boxes[:, 2] - half_h) + padh
                labels[:, 3] = ratio[0] * w * (boxes[:, 1] + half_w) + padw
                labels[:, 4] = ratio[1] * h * (boxes[:, 2] + half_h) + padh

    if self.augment:
        # Augment imagespace; every range is scaled to zero for mosaics
        g = 0.0 if mosaic else 1.0
        hyp = self.hyp
        affine_kwargs = {
            key: hyp[key] * g
            for key in ('degrees', 'translate', 'scale', 'shear')
        }
        img, labels = random_affine(img, labels, **affine_kwargs)

    nL = len(labels)  # number of labels
    if nL:
        # xyxy -> xywh, then normalize coordinates to 0 - 1
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        img_h, img_w = img.shape[0], img.shape[1]
        labels[:, [2, 4]] /= img_h
        labels[:, [1, 3]] /= img_w

    if self.augment:
        # random left-right flip
        flip_lr = True
        if flip_lr and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # random up-down flip (disabled)
        flip_ud = False
        if flip_ud and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR to RGB, HWC to CHW, uint8 to float32 in 0.0 - 1.0
    chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                               dtype=np.float32)
    chw /= 255.0

    return torch.from_numpy(chw), labels_out, img_path, (h, w)
def __getitem__(self, index):
    """Load one sample (mosaic or single letterboxed image) with its labels.

    Args:
        index: Sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, path, shapes)``. ``img`` is a channel-first
        tensor with the channel order reversed (BGR to RGB); no dtype
        conversion or 0-1 scaling is done here. ``labels_out`` has shape
        ``(nL, 6)`` with column 0 left at zero and columns 1-5 holding the
        label row with normalized xywh. ``shapes`` is ``None`` for mosaics,
        otherwise ``(h0, w0), ((h / h0, w / w0), pad)``, which the original
        code marks as being for COCO mAP rescaling.
    """
    if self.image_weights:
        index = self.indices[index]

    hyp = self.hyp
    if self.mosaic:
        img, labels = load_mosaic(self, index)
        shapes = None
        # NOTE: MixUp (https://arxiv.org/pdf/1710.09412.pdf) of two mosaics
        # was present here only as commented-out code and is not applied.
    else:
        img, (h0, w0), (h, w) = load_image(self, index)

        # Final letterboxed shape: per-batch rectangle or the square size.
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)

        labels = []
        x = self.labels[index]
        if x.size > 0:
            # Normalized xywh -> pixel xyxy in the letterboxed image.
            scale_x = ratio[0] * w
            scale_y = ratio[1] * h
            half_w = x[:, 3] / 2
            half_h = x[:, 4] / 2
            labels = x.copy()
            labels[:, 1] = scale_x * (x[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = scale_y * (x[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = scale_x * (x[:, 1] + half_w) + pad[0]
            labels[:, 4] = scale_y * (x[:, 2] + half_h) + pad[1]

    if self.augment:
        # The affine step is skipped for mosaics in this method.
        if not self.mosaic:
            img, labels = random_affine(
                img,
                labels,
                degrees=hyp['degrees'],
                translate=hyp['translate'],
                scale=hyp['scale'],
                shear=hyp['shear'],
            )

        # Colorspace augmentation; the return value is not used.
        augment_hsv(img,
                    hgain=hyp['hsv_h'],
                    sgain=hyp['hsv_s'],
                    vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalize by height
        labels[:, [1, 3]] /= img.shape[1]  # normalize by width

    if self.augment:
        flip_horizontal = True
        if flip_horizontal and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        flip_vertical = False  # disabled; no random number is drawn
        if flip_vertical and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR -> RGB and HWC -> CHW; made contiguous for torch.from_numpy.
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def augment_collection(self, index):
    """Load and augment one sample, returning NumPy arrays with a gt_score column.

    While augmenting, a mosaic is used for roughly half of the calls (decided
    by a random draw); otherwise a single image is loaded and letterboxed.
    HSV and affine augmentation run at full strength for both cases.

    Args:
        index: Sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, img_path, (h, w))``. ``img`` is a float32
        channel-first array scaled to 0-1 and, when the name
        ``norm_with_mean_std`` (not defined in this method) is truthy,
        additionally normalized per channel with fixed mean/std constants.
        ``labels_out`` is a float32 array of shape ``(nL, 7)``: column 0 is
        left at zero, column 1 is the class, column 2 is a gt_score of 1 and
        columns 3-6 are normalized x, y, w, h.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # Mosaics are only considered while augmenting; the random draw happens
    # only in that case.
    mosaic = self.augment
    if mosaic and random.random() < 0.5:
        img, labels = load_mosaic(self, index)
        h, w = img.shape[:2]
    else:
        img = load_image(self, index)
        h, w = img.shape[:2]  # shape before letterboxing

        if self.rect:
            target_shape, mode = self.batch_shapes[self.batch[index]], 'rect'
        else:
            target_shape, mode = self.img_size, 'square'
        img, ratio, padw, padh = letterbox(img, target_shape, mode=mode)

        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:  # labels were not preloaded, parse the file
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + padw
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + padh
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + padw
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + padh

    if self.augment:
        # Colorspace augmentation; the return value is not used.
        augment_hsv(img,
                    hgain=self.hyp['hsv_h'],
                    sgain=self.hyp['hsv_s'],
                    vgain=self.hyp['hsv_v'])

        # The gain is fixed at 1, so mosaics receive the affine augmentation
        # too (the earlier "zero gain for mosaics" value was overwritten).
        affine_gain = 1
        hyp = self.hyp
        img, labels = random_affine(
            img,
            labels,
            degrees=hyp['degrees'] * affine_gain,
            translate=hyp['translate'] * affine_gain,
            scale=hyp['scale'] * affine_gain,
            shear=hyp['shear'] * affine_gain,
        )

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalize by height
        labels[:, [1, 3]] /= img.shape[1]  # normalize by width

    if self.augment:
        flip_horizontal = True
        if flip_horizontal and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        # Disabled here; the original note says to activate it for top-view data.
        flip_vertical = False
        if flip_vertical and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    # Targets carry an extra gt_score column compared with the (nL, 6) layout.
    labels_out = np.zeros((nL, 7), dtype='float32')
    if nL:
        labels_out[:, 1] = labels[:, 0]    # cls
        labels_out[:, 2] = 1               # gt_score
        labels_out[:, 3:] = labels[:, 1:]  # x, y, w, h

    # BGR -> RGB, HWC -> CHW, uint8 -> float32 in 0.0-1.0.
    chw = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(chw, dtype=np.float32)
    img /= 255.0

    if norm_with_mean_std:
        img[0] = (img[0] - 0.485) / 0.229
        img[1] = (img[1] - 0.456) / 0.224
        img[2] = (img[2] - 0.406) / 0.225

    return img, labels_out, img_path, (h, w)
def __getitem__(self, index):
    """Load one sample and return it with its letterbox geometry.

    A mosaic is built whenever ``self.augment`` is truthy; otherwise a single
    image is loaded, letterboxed, and its labels converted from normalized
    xywh to pixel xyxy.

    Args:
        index: Sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, img_path, ((h, w), (ratio, pad)))``.
        ``img`` is a channel-first tensor with reversed channel order (BGR to
        RGB) and no dtype conversion. ``labels_out`` has shape ``(nL, 6)``
        with column 0 left at zero. ``ratio`` and ``pad`` are the values
        returned by ``letterbox`` and are both ``None`` for mosaics.
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]
    hyp = self.hyp

    # Mosaics (4 images at a time) are only built while augmenting.
    mosaic = self.augment
    if mosaic:
        img, labels = load_mosaic(self, index)
        h, w = img.shape[:2]
        ratio, pad = None, None
    else:
        img = load_image(self, index)
        h, w = img.shape[:2]  # shape before letterboxing

        # Final letterboxed shape: per-batch rectangle or the square size.
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scaleup=self.augment)

        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:  # labels were not preloaded, parse the file
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]  # pad width
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]  # pad height
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

    if self.augment:
        # The affine step is skipped for mosaics in this method.
        if not mosaic:
            img, labels = random_affine(
                img,
                labels,
                degrees=hyp['degrees'],
                translate=hyp['translate'],
                scale=hyp['scale'],
                shear=hyp['shear'],
            )

        # Colorspace augmentation; the return value is not used.
        augment_hsv(img,
                    hgain=hyp['hsv_h'],
                    sgain=hyp['hsv_s'],
                    vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalize by height
        labels[:, [1, 3]] /= img.shape[1]  # normalize by width

    if self.augment:
        flip_horizontal = True
        if flip_horizontal and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        flip_vertical = False  # disabled; no random number is drawn
        if flip_vertical and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR -> RGB and HWC -> CHW; made contiguous for torch.from_numpy.
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, img_path, ((h, w), (ratio, pad))
def __getitem__(self, index):
    """Load one sample whose label rows carry a box plus 16 extra coordinates.

    Each label row is handled as 21 columns: column 0 (copied through
    untouched), a box in columns 1-4, and eight (x, y) pairs in columns 5-20
    (x in the odd columns 5..19, y in the even columns 6..20). The pairs are
    scaled, normalized and flipped together with the box.
    NOTE(review): the pairs look like keypoints/landmarks - confirm against
    the label format.

    Args:
        index: Sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, path, shapes)``. ``img`` is a channel-first
        tensor with reversed channel order (BGR to RGB) and no dtype
        conversion. ``labels_out`` has shape ``(nL, 22)`` with column 0 left
        at zero and columns 1-21 holding the label row. ``shapes`` is ``None``
        for mosaics, otherwise ``(h0, w0), ((h / h0, w / w0), pad)``.
    """
    if self.image_weights:
        index = self.indices[index]

    hyp = self.hyp

    # Column indices of the extra coordinate pairs. Index lists (rather than
    # slices) keep the IndexError for rows that are too short.
    pair_x_cols = list(range(5, 21, 2))         # 5, 7, ..., 19
    pair_y_cols = [c + 1 for c in pair_x_cols]  # 6, 8, ..., 20

    if self.mosaic:
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        img, (h0, w0), (h, w) = load_image(self, index)

        # Final letterboxed shape: per-batch rectangle or the square size.
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)

        labels = []
        x = self.labels[index]
        if x is not None and x.size > 0:
            # Normalized xywh -> pixel xyxy in the letterboxed image.
            scale_x = ratio[0] * w
            scale_y = ratio[1] * h
            half_w = x[:, 3] / 2
            half_h = x[:, 4] / 2
            labels = x.copy()
            labels[:, 1] = scale_x * (x[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = scale_y * (x[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = scale_x * (x[:, 1] + half_w) + pad[0]
            labels[:, 4] = scale_y * (x[:, 2] + half_h) + pad[1]

            # Normalized pair coordinates -> pixels in the letterboxed image.
            labels[:, pair_x_cols] = scale_x * x[:, pair_x_cols] + pad[0]
            labels[:, pair_y_cols] = scale_y * x[:, pair_y_cols] + pad[1]

    if self.augment:
        # The affine step is skipped for mosaics in this method.
        if not self.mosaic:
            img, labels = random_affine(
                img,
                labels,
                degrees=hyp['degrees'],
                translate=hyp['translate'],
                scale=hyp['scale'],
                shear=hyp['shear'],
            )

        # Colorspace augmentation; the return value is not used.
        augment_hsv(img,
                    hgain=hyp['hsv_h'],
                    sgain=hyp['hsv_s'],
                    vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh

        # Normalize box and pair coordinates to 0-1 by the image size.
        labels[:, [2, 4]] /= img.shape[0]  # height
        labels[:, [1, 3]] /= img.shape[1]  # width
        labels[:, pair_y_cols] /= img.shape[0]
        labels[:, pair_x_cols] /= img.shape[1]

    if self.augment:
        flip_horizontal = True
        if flip_horizontal and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]
                labels[:, pair_x_cols] = 1 - labels[:, pair_x_cols]

        flip_vertical = False  # disabled; no random number is drawn
        if flip_vertical and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]
                labels[:, pair_y_cols] = 1 - labels[:, pair_y_cols]

    # 6 standard columns plus the 16 extra coordinates.
    labels_out = torch.zeros((nL, 6 + 16))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR -> RGB and HWC -> CHW; made contiguous for torch.from_numpy.
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def __getitem__(self, index):
    """Override of the parent Dataset's item accessor.

    Loads a mosaic when ``self.mosaic`` is set, otherwise a single
    letterboxed image with labels converted from normalized xywh to pixel
    xyxy, then applies the configured augmentations.

    Args:
        index: Sample position, used as given (no remapping in this method).

    Returns:
        tuple: ``(img, labels_out, path, shapes, index)``. ``img`` is a
        channel-first tensor with reversed channel order (BGR to RGB) and no
        dtype conversion. ``labels_out`` has shape ``(nL, 6)`` with column 0
        left at zero and columns 1-5 holding class, x, y, w, h (normalized).
        ``shapes`` is ``None`` for mosaics, otherwise
        ``(h0, w0), ((h / h0, w / w0), pad)``.
    """
    hyp = self.hyp

    if self.mosaic:
        img, labels = load_mosaic(self, index)
        shapes = None
    else:
        img, (h0, w0), (h, w) = load_image(self, index)

        # Final letterboxed shape: per-batch rectangle or the square size.
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
        else:
            shape = self.img_size
        img, ratio, pad = letterbox(img, shape, auto=False,
                                    scale_up=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        labels = []
        x = self.labels[index]
        if x.size > 0:
            # Rows are class, x, y, w, h; convert normalized xywh to pixel
            # xyxy in the letterboxed image.
            scale_x = ratio[0] * w
            scale_y = ratio[1] * h
            half_w = x[:, 3] / 2
            half_h = x[:, 4] / 2
            labels = x.copy()
            labels[:, 1] = scale_x * (x[:, 1] - half_w) + pad[0]  # pad width
            labels[:, 2] = scale_y * (x[:, 2] - half_h) + pad[1]  # pad height
            labels[:, 3] = scale_x * (x[:, 1] + half_w) + pad[0]
            labels[:, 4] = scale_y * (x[:, 2] + half_h) + pad[1]

    if self.augment:
        # The affine step is skipped for mosaics in this method.
        if not self.mosaic:
            img, labels = random_affine(
                img,
                labels,
                degrees=hyp["degrees"],
                translate=hyp["translate"],
                scale=hyp["scale"],
                shear=hyp["shear"],
            )

        # Colorspace augmentation; the return value is not used.
        augment_hsv(img,
                    h_gain=hyp["hsv_h"],
                    s_gain=hyp["hsv_s"],
                    v_gain=hyp["hsv_v"])

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalize by height
        labels[:, [1, 3]] /= img.shape[1]  # normalize by width

    if self.augment:
        flip_horizontal = True
        if flip_horizontal and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]  # 1 - x_center

        flip_vertical = False  # disabled; no random number is drawn
        if flip_vertical and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]  # 1 - y_center

    # Column 0 stays zero here; only columns 1-5 are filled from the labels.
    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # BGR -> RGB and HWC -> CHW; made contiguous for torch.from_numpy.
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

    return torch.from_numpy(img), labels_out, self.img_files[index], shapes, index
def __getitem__(self, index):
    """Load one sample, using imgaug for photometric augmentation in training.

    A mosaic is built whenever ``self.augment`` is truthy; otherwise a single
    image is loaded, letterboxed, and its labels converted from normalized
    xywh to pixel xyxy. The image is converted from BGR to RGB early (for
    imgaug), so no channel reversal happens at the end.

    Fix over the previous version: the random imgaug brightness/contrast
    operation used to run whenever ``self.augment`` was ``True`` *or*
    ``False``, so evaluation images were randomly perturbed and results were
    not reproducible. It is now applied only when ``self.augment`` is truthy.
    The BGR to RGB swap and the zero-parameter ``random_affine`` call still
    run under the original condition, so the channel order and label
    post-processing of the evaluation path are unchanged.

    Args:
        index: Sample position; remapped through ``self.indices`` when
            ``self.image_weights`` is set.

    Returns:
        tuple: ``(img, labels_out, img_path, (h, w))``. ``img`` is a float32
        channel-first tensor scaled to 0-1. ``labels_out`` has shape
        ``(nL, 6)`` with column 0 left at zero and columns 1-5 holding the
        label row with normalized xywh. ``(h, w)`` is the image shape read
        before letterboxing (or the shape of the mosaic).
    """
    if self.image_weights:
        index = self.indices[index]

    img_path = self.img_files[index]
    label_path = self.label_files[index]

    # Mosaics (4 images at a time) are only built while augmenting,
    # i.e. during training.
    mosaic = True and self.augment
    if mosaic:
        img, labels = load_mosaic(self, index)
        h, w = img.shape[:2]
    else:
        img = load_image(self, index)
        h, w = img.shape[:2]  # shape before letterboxing

        if self.rect:
            img, ratio, padw, padh = letterbox(
                img, self.batch_shapes[self.batch[index]], mode='rect')
        else:
            img, ratio, padw, padh = letterbox(img, self.img_size,
                                               mode='square')

        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x is None:  # labels were not preloaded, parse the file
                with open(label_path, 'r') as f:
                    x = np.array(
                        [line.split() for line in f.read().splitlines()],
                        dtype=np.float32)

            if x.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

    # This guard is deliberately kept as written: it is true for both True
    # and False, so the channel swap below also happens at test time (the
    # final conversion no longer reverses channels).
    if self.augment or self.augment is False:
        img = img[:, :, (2, 1, 0)]  # BGR (cv2) -> RGB

        if self.augment:
            # Random photometric augmentation: training only.
            candidates = [
                iaa.Multiply([1, 2.5, 0.5, 1.5]),
                iaa.SigmoidContrast(gain=10, cutoff=[0.75, 1, 0.5]),
            ]
            sequence_iaa = iaa.Sequential([random.choice(candidates)])
            img = sequence_iaa.augment_image(img)

        # All affine parameters are zero; the call is retained so the labels
        # go through the same processing inside random_affine as before.
        img, labels = random_affine(img,
                                    labels,
                                    degrees=0,
                                    translate=0,
                                    scale=0,
                                    shear=0)

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # xyxy -> xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalize by height
        labels[:, [1, 3]] /= img.shape[1]  # normalize by width

    if self.augment:
        lr_flip = True
        if lr_flip and random.random() < 0.5:
            img = np.fliplr(img)
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

        ud_flip = False  # disabled; no random number is drawn
        if ud_flip and random.random() < 0.5:
            img = np.flipud(img)
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Already RGB at this point: only HWC -> CHW, then uint8 -> float32 0-1.
    img = img[:, :, :].transpose(2, 0, 1)
    img = np.ascontiguousarray(img, dtype=np.float32)
    img /= 255.0

    return torch.from_numpy(img), labels_out, img_path, (h, w)