class SegmentDatasetProcess(BaseDataSetProcess):
    """Preprocessing pipeline for semantic-segmentation samples."""

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        # padding fill for images and the sentinel class id used to pad labels
        self.image_pad_color = (0, 0, 0)
        self.label_pad_color = 250

    def normaliza_dataset(self, src_image):
        """Normalize the image values and reorder axes HWC -> CHW."""
        normalized = self.dataset_process.image_normaliza(src_image)
        return self.dataset_process.numpy_transpose(normalized)

    def resize_dataset(self, src_image, image_size, label,
                       volid_label_seg=None, valid_label_seg=None):
        """Letterbox-resize the image and its encoded label map to image_size.

        The label is first remapped through encode_segmap, then resized with
        the label pad color so padded pixels stay out of the valid class range.
        """
        image, _, _ = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        target = self.encode_segmap(np.array(label, dtype=np.uint8),
                                    volid_label_seg, valid_label_seg)
        target, _, _ = self.dataset_process.image_resize_square(
            target, image_size, self.label_pad_color)
        return image, target

    def change_label(self, label, valid_label_seg):
        """Replace every pixel whose class index is not a valid one with the pad color."""
        keep_mask = np.zeros(label.shape)
        for class_index in range(len(valid_label_seg)):
            # accumulate the positions whose value matches a valid class index
            keep_mask += (label == class_index)
        # mark unmatched positions negative so they can be swapped for the pad color
        keep_mask[keep_mask == 0] = -1
        seg = np.float32(label) * keep_mask
        seg[seg < 0] = self.label_pad_color
        return np.uint8(seg)

    def encode_segmap(self, mask, volid_label, valid_label):
        """Remap raw label ids in-place: void ids -> pad color, valid ids -> group row index.

        valid_label is a list of groups; every raw id in group i becomes class i.
        """
        # lookup table: row i holds the raw ids belonging to class i (-1 = unused)
        lookup = -np.ones([100, 100])
        for row, group_ids in enumerate(valid_label):
            lookup[row, :len(group_ids)] = group_ids
        flat_valid = [raw_id for group in valid_label for raw_id in group]
        for void_id in volid_label:
            mask[mask == void_id] = self.label_pad_color
        for raw_id in flat_valid:
            # np.where on the lookup yields the row (class) index for this raw id
            mask[mask == raw_id] = np.uint8(np.where(lookup == raw_id)[0])
        return mask
class VideoLoader(DataLoader):
    """Iterate over a video file, yielding (source frame, preprocessed tensor) pairs."""

    def __init__(self, video_path, image_size=(416, 416)):
        super().__init__()
        self.video_process = VideoProcess()
        self.dataset_process = ImageDataSetProcess()
        # openVideo only runs when the path is a video file (short-circuit)
        usable = (self.video_process.isVideoFile(video_path)
                  and self.video_process.openVideo(video_path))
        if not usable:
            raise Exception("Invalid path!", video_path)
        self.image_size = image_size
        self.count = int(self.video_process.getFrameCount())
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        success, src_image, rgb_image = self.video_process.readRGBFrame()
        if not success:
            raise StopIteration
        # letterbox resize, normalize, HWC -> CHW, then convert to a tensor
        padded, _, _ = self.dataset_process.image_resize_square(
            rgb_image, self.image_size, self.color)
        normalized = self.dataset_process.image_normaliza(padded)
        chw_image = self.dataset_process.numpy_transpose(normalized)
        torch_image = self.all_numpy_to_tensor(chw_image, 0)
        return src_image, torch_image

    def __len__(self):
        return self.count
class ImagesLoader(DataLoader):
    """Iterate over every image in a directory, yielding (source image, preprocessed tensor)."""

    def __init__(self, input_dir, image_size=(416, 416)):
        super().__init__()
        self.image_size = image_size
        self.imageProcess = ImageProcess()
        self.dirProcess = DirProcess()
        self.dataset_process = ImageDataSetProcess()
        self.files = list(self.dirProcess.getDirFiles(input_dir, "*.*"))
        self.count = len(self.files)
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        if self.index == self.count:
            raise StopIteration
        image_path = self.files[self.index]
        # read, letterbox resize, normalize, HWC -> CHW, then to tensor
        srcImage, rgb_image = self.imageProcess.readRgbImage(image_path)
        padded, _, _ = self.dataset_process.image_resize_square(
            rgb_image, self.image_size, self.color)
        normalized = self.dataset_process.image_normaliza(padded)
        chw_image = self.dataset_process.numpy_transpose(normalized)
        torch_image = self.all_numpy_to_tensor(chw_image)
        return srcImage, torch_image

    def __len__(self):
        return self.count
class DetectionDataSetProcess(BaseDataSetProcess):
    """Preprocessing pipeline for object-detection samples."""

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.image_pad_color = (0, 0, 0)

    def normaliza_dataset(self, src_image, labels=None, image_size=None):
        """Normalize the image to CHW; optionally convert rects to normalized rows.

        Each output row is [class_id, cx, cy, w, h], all coordinates divided
        by image_size. Returns (image, rows) where rows is None when labels is.
        """
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        result = None
        if labels is not None:
            result = np.zeros((len(labels), 5), dtype=np.float32)
            for row, rect in enumerate(labels):
                cx, cy = rect.center()
                result[row, :] = np.array([rect.class_id,
                                           cx / image_size[0],
                                           cy / image_size[1],
                                           rect.width() / image_size[0],
                                           rect.height() / image_size[1]])
        return image, result

    def resize_dataset(self, src_image, image_size, boxes=None, class_name=None):
        """Letterbox-resize the image and map ground-truth boxes into the padded frame.

        Boxes whose name is not in class_name are dropped.
        """
        labels = []
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        if boxes is not None:
            for box in boxes:
                if box.name not in class_name:
                    continue
                rect = Rect2D()
                rect.class_id = class_name.index(box.name)
                # scale by the resize ratio, then shift by half of the padding
                rect.min_corner.x = ratio * box.min_corner.x + pad[0] // 2
                rect.min_corner.y = ratio * box.min_corner.y + pad[1] // 2
                rect.max_corner.x = ratio * box.max_corner.x + pad[0] // 2
                rect.max_corner.y = ratio * box.max_corner.y + pad[1] // 2
                labels.append(rect)
        return image, labels

    def change_outside_labels(self, labels):
        """Clamp boxes spilling past the image boundary, then drop undersized ones.

        Rows of labels are [class, cx, cy, w, h] in normalized coordinates and
        are edited in place; 0.999 stands in for the image boundary.
        """
        drop_rows = []
        for row, label in enumerate(labels):
            # clamp the bottom edge to 0.999 while keeping the top edge fixed
            if label[2] + label[4] / 2 >= 1.0:
                top = label[2] - label[4] / 2
                label[2] = (top + 0.999) / 2.0
                label[4] = 0.999 - top
            # clamp the right edge to 0.999 while keeping the left edge fixed
            if label[1] + label[3] / 2 >= 1.0:
                left = label[1] - label[3] / 2
                label[1] = (left + 0.999) / 2.0
                label[3] = 0.999 - left
            # drop tiny boxes: width under ~6.8px/1280 (0.0053) or
            # height under ~4.0px/720 (0.0055)
            if label[3] < 0.0053 or label[4] < 0.0055:
                drop_rows.append(row)
        labels = np.delete(labels, drop_rows, axis=0)
        return labels