class CreateDetectionAnchors():
    """Estimate detection anchor sizes with k-means over labelled box sizes."""

    def __init__(self, train_path):
        """Read the sample list and create the processing helpers.

        Args:
            train_path: forwarded to ``DetectionSample`` together with
                ``detect2d_config.className``.
        """
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.detection_sample = DetectionSample(train_path,
                                                detect2d_config.className)
        self.detection_sample.read_sample()
        self.dataset_process = DetectionDataSetProcess()

    def get_anchors(self, number):
        """Cluster the box sizes into ``number`` anchors and print a report.

        Args:
            number: how many anchors (k-means centroids) to compute.

        Returns:
            The anchors as an array of (width, height) rows sorted by
            ascending area. (The original returned ``None``; callers that
            ignore the result are unaffected.)
        """
        wh_numpy = self.get_width_height()

        # Kmeans calculation; element 0 of the result is the codebook.
        k = cluster.vq.kmeans(wh_numpy, number)[0]
        k = k[np.argsort(k.prod(1))]  # sort small to large

        # Measure IoUs: one (N, 1) column per anchor, stacked on axis 0.
        iou = np.stack([self.compute_iou(wh_numpy, x) for x in k], 0)
        # ndarray.max returns the values directly (unlike torch's
        # (values, indices) pair), so no trailing [0] here.
        biou = iou.max(0)  # closest anchor IoU for every box
        # BPR (best possible recall); 0.2635 is the threshold the original
        # code used. A boolean array's mean is the fraction of True entries.
        print('Best possible recall: %.3f' % (biou > 0.2635).mean())

        # Print
        # img_size is formatted with %s because imgSize is also handed to
        # resize_dataset as the target size and may be a (width, height)
        # pair, which %g cannot format.
        print(
            'kmeans anchors (n=%g, img_size=%s, IoU=%.2f/%.2f/%.2f-min/mean/best): '
            % (number, detect2d_config.imgSize, biou.min(), iou.mean(),
               biou.mean()),
            end='')
        last = len(k) - 1
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])),
                  end=', ' if i < last else '\n')
        return k

    def get_width_height(self):
        """Collect (width, height) of every label after dataset resizing.

        Returns:
            A float32 array of shape (total_labels, 2).
        """
        count = self.detection_sample.get_sample_count()
        result = []
        for sample_index in range(count):
            img_path, label_path = \
                self.detection_sample.get_sample_path(sample_index)
            _, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)
            _, labels = self.dataset_process.resize_dataset(
                rgb_image, detect2d_config.imgSize, boxes,
                detect2d_config.className)
            sizes = np.array(
                [[label.width(), label.height()] for label in labels],
                dtype=np.float32).reshape(-1, 2)  # stays (0, 2) when empty
            result.append(sizes)
        return np.concatenate(result, axis=0)

    def compute_iou(self, list_x, x2):
        """IoU between each (w, h) in ``list_x`` and the single size ``x2``.

        Boxes are compared as if they shared a corner, so the intersection
        is ``min(widths) * min(heights)``.

        Args:
            list_x: sequence of (width, height) pairs.
            x2: one (width, height) pair.

        Returns:
            A float32 array of shape (len(list_x), 1).
        """
        if len(list_x) == 0:
            return np.zeros((0, 1), dtype=np.float32)
        sizes = np.asarray(list_x, dtype=np.float64)
        widths, heights = sizes[:, 0], sizes[:, 1]
        ref_w, ref_h = float(x2[0]), float(x2[1])
        # Height must be compared with height (the original mixed in x1[0]).
        inter = np.minimum(widths, ref_w) * np.minimum(heights, ref_h)
        union = widths * heights + ref_w * ref_h - inter
        return (inter / union).astype(np.float32).reshape(-1, 1)
class ConvertSegmentionLable():
    """Convert gray or colour segmentation label images to class-index masks."""

    def __init__(self):
        self.save_label_dir = "SegmentLabel"
        # NOTE(review): annotation_post is never read in this class; outputs
        # keep the input file name and extension. Confirm whether masks were
        # meant to be saved with this extension instead.
        self.annotation_post = ".png"
        self.dirProcess = DirProcess()
        self.image_process = ImageProcess()

    def convert_segment_label(self, label_dir, is_gray, class_list):
        """Convert every file in ``label_dir`` and write the masks to disk.

        Masks are written to ``<label_dir>/../SegmentLabel`` under the same
        file name as the input. Files that cannot be read are skipped.

        Args:
            label_dir: directory holding the source label images.
            is_gray: True to treat labels as gray images, False as colour.
            class_list: sequence whose items carry the class value at
                index 1 (see ``convert_gray_label`` / ``convert_color_label``).
        """
        output_dir = os.path.join(label_dir, "../%s" % self.save_label_dir)
        os.makedirs(output_dir, exist_ok=True)
        for label_path in self.dirProcess.getDirFiles(label_dir, "*.*"):
            file_name_and_post = os.path.basename(label_path)
            print(label_path)
            mask = self.process_segment_label(label_path, is_gray, class_list)
            if mask is not None:
                save_path = os.path.join(output_dir, file_name_and_post)
                cv2.imwrite(save_path, mask)

    def process_segment_label(self, label_path, is_gray, class_list):
        """Read one label image and convert it to a class-index mask.

        Returns:
            The uint8 index mask, or None when the reader returned None.
        """
        if is_gray:
            mask = self.image_process.read_gray_image(label_path)
            convert = self.convert_gray_label
        else:
            _, mask = self.image_process.readRgbImage(label_path)
            convert = self.convert_color_label
        if mask is None:
            return None
        return convert(mask, class_list)

    def convert_gray_label(self, mask, class_list):
        """Map gray values to class indices.

        Args:
            mask: 2-D array, shape = [height, width].
            class_list: items whose element 1 is the gray value as a string,
                e.g. ``"255"``.

        Returns:
            uint8 array of the same shape; pixels matching no class hold 250.
        """
        # 250 marks unmatched pixels (presumably an ignore value — confirm).
        result = np.full(mask.shape, 250, dtype=np.uint8)
        for index, value in enumerate(class_list):
            gray_value = int(value[1].strip())
            result[mask == gray_value] = index
        return result

    def convert_color_label(self, mask, class_list):
        """Map colours to class indices.

        Args:
            mask: 3-channel array, shape = [height, width, 3].
            class_list: items whose element 1 is a comma-separated colour
                string, e.g. ``"128,64,128"``, in the mask's channel order.

        Returns:
            uint8 array of shape [height, width]; pixels matching no class
            hold 250.
        """
        result = np.full(mask.shape[:2], 250, dtype=np.uint8)
        for index, value in enumerate(class_list):
            # Fixed typo: the original called the non-existent str.spilt().
            value_list = [int(x) for x in value[1].split(',') if x.strip()]
            color_value = np.array(value_list, dtype=np.uint8)
            # A pixel belongs to the class when every channel matches.
            matches = np.all(mask == color_value, axis=2)
            result[matches] = index
        return result
class ImagesLoader(DataLoader):
    """Iterate over the image files of a directory, yielding network inputs."""

    def __init__(self, input_dir, image_size=(416, 416)):
        """Collect the files of ``input_dir``.

        Args:
            input_dir: directory searched with the pattern ``*.*``.
            image_size: target size handed to ``image_resize_square``.
        """
        super().__init__()
        self.image_size = image_size
        self.imageProcess = ImageProcess()
        self.dirProcess = DirProcess()
        self.dataset_process = ImageDataSetProcess()
        self.files = list(self.dirProcess.getDirFiles(input_dir, "*.*"))
        self.count = len(self.files)
        # Passed to image_resize_square (presumably the padding colour).
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        """Restart iteration from the first file."""
        self.index = -1
        return self

    def __next__(self):
        """Return ``(source_image, tensor_image)`` for the next file.

        Raises:
            StopIteration: when all files have been consumed. ``>=`` keeps
                raising on repeated calls after exhaustion instead of
                falling through to an IndexError.
        """
        self.index += 1
        if self.index >= self.count:
            raise StopIteration
        image_path = self.files[self.index]

        # Read image
        srcImage, rgb_image = self.imageProcess.readRgbImage(image_path)

        # Padded resize
        rgb_image, _, _ = self.dataset_process.image_resize_square(
            rgb_image, self.image_size, self.color)
        rgb_image = self.dataset_process.image_normaliza(rgb_image)
        numpy_image = self.dataset_process.numpy_transpose(rgb_image)
        torch_image = self.all_numpy_to_tensor(numpy_image)
        return srcImage, torch_image

    def __len__(self):
        """Number of files found in the input directory."""
        return self.count
class DetectionTrainDataloader(DataLoader):
    """Batch iterator producing detection training images and labels."""

    def __init__(self, train_path, class_name, batch_size=1,
                 image_size=(768, 320), multi_scale=False,
                 is_augment=False, balanced_sample=False):
        """Read the sample list and create the processing helpers.

        Args:
            train_path: forwarded to ``DetectionSample``.
            class_name: sequence of class names.
            batch_size: number of samples per batch.
            image_size: (width, height) used for fixed-scale training.
            multi_scale: pick a random input size for every batch.
            is_augment: apply ``DetectionDataAugment.augment`` to samples.
            balanced_sample: draw each batch from one randomly chosen class.
        """
        super().__init__()
        self.className = class_name
        self.multi_scale = multi_scale
        self.is_augment = is_augment
        self.balanced_sample = balanced_sample
        self.batch_size = batch_size
        self.image_size = image_size

        self.detection_sample = DetectionSample(train_path, class_name,
                                                balanced_sample)
        self.detection_sample.read_sample()
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.dataset_process = DetectionDataSetProcess()
        self.dataset_augment = DetectionDataAugment()

        self.nF = self.detection_sample.get_sample_count()
        self.nB = math.ceil(self.nF / batch_size)  # number of batches

    def __iter__(self):
        """Reshuffle the samples and restart from the first batch."""
        self.count = -1
        self.detection_sample.shuffle_sample()
        return self

    def __next__(self):
        """Build the next batch.

        Returns:
            ``(torch_images, labels)`` where ``labels`` is a list holding
            one converted label object per image.

        Raises:
            StopIteration: once ``nB`` batches were produced. ``>=`` keeps
                raising on repeated calls after exhaustion.
        """
        self.count += 1
        if self.count >= self.nB:
            raise StopIteration

        numpy_images = []
        batch_labels = []

        class_index = self.get_random_class()
        start_index = self.detection_sample.get_sample_start_index(
            self.count, self.batch_size, class_index)
        width, height = self.get_image_size()

        # NOTE(review): the last batch may index past the sample count when
        # nF is not a multiple of batch_size; this relies on
        # get_sample_path coping with such indices — confirm.
        stop_index = start_index + self.batch_size
        for temp_index in range(start_index, stop_index):
            img_path, label_path = self.detection_sample.get_sample_path(
                temp_index, class_index)
            _, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)

            rgb_image, labels = self.dataset_process.resize_dataset(
                rgb_image, (width, height), boxes, self.className)
            # Honour the constructor flag; the original augmented
            # unconditionally and ignored is_augment.
            if self.is_augment:
                rgb_image, labels = self.dataset_augment.augment(rgb_image,
                                                                 labels)
            rgb_image, labels = self.dataset_process.normaliza_dataset(
                rgb_image, labels, (width, height))
            labels = self.dataset_process.change_outside_labels(labels)

            numpy_images.append(rgb_image)
            batch_labels.append(
                self.dataset_process.numpy_to_torch(labels, flag=0))

        torch_images = self.all_numpy_to_tensor(np.stack(numpy_images))
        return torch_images, batch_labels

    def __len__(self):
        """Number of batches per epoch."""
        return self.nB  # number of batches

    def get_random_class(self):
        """Pick a random class index when balanced sampling is enabled.

        Returns:
            An index into ``className``, or None when ``balanced_sample``
            is off.
        """
        class_index = None
        if self.balanced_sample:
            class_index = np.random.randint(0, len(self.className))
            print("loading labels {}".format(self.className[class_index]))
        return class_index

    def get_image_size(self):
        """Return the (width, height) to use for the current batch."""
        if self.multi_scale:
            # Multi-Scale YOLO Training
            print("wrong code for MultiScale")
            width = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
            # Keep the configured aspect ratio, rounding the height to a
            # multiple of 32.
            scale = float(self.image_size[0]) / float(self.image_size[1])
            height = int(round(float(width / scale) / 32.0) * 32)
        else:
            # Fixed-Scale YOLO Training
            width = self.image_size[0]
            height = self.image_size[1]
        return width, height