class CreateDetectionAnchors():
    def __init__(self, train_path):
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.detection_sample = DetectionSample(train_path,
                                                detect2d_config.className)
        self.detection_sample.read_sample()
        self.dataset_process = DetectionDataSetProcess()

    def get_anchors(self, number):
        wh_numpy = self.get_width_height()
        # Kmeans calculation
        k = cluster.vq.kmeans(wh_numpy, number)[0]
        k = k[np.argsort(k.prod(1))]  # sort small to large
        # Measure IoUs
        iou = np.stack([self.compute_iou(wh_numpy, x) for x in k], 0)
        biou = iou.max(0)[0]  # closest anchor IoU
        print('Best possible recall: %.3f' %
              (biou > 0.2635).float().mean())  # BPR (best possible recall)

        # Print
        print(
            'kmeans anchors (n=%g, img_size=%g, IoU=%.2f/%.2f/%.2f-min/mean/best): '
            % (number, detect2d_config.imgSize, biou.min(), iou.mean(),
               biou.mean()),
            end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])),
                  end=',  ' if i < len(k) - 1 else '\n')

    def get_width_height(self):
        count = self.detection_sample.get_sample_count()
        result = []
        for index in range(count):
            img_path, label_path = self.detection_sample.get_sample_path(index)
            src_image, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)
            rgb_image, labels = self.dataset_process.resize_dataset(
                rgb_image, detect2d_config.imgSize, boxes,
                detect2d_config.className)
            temp = np.zeros((len(labels), 2), dtype=np.float32)
            for index, object in enumerate(labels):
                temp[index, :] = np.array([object.width(), object.height()])
            result.append(temp)
        return np.concatenate(result, axis=0)

    def compute_iou(self, list_x, x2):
        result = np.zeros((len(list_x), 1), dtype=np.float32)
        for index, x1 in enumerate(list_x):
            min_w = min(x1[0], x2[0])
            min_h = min(x1[0], x2[1])
            iou = (min_w * min_h) / (x1[0] * x1[1] + x2[0] * x2[1] -
                                     min_w * min_h)
            result[index] = iou
        return result
class DetectionValDataLoader(data.Dataset):
    def __init__(self, val_path, class_name, image_size=(416, 416)):
        super().__init__()
        self.image_size = image_size
        self.detection_sample = DetectionSample(val_path, class_name, False)
        self.detection_sample.read_sample()
        self.image_process = ImageProcess()
        self.dataset_process = DetectionDataSetProcess()

    def __getitem__(self, index):
        img_path, label_path = self.detection_sample.get_sample_path(index)
        src_image, rgb_image = self.image_process.readRgbImage(img_path)
        rgb_image, _ = self.dataset_process.resize_dataset(
            rgb_image, self.image_size)
        rgb_image, _ = self.dataset_process.normaliza_dataset(rgb_image)
        rgb_image = self.dataset_process.numpy_to_torch(rgb_image, flag=0)
        return img_path, src_image, rgb_image

    def __len__(self):
        return self.detection_sample.get_sample_count()
Example #3
0
class DetectionTrainDataloader(DataLoader):
    def __init__(self,
                 train_path,
                 class_name,
                 batch_size=1,
                 image_size=(768, 320),
                 multi_scale=False,
                 is_augment=False,
                 balanced_sample=False):
        super().__init__()
        self.className = class_name
        self.multi_scale = multi_scale
        self.is_augment = is_augment
        self.balanced_sample = balanced_sample
        self.batch_size = batch_size
        self.image_size = image_size

        self.detection_sample = DetectionSample(train_path, class_name,
                                                balanced_sample)
        self.detection_sample.read_sample()
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.dataset_process = DetectionDataSetProcess()
        self.dataset_augment = DetectionDataAugment()

        self.nF = self.detection_sample.get_sample_count()
        self.nB = math.ceil(self.nF / batch_size)  # number of batches

    def __iter__(self):
        self.count = -1
        self.detection_sample.shuffle_sample()
        return self

    def __next__(self):
        self.count += 1
        if self.count == self.nB:
            raise StopIteration
        numpy_images = []
        numpy_labels = []

        class_index = self.get_random_class()
        start_index = self.detection_sample.get_sample_start_index(
            self.count, self.batch_size, class_index)
        width, height = self.get_image_size()

        stop_index = start_index + self.batch_size
        for temp_index in range(start_index, stop_index):
            img_path, label_path = self.detection_sample.get_sample_path(
                temp_index, class_index)
            src_image, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)

            rgb_image, labels = self.dataset_process.resize_dataset(
                rgb_image, (width, height), boxes, self.className)
            rgb_image, labels = self.dataset_augment.augment(rgb_image, labels)
            rgb_image, labels = self.dataset_process.normaliza_dataset(
                rgb_image, labels, (width, height))

            labels = self.dataset_process.change_outside_labels(labels)

            numpy_images.append(rgb_image)

            torch_labels = self.dataset_process.numpy_to_torch(labels, flag=0)
            numpy_labels.append(torch_labels)

        numpy_images = np.stack(numpy_images)
        torch_images = self.all_numpy_to_tensor(numpy_images)

        return torch_images, numpy_labels

    def __len__(self):
        return self.nB  # number of batches

    def get_random_class(self):
        class_index = None
        if self.balanced_sample:
            class_index = np.random.randint(0, len(self.className))
            print("loading labels {}".format(self.className[class_index]))
        return class_index

    def get_image_size(self):
        if self.multi_scale:
            # Multi-Scale YOLO Training
            print("wrong code for MultiScale")
            width = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
            scale = float(self.image_size[0]) / float(self.image_size[1])
            height = int(round(float(width / scale) / 32.0) * 32)
        else:
            # Fixed-Scale YOLO Training
            width = self.image_size[0]
            height = self.image_size[1]
        return width, height