コード例 #1
0
class ComputeImagesMean():
    """Compute per-channel mean/std statistics over a list of training images."""

    def __init__(self, image_size):
        # target size every image is resized to before accumulating stats
        self.image_size = image_size
        self.dir_process = DirProcess()
        self.image_process = ImageProcess()
        self.dataset_process = ImageDataSetProcess()

    def compute(self, train_path):
        """Read every image listed in train_path, normalize it, and return
        the channel-wise (mean, std) over all pixels of all images.
        """
        parent_dir, _ = os.path.split(train_path)
        # image files live in a JPEGImages directory next to the list file
        images_dir = os.path.join(parent_dir, "../JPEGImages")
        normalized_images = []
        for line_data in self.dir_process.getFileData(train_path):
            fields = [x.strip() for x in line_data.split() if x.strip()]
            if not fields:
                print("read %s image path error!" % fields)
                continue
            image_path = os.path.join(images_dir, fields[0])
            _, rgb_image = self.image_process.readRgbImage(image_path)
            rgb_image = self.dataset_process.image_resize(rgb_image,
                                                          self.image_size)
            normalized_images.append(
                self.dataset_process.image_normaliza(rgb_image))
        stacked = np.stack(normalized_images)
        # axes (0, 1, 2) = (image, height, width) -> one value per channel
        return np.mean(stacked, axis=(0, 1, 2)), np.std(stacked, axis=(0, 1, 2))
コード例 #2
0
class VideoLoader(DataLoader):
    """Iterate a video file frame by frame, yielding (src_frame, torch tensor)."""

    def __init__(self, video_path, image_size=(416, 416)):
        super().__init__()
        self.video_process = VideoProcess()
        self.dataset_process = ImageDataSetProcess()
        # fail fast on a path that is not a readable video
        is_valid = self.video_process.isVideoFile(video_path) and \
            self.video_process.openVideo(video_path)
        if not is_valid:
            raise Exception("Invalid path!", video_path)
        self.image_size = image_size
        self.count = int(self.video_process.getFrameCount())
        # gray padding color used by the letterbox resize
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        success, src_image, rgb_image = self.video_process.readRGBFrame()
        if not success:
            raise StopIteration
        # letterbox to the target size, then normalize and go HWC -> CHW
        rgb_image, _, _ = self.dataset_process.image_resize_square(
            rgb_image, self.image_size, self.color)
        normalized = self.dataset_process.image_normaliza(rgb_image)
        torch_image = self.all_numpy_to_tensor(
            self.dataset_process.numpy_transpose(normalized), 0)
        return src_image, torch_image

    def __len__(self):
        return self.count
コード例 #3
0
 def __init__(self):
     """Configure default augmentation switches and processing helpers."""
     # which augmentations are enabled by default
     self.is_augment_hsv = True
     self.is_augment_affine = True
     self.is_lr_flip = True
     self.is_up_flip = False
     # helpers: generic image operations and augmentation implementations
     self.dataset_process = ImageDataSetProcess()
     self.image_augment = ImageDataAugment()
コード例 #4
0
 def __init__(self, mean, std):
     """Store per-channel normalization stats and build a normalize transform.

     mean, std: sequences of per-channel values (converted to float32).
     """
     super().__init__()
     self.dataset_process = ImageDataSetProcess()
     self.mean = np.array(mean, dtype=np.float32)
     self.std = np.array(std, dtype=np.float32)
     # NOTE(review): self.torchvision_process is not created here — presumably
     # set by the base-class __init__; confirm.
     self.normalize_transform = self.torchvision_process.torch_normalize(
         flag=0, mean=self.mean, std=self.std)
コード例 #5
0
 def __init__(self):
     """Build the torchvision transform and configure augmentation switches."""
     torchvision_process = TorchVisionProcess()
     self.augment_transform = torchvision_process.torch_data_augment()
     self.dataset_process = ImageDataSetProcess()
     self.image_augment = ImageDataAugment()
     # when True the torchvision transform is used instead of the
     # manual hsv/affine/flip pipeline
     self.is_torchvision_augment = True
     self.is_augment_hsv = True
     self.is_augment_affine = True
     self.is_lr_flip = True
コード例 #6
0
 def __init__(self, video_path, image_size=(416, 416)):
     """Open a video for frame iteration.

     Raises Exception when video_path is not a video file or cannot be opened.
     """
     super().__init__()
     self.video_process = VideoProcess()
     self.dataset_process = ImageDataSetProcess()
     # fail fast when the path is not a video or cannot be opened
     if not self.video_process.isVideoFile(video_path) or \
             not self.video_process.openVideo(video_path):
         raise Exception("Invalid path!", video_path)
     self.image_size = image_size
     self.count = int(self.video_process.getFrameCount())
     # gray padding color used for letterbox resizing
     self.color = (127.5, 127.5, 127.5)
コード例 #7
0
 def __init__(self, input_dir, image_size=(416, 416)):
     """Collect every file under input_dir and prepare image helpers.

     input_dir: directory scanned (pattern "*.*") for input images.
     image_size: target (width, height) for later processing.
     """
     super().__init__()
     self.image_size = image_size
     self.imageProcess = ImageProcess()
     self.dirProcess = DirProcess()
     self.dataset_process = ImageDataSetProcess()
     # materialize the file generator so the loader knows its length up front
     self.files = list(self.dirProcess.getDirFiles(input_dir, "*.*"))
     self.count = len(self.files)
     # gray padding color for letterbox resizing
     self.color = (127.5, 127.5, 127.5)
コード例 #8
0
class SegmentResultProcess():
    """Post-processing for segmentation network outputs."""

    def __init__(self):
        self.dataset_process = ImageDataSetProcess()

    def get_detection_result(self, prediction, threshold=0):
        """Convert a raw prediction array into a class-index map.

        prediction: 2-D score map (thresholded against `threshold`),
        3-D (C, H, W) or 4-D (N, C, H, W) logits (argmax over channels).
        Returns None for any other rank.
        """
        result = None
        if prediction.ndim == 2:
            # binary segmentation: threshold the single score map
            result = (prediction >= threshold).astype(int)
        elif prediction.ndim == 3:
            result = np.argmax(prediction, axis=0)
        elif prediction.ndim == 4:
            result = np.argmax(prediction, axis=1)
        return result

    def resize_segmention_result(self, src_size, image_size,
                                 segmention_result):
        """Undo the letterbox padding and resize the result back to src_size."""
        ratio, pad = self.dataset_process.resize_square_size(
            src_size, image_size)
        # crop away the symmetric padding added by the square resize
        start_h = pad[1] // 2
        stop_h = image_size[1] - (pad[1] - (pad[1] // 2))
        start_w = pad[0] // 2
        stop_w = image_size[0] - (pad[0] - (pad[0] // 2))
        result = segmention_result[start_h:stop_h, start_w:stop_w]
        result = result.astype(np.float32)
        result = self.dataset_process.image_resize(result, src_size)
        return result

    def output_feature_map_resize(self, input_data, target):
        """Make input_data (N, C, H, W) and target (N, H, W) spatially equal.

        Upsamples whichever is smaller (labels with 'nearest', images with
        'bilinear'); raises Exception when h/w relations are inconsistent.
        """
        target = target.type(input_data.dtype)
        n, c, h, w = input_data.size()
        nt, ht, wt = target.size()
        # Handle inconsistent size between input and target
        if h > ht and w > wt:  # upsample labels
            # Bug fix: 'unsequeeze'/'sequeeze' typos raised AttributeError;
            # interpolate replaces the deprecated F.upsample alias.
            target = target.unsqueeze(1)
            target = torch.nn.functional.interpolate(target,
                                                     size=(h, w),
                                                     mode='nearest')
            target = target.squeeze(1)
        elif h < ht and w < wt:  # upsample images
            input_data = torch.nn.functional.interpolate(input_data,
                                                         size=(ht, wt),
                                                         mode='bilinear')
        elif h == ht and w == wt:
            pass
        else:
            print("input_data: (%d,%d) and target: (%d,%d) error " %
                  (h, w, ht, wt))
            raise Exception("segment_data_resize error")
        return input_data, target
コード例 #9
0
class SegmentDatasetProcess(BaseDataSetProcess):
    """Dataset-side processing for segmentation samples: normalization,
    letterbox resizing and label-id remapping."""

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        # padding values used by the square resize: black for images,
        # 250 marks padded/"ignore" pixels in label maps
        self.image_pad_color = (0, 0, 0)
        self.label_pad_color = 250

    def normaliza_dataset(self, src_image):
        """Normalize pixel values and transpose HWC -> CHW."""
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        return image

    def resize_dataset(self,
                       src_image,
                       image_size,
                       label,
                       volid_label_seg=None,
                       valid_label_seg=None):
        """Letterbox-resize the image and its label map to image_size.

        The label map is first remapped via encode_segmap (invalid ids ->
        label_pad_color, valid ids -> their class index), then resized with
        the ignore value as padding.
        """
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        target = self.encode_segmap(np.array(label, dtype=np.uint8),
                                    volid_label_seg, valid_label_seg)
        target, ratio, pad = self.dataset_process.image_resize_square(
            target, image_size, self.label_pad_color)
        return image, target

    def change_label(self, label, valid_label_seg):
        """Keep only pixels whose value is a valid class index; everything
        else is set to the ignore value (label_pad_color).

        NOTE(review): the comparison is `label == l` where l is the *index*
        into valid_label_seg, not the listed label value — presumably the
        labels were already remapped to 0..N-1; confirm.
        """
        valid_masks = np.zeros(label.shape)
        for l in range(0, len(valid_label_seg)):
            valid_mask = label == l  # set false to position of seg that not in valid_label_seg
            valid_masks += valid_mask  # set 0.0 to position of seg that not in valid_label_seg
        valid_masks[valid_masks == 0] = -1
        seg = np.float32(label) * valid_masks
        seg[seg < 0] = self.label_pad_color
        seg = np.uint8(seg)
        return seg

    def encode_segmap(self, mask, volid_label, valid_label):
        """Remap raw label ids in mask: void ids -> label_pad_color, and each
        valid id -> the index of the group (row of valid_label) containing it.

        volid_label: iterable of void/ignored raw label ids.
        valid_label: list of lists; row i holds the raw ids mapped to class i.
        Mutates and returns mask.
        """
        # lookup table: row i lists the raw ids belonging to class i
        # (fixed 100x100 capacity, unfilled slots are -1)
        classes = -np.ones([100, 100])
        valid = [x for j in valid_label for x in j]
        for i in range(0, len(valid_label)):
            classes[i, :len(valid_label[i])] = valid_label[i]
        for label in volid_label:
            mask[mask == label] = self.label_pad_color
        for validc in valid:
            # np.where(...)[0] is the row (class) index holding this raw id
            mask[mask == validc] = np.uint8(np.where(classes == validc)[0])

        return mask
コード例 #10
0
class ClassifyDataAugment():
    """Image augmentation for classification: either torchvision transforms
    or the project's manual HSV/affine/flip pipeline."""

    def __init__(self):
        torchvision_process = TorchVisionProcess()
        self.augment_transform = torchvision_process.torch_data_augment()
        self.dataset_process = ImageDataSetProcess()
        self.image_augment = ImageDataAugment()
        # when True the torchvision transform replaces the manual pipeline
        self.is_torchvision_augment = True
        self.is_augment_hsv = True
        self.is_augment_affine = True
        self.is_lr_flip = True

    def augment(self, image_rgb):
        """Apply the configured augmentations and return the result."""
        image = image_rgb[:]
        if self.is_torchvision_augment:
            return self.augment_transform(image)
        if self.is_augment_hsv:
            image = self.image_augment.augment_hsv(image)
        if self.is_augment_affine:
            image = self.augment_affine(image)
        if self.is_lr_flip:
            image = self.augment_lr_flip(image)
        return image

    def augment_affine(self, src_image):
        """Random small rotation/shear around the image center."""
        width_height = (src_image.shape[1], src_image.shape[0])
        matrix, degree = self.dataset_process.affine_matrix(
            width_height,
            degrees=(-15, 15),
            translate=(0.0, 0.0),
            scale=(1.0, 1.0),
            shear=(-3, 3))
        return self.dataset_process.image_affine(
            src_image, matrix, border_value=(0.0, 0.0, 0.0))

    def augment_lr_flip(self, src_image):
        """Mirror the image horizontally with probability 0.5."""
        flipped = src_image[:]
        if random.random() > 0.5:
            flipped = np.fliplr(flipped)
        return flipped
コード例 #11
0
class SegmentDataAugment():
    """Joint image/label augmentation for segmentation samples."""

    def __init__(self):
        # enabled augmentations
        self.is_augment_hsv = True
        self.is_augment_affine = True
        self.is_lr_flip = True
        self.dataset_process = ImageDataSetProcess()
        self.image_augment = ImageDataAugment()

    def augment(self, image_rgb, label):
        """Apply the enabled augmentations to the image and its label map."""
        image = image_rgb[:]
        target = label[:]
        if self.is_augment_hsv:
            image = self.image_augment.augment_hsv(image_rgb)
        if self.is_augment_affine:
            image, target = self.augment_affine(image, target)
        if self.is_lr_flip:
            image, target = self.augment_lr_flip(image, target)
        return image, target

    def augment_affine(self, src_image, label):
        """Warp image and label map with the same random affine transform.

        The label map is filled with the ignore value 250 at the borders.
        """
        width_height = (src_image.shape[1], src_image.shape[0])
        matrix, degree = self.dataset_process.affine_matrix(
            width_height,
            degrees=(-5, 5),
            translate=(0.1, 0.1),
            scale=(0.8, 1.1),
            shear=(-3, 3))
        warped_image = self.dataset_process.image_affine(
            src_image, matrix, border_value=(127.5, 127.5, 127.5))
        warped_label = self.dataset_process.image_affine(
            label, matrix, border_value=250)
        return warped_image, warped_label

    def augment_lr_flip(self, src_image, label):
        """Mirror image and label together with probability 0.5."""
        image = src_image[:]
        target = label[:]
        if random.random() > 0.5:
            image = np.fliplr(image)
            target = np.fliplr(target)
        return image, target
コード例 #12
0
class ImagesLoader(DataLoader):
    """Iterate image files in a directory, yielding (src_image, torch tensor)."""

    def __init__(self, input_dir, image_size=(416, 416)):
        super().__init__()
        self.image_size = image_size
        self.imageProcess = ImageProcess()
        self.dirProcess = DirProcess()
        self.dataset_process = ImageDataSetProcess()
        # materialize the file generator so the loader knows its length
        self.files = list(self.dirProcess.getDirFiles(input_dir, "*.*"))
        self.count = len(self.files)
        # gray padding color for the letterbox resize
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        if self.index == self.count:
            raise StopIteration
        image_path = self.files[self.index]

        # Read image
        srcImage, rgb_image = self.imageProcess.readRgbImage(image_path)

        # letterbox to target size, normalize, HWC -> CHW, then to tensor
        rgb_image, _, _ = self.dataset_process.image_resize_square(
            rgb_image, self.image_size, self.color)
        normalized = self.dataset_process.image_normaliza(rgb_image)
        torch_image = self.all_numpy_to_tensor(
            self.dataset_process.numpy_transpose(normalized))
        return srcImage, torch_image

    def __len__(self):
        return self.count
コード例 #13
0
class ClassifyDatasetProcess(BaseDataSetProcess):
    """Dataset-side processing for classification samples: resizing and
    image normalization via numpy or torchvision."""

    def __init__(self, mean, std):
        """mean, std: per-channel normalization statistics (float32)."""
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        # NOTE(review): self.torchvision_process is not created here —
        # presumably set by BaseDataSetProcess.__init__; confirm.
        self.normalize_transform = self.torchvision_process.torch_normalize(
            flag=0, mean=self.mean, std=self.std)

    def normaliza_dataset(self, src_image, normaliza_type=0):
        """Normalize an image into a torch tensor.

        normaliza_type 0: numpy pipeline (scale, mean/std, HWC->CHW, to torch);
        normaliza_type 1: torchvision Normalize transform.
        Returns None for any other normaliza_type value.
        """
        result = None
        if normaliza_type == 0:  # numpy normalize
            normaliza_image = self.dataset_process.image_normaliza(src_image)
            image = self.dataset_process.numpy_normaliza(
                normaliza_image, self.mean, self.std)
            image = self.dataset_process.numpy_transpose(image, image.dtype)
            result = self.numpy_to_torch(image, flag=0)
        elif normaliza_type == 1:  # torchvision normalize
            result = self.normalize_transform(src_image)
        return result

    def resize_image(self, src_image, image_size):
        """Resize src_image to image_size (plain resize, no padding)."""
        image = self.dataset_process.image_resize(src_image, image_size)
        return image
コード例 #14
0
class DetectionDataAugment():
    """Image + bounding-box augmentation for detection samples."""

    def __init__(self):
        # enabled augmentations; up-down flip is off by default
        self.is_augment_hsv = True
        self.is_augment_affine = True
        self.is_lr_flip = True
        self.is_up_flip = False
        self.dataset_process = ImageDataSetProcess()
        self.image_augment = ImageDataAugment()

    def augment(self, image_rgb, labels):
        """Apply the enabled augmentations; returns (image, box labels)."""
        image = image_rgb[:]
        targets = labels[:]
        if self.is_augment_hsv:
            # NOTE(review): passes image_rgb, not the copy `image`; same
            # result only if augment_hsv does not mutate its input — confirm.
            image = self.image_augment.augment_hsv(image_rgb)
        if self.is_augment_affine:
            image, targets = self.augment_affine(image, targets)
        if self.is_lr_flip:
            image, targets = self.augment_lr_flip(image, targets)
        if self.is_up_flip:
            image, targets = self.augment_up_flip(image, targets)
        return image, targets

    def augment_affine(self, src_image, labels):
        """Random affine warp of the image with box labels warped to match.

        Boxes whose warped version is too small, too distorted, or has lost
        most of its area are dropped. Returns (image, targets) where targets
        is a list of Rect2D, or (image, None) when labels is None.
        """
        image_size = (src_image.shape[1], src_image.shape[0])
        matrix, degree = self.dataset_process.affine_matrix(image_size,
                                                    degrees=(-5, 5),
                                                    translate=(0.1, 0.1),
                                                    scale=(0.8, 1.1),
                                                    shear=(-3, 3))
        image = self.dataset_process.image_affine(src_image, matrix,
                                                  border_value=(127.5, 127.5, 127.5))
        # Return warped points also
        if labels is not None:
            targets = []
            for object in labels:
                points = np.array(object.getVector())
                # area before warping, used by the rejection test below
                area0 = (points[2] - points[0]) * (points[3] - points[1])
                # homogeneous coordinates of the box's four corners
                xy = np.ones((4, 3))
                # x1y1, x2y2, x1y2, x2y1
                xy[:, :2] = points[[0, 1, 2, 3, 0, 3, 2, 1]].reshape(4, 2)
                xy = np.squeeze((xy @ matrix.T)[:, :2].reshape(1, 8))

                # create new boxes
                x = xy[[0, 2, 4, 6]]
                y = xy[[1, 3, 5, 7]]
                xy = np.array([x.min(), y.min(), x.max(), y.max()])

                # apply angle-based reduction
                radians = degree * math.pi / 180
                reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
                x = (xy[2] + xy[0]) / 2
                y = (xy[3] + xy[1]) / 2
                w = (xy[2] - xy[0]) * reduction
                h = (xy[3] - xy[1]) * reduction
                xy = np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2])

                # reject warped points outside of image
                # NOTE(review): clips both x and y to image_size[0] (the
                # width); for non-square images y should presumably be
                # clipped to image_size[1] — confirm.
                np.clip(xy, 0, image_size[0], out=xy)
                w = xy[2] - xy[0]
                h = xy[3] - xy[1]
                area = w * h
                ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
                # keep: >4px in each dimension, kept >10% of area, aspect < 10
                i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
                if i:
                    rect = Rect2D()
                    rect.class_id = object.class_id
                    rect.min_corner.x = xy[0]
                    rect.min_corner.y = xy[1]
                    rect.max_corner.x = xy[2]
                    rect.max_corner.y = xy[3]
                    targets.append(rect)
            return image, targets
        else:
            return image, None

    def augment_lr_flip(self, src_image, labels):
        # random left-right flip
        image_size = (src_image.shape[1], src_image.shape[0])
        image = src_image[:]
        if random.random() > 0.5:
            image = np.fliplr(image)
            # mirror the box x-coordinates in place
            for object in labels:
                temp = object.min_corner.x
                object.min_corner.x = image_size[0] - object.max_corner.x
                object.max_corner.x = image_size[0] - temp
        return image, labels

    def augment_up_flip(self, src_image, labels):
        # random up-down flip
        image_size = (src_image.shape[1], src_image.shape[0])
        image = src_image[:]
        if random.random() > 0.5:
            image = np.flipud(image)
            # mirror the box y-coordinates in place
            for object in labels:
                temp = object.min_corner.y
                object.min_corner.y = image_size[1] - object.max_corner.y
                object.max_corner.y = image_size[1] - temp
        return image, labels
コード例 #15
0
 def __init__(self, image_size):
     """Store the target image size and create file/image helpers.

     image_size: size images are resized to before statistics are computed.
     """
     self.image_size = image_size
     self.dir_process = DirProcess()
     self.image_process = ImageProcess()
     self.dataset_process = ImageDataSetProcess()
コード例 #16
0
 def __init__(self):
     """Create the shared image-processing helper."""
     self.dataset_process = ImageDataSetProcess()
コード例 #17
0
 def __init__(self):
     """Initialize the base class and set the black image padding color."""
     super().__init__()
     self.dataset_process = ImageDataSetProcess()
     # padding color used when letterboxing images to a square
     self.image_pad_color = (0, 0, 0)
コード例 #18
0
class DetectionDataSetProcess(BaseDataSetProcess):
    """Dataset-side processing for detection samples: letterbox resizing,
    normalization, and label sanitation."""

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        # padding color used when letterboxing images to a square
        self.image_pad_color = (0, 0, 0)

    def normaliza_dataset(self, src_image, labels=None, image_size=None):
        """Normalize the image to CHW floats and convert rects into
        normalized [class_id, cx, cy, w, h] rows (None when labels is None).
        """
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        if labels is None:
            return image, None
        result = np.zeros((len(labels), 5), dtype=np.float32)
        for index, rect in enumerate(labels):
            cx, cy = rect.center()
            result[index, :] = np.array([rect.class_id,
                                         cx / image_size[0],
                                         cy / image_size[1],
                                         rect.width() / image_size[0],
                                         rect.height() / image_size[1]])
        return image, result

    def resize_dataset(self,
                       src_image,
                       image_size,
                       boxes=None,
                       class_name=None):
        """Letterbox-resize the image and map the boxes into the new frame.

        Only boxes whose name appears in class_name are kept.
        """
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        labels = []
        if boxes is not None:
            dx = pad[0] // 2
            dy = pad[1] // 2
            for box in boxes:
                if box.name not in class_name:
                    continue
                rect = Rect2D()
                rect.class_id = class_name.index(box.name)
                rect.min_corner.x = ratio * box.min_corner.x + dx
                rect.min_corner.y = ratio * box.min_corner.y + dy
                rect.max_corner.x = ratio * box.max_corner.x + dx
                rect.max_corner.y = ratio * box.max_corner.y + dy
                labels.append(rect)
        return image, labels

    def change_outside_labels(self, labels):
        """Clamp normalized [class, cx, cy, w, h] rows to the image and drop
        boxes that end up below the minimum normalized width/height.
        Rows are modified in place; a filtered copy is returned.
        """
        delete_index = []
        # reject warped points outside of image (0.999 for the image boundary)
        for i, label in enumerate(labels):
            if label[2] + label[4] / 2 >= 1.0:
                # clamp the bottom edge to 0.999, keeping the top edge fixed
                old_top = label[2] - label[4] / 2
                label[2] = (old_top + 0.999) / 2.0
                label[4] = 0.999 - old_top
            if label[1] + label[3] / 2 >= 1.0:
                # clamp the right edge to 0.999, keeping the left edge fixed
                old_left = label[1] - label[3] / 2
                label[1] = (old_left + 0.999) / 2.0
                label[3] = 0.999 - old_left
            # filter boxes below the minimum normalized width/height
            # (roughly a few pixels at the training resolution)
            if label[3] < 0.0053 or label[4] < 0.0055:
                delete_index.append(i)
        return np.delete(labels, delete_index, axis=0)