Python xyxy2xywh Examples

Programming Language: Python

Namespace/Package Name: utils.utils

Method/Function: xyxy2xywh

Examples at hotexamples.com: 27

Python xyxy2xywh - 27 examples found. These are the top rated real world Python examples of utils.utils.xyxy2xywh extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: datasets.py Project: Grey-z/uav_tracker

    def __next__(self):
        """Assemble and return the next batch of images and labels.

        ``self.count`` is incremented first; once it equals ``self.nB`` the
        iteration ends. Every image of the batch slice is read with OpenCV,
        optionally augmented (saturation/value jitter, random affine,
        left-right flip), resized with ``resize_square`` and paired with the
        labels read from its label file.

        Returns:
            tuple: ``(images, labels_all)``. ``images`` is a float32 tensor
            built by stacking the images, reversing the channel axis, moving
            it to position 1 and dividing by 255. ``labels_all`` is a list
            holding one tensor per image that could be read; columns 1-4 of
            each tensor are the output of ``xyxy2xywh`` divided by ``height``.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        # Bounds of this batch inside the shuffled index vector.
        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        if self.multi_scale:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]
            # NOTE(review): only the file name of the configured label path is
            # kept; its directory is replaced by this hard-coded absolute path.
            label_path = 'D:/01-ComputerVisionEntries/10-HumanDetection/PyTorch-YOLOv3-master/labels/train/' + label_path.split(
                '/')[-1]
            img = cv2.imread(img_path)  # BGR
            if img is None:
                # Unreadable image: print its path and leave it out of the
                # batch (no entry is appended to img_all or labels_all).
                print(img_path)
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                # Random gain in [1 - fraction, 1 + fraction); the result is
                # clipped only when the gain is above 1.
                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                # Written back into img through dst=img.
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            # Shape before resizing; used below to scale the normalized labels.
            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img,
                                                   height=height,
                                                   color=(127.5, 127.5, 127.5))
            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 5)

                # Normalized xywh to pixel xyxy format.
                # Columns 1 and 2 are first shifted in place by half of
                # columns 3 and 4 - presumably the label files store a
                # top-left corner rather than a centre; TODO confirm.
                labels0[:, 1] = (labels0[:, 1] + labels0[:, 3] / 2)  # center x
                labels0[:, 2] = (labels0[:, 2] + labels0[:, 4] / 2)  # center y
                labels = labels0.copy()

                # x1,y1, x2,y2  #ratio = old/new
                # Restore the normalized coordinates to pixel coordinates of
                # the resized image.
                labels[:, 1] = ratio * w * (
                    labels0[:, 1] -
                    labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh
            else:
                # No label file: the image is kept with an empty label array.
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))
            # Debug visualisation, disabled by default.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh and normalize by the batch height
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip (disabled; `&` still evaluates the
                # random draw on its right-hand side)
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize

        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # Channel first and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0

        return torch.from_numpy(img_all), labels_all

Example #2

Show file

File: datasets.py Project: varadsrivastava/Slim_TinyYOLO

    def __getitem__(self, index):
        """Load, augment and normalise the sample at ``index``.

        When ``self.image_weights`` is set, ``index`` is first remapped
        through ``self.indices``. The image comes from the ``self.imgs``
        cache or from disk, is optionally colour-jittered, letterboxed,
        affine-transformed and flipped, and its labels are converted to
        xywh and divided by the final image width/height.

        Returns:
            tuple: ``(image, labels_out, img_path, (h, w))`` where ``image``
            is a float32 tensor in channel-first order with the channel axis
            reversed and values divided by 255, ``labels_out`` is an
            ``(nL, 6)`` tensor whose column 0 is left at zero, and ``(h, w)``
            is the image shape right before letterboxing.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]
        hparams = self.hyp

        # Use the cached image when there is one, otherwise read it from disk.
        img = self.imgs[index]
        if img is None:
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'Image Not Found ' + img_path
            scale = self.img_size / max(img.shape)
            # Only shrink, and only while training.
            if self.augment and scale < 1:
                src_h, src_w, _ = img.shape
                new_size = (int(src_w * scale), int(src_h * scale))
                img = cv2.resize(img, new_size, interpolation=cv2.INTER_LINEAR)

        # Colour-space augmentation: jitter saturation, then value.
        jitter_colors = True
        if self.augment and jitter_colors:
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel, gain_key in ((1, 'hsv_s'), (2, 'hsv_v')):
                plane = hsv[:, :, channel].astype(np.float32)
                gain = random.uniform(-1, 1) * hparams[gain_key] + 1
                plane *= gain
                hsv[:, :, channel] = plane if gain < 1 else plane.clip(None, 255)
            cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img)

        # Letterbox to the batch shape (rect) or to a square of img_size.
        h, w, _ = img.shape
        if self.rect:
            target_shape, box_mode = self.batch_shapes[self.batch[index]], 'rect'
        else:
            target_shape, box_mode = self.img_size, 'square'
        img, ratiow, ratioh, padw, padh = letterbox(img,
                                                    new_shape=target_shape,
                                                    mode=box_mode)

        # Labels: normalized xywh -> pixel xyxy in the letterboxed image.
        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                x_scale = ratiow * w
                y_scale = ratioh * h
                labels = raw.copy()
                labels[:, 1] = x_scale * (raw[:, 1] - half_w) + padw
                labels[:, 2] = y_scale * (raw[:, 2] - half_h) + padh
                labels[:, 3] = x_scale * (raw[:, 1] + half_w) + padw
                labels[:, 4] = y_scale * (raw[:, 2] + half_h) + padh

        # Geometric augmentation of image and labels.
        if self.augment:
            img, labels = random_affine(img, labels,
                                        degrees=hparams['degrees'],
                                        translate=hparams['translate'],
                                        scale=hparams['scale'],
                                        shear=hparams['shear'])

        nL = len(labels)  # number of labels
        if nL:
            out_h, out_w = img.shape[0], img.shape[1]
            # xyxy -> xywh, then normalise to 0 - 1.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            lr_flip = True
            ud_flip = False

            # random left-right flip
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; the remaining five columns carry the labels.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR -> RGB, HWC -> CHW, uint8 -> float32 in 0.0 - 1.0.
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                                   dtype=np.float32)
        chw /= 255.0

        return torch.from_numpy(chw), labels_out, img_path, (h, w)

Example #3

Show file

    def __next__(self):
        """Assemble and return the next batch of images and labels.

        ``self.count`` is incremented first; once it equals ``self.nB`` the
        iteration ends. Images are read from the ``image_train`` directory
        and labels from the ``labels_txt`` directory (both relative paths).
        Each image is optionally augmented (saturation/value jitter, random
        affine, left-right flip) and letterboxed to ``height``.

        Returns:
            tuple: ``(images, labels_all)``. ``images`` is a float32 tensor
            built by stacking the images, reversing the channel axis, moving
            it to position 1 and dividing by 255. ``labels_all`` is a list
            holding one tensor per image that could be read; columns 1-4 of
            each tensor are the output of ``xyxy2xywh`` divided by ``height``.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        # Bounds of this batch inside the shuffled index vector.
        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        if self.multi_scale:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(os.path.join("image_train", img_path))  # BGR
            if img is None:
                # Unreadable image: silently left out of the batch.
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                # Random gain in [1 - fraction, 1 + fraction); the result is
                # clipped only when the gain is above 1.
                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                # Written back into img through dst=img.
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            # Shape before letterboxing; used below to scale the labels.
            h, w, _ = img.shape
            img, ratio, padw, padh = letterbox(img, height=height)

            # Load labels
            if os.path.isfile(os.path.join("labels_txt", label_path)):
                # Warnings raised by np.loadtxt are suppressed here.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    labels0 = np.loadtxt(os.path.join("labels_txt",
                                                      label_path),
                                         dtype=np.float32).reshape(-1, 5)
                # Normalized xywh to pixel xyxy format
                labels = labels0.copy()
                labels[:, 1] = ratio * w * (labels0[:, 1] -
                                            labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh

            else:
                # NOTE(review): the printed path uses "data_train" although the
                # file was looked up under "labels_txt" - confirm which is meant.
                print(os.path.join("data_train", label_path))
                print("st wrong")
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))

            # Debug visualisation, disabled by default.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh and normalize by the batch height
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip (disabled; `&` still evaluates the
                # random draw on its right-hand side)
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0

        return torch.from_numpy(img_all), labels_all

Example #4

Show file

    def __next__(self):
        """Assemble and return the next batch of images and labels.

        ``self.count`` is incremented first; once it equals ``self.nB`` the
        iteration ends. Each image of the batch slice is read with OpenCV,
        optionally augmented (saturation/value jitter, random affine,
        left-right flip) and letterboxed to ``self.img_size``. The labels of
        all images are concatenated into one array whose first column is the
        position of the image inside the batch.

        Returns:
            tuple: ``(images, labels_all, img_paths, img_shapes)``.
            ``images`` is a float32 tensor built by stacking the images,
            reversing the channel axis, moving it to position 1 and dividing
            by 255. ``labels_all`` is an ``(n, 6)`` float32 tensor (image
            index followed by the five label columns); it has zero rows when
            no image in the batch has labels. ``img_paths`` and
            ``img_shapes`` list the path and the pre-letterbox ``(h, w)`` of
            every image.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
            AssertionError: when an image file cannot be read.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        # Bounds of this batch inside the shuffled index vector.
        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        img_all, labels_all, img_paths, img_shapes = [], [], [], []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'File Not Found ' + img_path

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                # Random gain in [1 - fraction, 1 + fraction); the result is
                # clipped only when the gain is above 1.
                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                # Written back into img through dst=img.
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            # Shape before letterboxing; used to scale the labels and
            # reported to the caller.
            h, w, _ = img.shape
            img, ratio, padw, padh = letterbox(img, height=self.img_size)

            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 5)

                # Normalized xywh to pixel xyxy format
                labels = labels0.copy()
                labels[:, 1] = ratio * w * (labels0[:, 1] -
                                            labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh
            else:
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))

            # Debug visualisation, disabled by default.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh and normalize by the image size
                labels[:,
                       1:5] = xyxy2xywh(labels[:, 1:5].copy()) / self.img_size

            if self.augment:
                # random left-right flip
                lr_flip = True
                if lr_flip and random.random() > 0.5:
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip (disabled). `and` short-circuits, so no
                # random number is drawn while the flag is off.
                ud_flip = False
                if ud_flip and random.random() > 0.5:
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            if nL > 0:
                # Prefix every label row with the image's position in the batch.
                labels = np.concatenate((np.zeros(
                    (nL, 1), dtype='float32') + index, labels), 1)
                labels_all.append(labels)

            img_all.append(img)
            img_paths.append(img_path)
            img_shapes.append((h, w))

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0

        # np.concatenate raises ValueError on an empty sequence, which happens
        # when no image of the batch has labels; return an empty (0, 6) array
        # in that case.
        if labels_all:
            merged_labels = np.concatenate(labels_all, 0)
        else:
            merged_labels = np.zeros((0, 6), dtype=np.float32)
        labels_all = torch.from_numpy(merged_labels)
        return torch.from_numpy(img_all), labels_all, img_paths, img_shapes

Example #5

Show file

    def __getitem__(self, index):
        """Load, augment and normalise the sample at ``index``.

        The image is read from disk, optionally colour-jittered, letterboxed
        to ``self.img_size``, optionally affine-transformed and flipped, and
        its labels are converted to xywh and divided by ``self.img_size``.

        Returns:
            tuple: ``(image, labels_out, img_path, (h, w))`` where ``image``
            is a float32 tensor in channel-first order with the channel axis
            reversed and values divided by 255, ``labels_out`` is an
            ``(nL, 6)`` tensor whose column 0 is left at zero, and ``(h, w)``
            is the image shape before letterboxing.

        Raises:
            AssertionError: when the image file cannot be read.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        # Colour-space augmentation: jitter saturation, then value, by 50%.
        jitter_colors = True
        if self.augment and jitter_colors:
            fraction = 0.50  # must be < 1.0
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):
                plane = hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    np.clip(plane, None, 255, out=plane)
                hsv[:, :, channel] = plane
            cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Labels: normalized xywh -> pixel xyxy in the letterboxed image.
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                rows = [line.split() for line in file.read().splitlines()]

            raw = np.array(rows, dtype=np.float32)
            if raw.size > 0:
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                x_scale = ratio * w
                y_scale = ratio * h
                labels = raw.copy()
                labels[:, 1] = x_scale * (raw[:, 1] - half_w) + padw
                labels[:, 2] = y_scale * (raw[:, 2] - half_h) + padh
                labels[:, 3] = x_scale * (raw[:, 1] + half_w) + padw
                labels[:, 4] = y_scale * (raw[:, 2] + half_h) + padh

        # Geometric augmentation of image and labels.
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=(-10, 10),
                                        translate=(0.10, 0.10),
                                        scale=(0.80, 1.20))

        nL = len(labels)  # number of labels
        if nL:
            # xyxy -> xywh, normalised by the letterbox size.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            lr_flip = True
            ud_flip = False

            # random left-right flip
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; the remaining five columns carry the labels.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR -> RGB, HWC -> CHW, uint8 -> float32 in 0.0 - 1.0.
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                                   dtype=np.float32)
        chw /= 255.0

        return torch.from_numpy(chw), labels_out, img_path, (h, w)

Example #6

Show file

    def __getitem__(self, index):
        """Load, augment and convert the sample at ``index``.

        When ``self.image_weights`` is set, ``index`` is first remapped
        through ``self.indices``. The image is loaded with ``load_image``,
        letterboxed, optionally augmented (random affine, HSV jitter,
        left-right flip) and its labels are converted to xywh and divided by
        the final image width/height.

        Returns:
            tuple: ``(image, labels_out, img_path, shapes)`` where ``image``
            is a tensor in channel-first order with the channel axis
            reversed (no dtype conversion or scaling is applied here),
            ``labels_out`` is an ``(nL, 6)`` tensor whose column 0 is left at
            zero, and ``shapes`` is ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        # Remap to the weighted index
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        hyp = self.hyp
        # Mosaic is hard-disabled here: `False and ...` is always False.
        mosaic = False and self.augment
        # If mosaic augmentation were enabled together with augmentation, the
        # sample would be built by load_mosaic (per the original author's
        # note it combines four images into one mosaic).
        if mosaic:
            # Load the mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load the image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox to the batch shape (rect mode) or to img_size
            shape = self.batch_shapes[self.batch[
                index]] if self.rect else self.img_size 
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), (
                (h / h0, w / w0), pad)  

            # Load the labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded: read them from label_path
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy (top-left, bottom-right)
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (
                        x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                    labels[:, 2] = ratio[1] * h * (
                        x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                    labels[:, 3] = ratio[0] * w * (x[:, 1] +
                                                   x[:, 3] / 2) + pad[0]
                    labels[:, 4] = ratio[1] * h * (x[:, 2] +
                                                   x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment image space
            if not mosaic:
                # Without mosaic, apply a random affine transform to the image
                img, labels = random_affine(img,
                                            labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Augment HSV colorspace; the return value is ignored, so
            # augment_hsv presumably modifies img in place - TODO confirm.
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        nL = len(labels)  # number of labels

        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates to 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # columns 2 and 4 by image height
            labels[:, [1, 3]] /= img.shape[1]  # columns 1 and 3 by image width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled)
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; the remaining five columns carry the labels.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert image layout
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, img_path, shapes

Example #7

Show file

    def __getitem__(self, index):
        """Load one sample and return its image tensor and 2D box labels.

        The image and annotations come from ``self.load_image`` and
        ``self.load_annotations``. During training the image may be flipped
        horizontally and affine-augmented; it is then resized with padding
        to ``(self.input_height, self.input_width)`` if it does not already
        have that size. The annotated 2D boxes are rescaled accordingly,
        converted to xywh and divided by the final image width/height.

        Returns:
            tuple: ``(image, labels, file, shapes)`` where ``image`` is a
            tensor in channel-first order with the channel axis reversed,
            ``labels`` holds the first six columns of the label tensor
            (column 0 left at zero, then class and the four box values),
            ``file`` is ``self.files[index]`` and ``shapes`` carries the
            original size, two size ratios and the padding.
        """
        img = self.load_image(index)
        anns, K = self.load_annotations(index)

        # Original image size as (w, h).
        size = np.array(img.shape[:-1], np.float32)[::-1]

        # Resize, horizontal flip and affine augmentation are performed here
        # because it is complicated to compute the heatmap w.r.t. the
        # transform.
        flipped = False
        if self.is_train and np.random.rand() < self.flip_prob:
            flipped = True
            img = cv2.flip(img, 1)
            # NOTE(review): K is modified in place; if load_annotations hands
            # out a shared/cached matrix this accumulates across calls.
            K[0, 2] = size[0] - K[0, 2] - 1

        if self.is_train and np.random.rand() < self.aug_prob:
            img, _target, _trans_mat = random_affine(img, degrees=0, translate=.1, scale=.1)
            # TODO: apply the affine matrix to the labels as well:
            #   point = affine_transform(point, trans_mat)
            #   box2d[:2] = affine_transform(box2d[:2], trans_mat)
            #   box2d[2:] = affine_transform(box2d[2:], trans_mat)
            # TODO: there is something wrong when clipping after resize:
            #   box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.input_width - 1)
            #   box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.input_height - 1)

        # Identity defaults so that `pad` (used in the returned shapes) is
        # defined even when the image already has the requested size.
        ratio, pad = (1.0, 1.0), (0.0, 0.0)
        resize = False
        # `or`, not `|`: the bitwise operator binds tighter than `!=`, which
        # turned the original test into `a != (b | c) != d`.
        if img.shape[0] != self.input_height or img.shape[1] != self.input_width:
            img, ratio, pad = resize_image_with_pad(img, (self.input_height, self.input_width))
            resize = True

        labels = np.zeros((len(anns), 9))
        for i, a in enumerate(anns):
            a = a.copy()
            _cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            # We can get 2D bbox by labels or calculate by camera&3D bbox directly
            point, box2d, box3d = encode_label(
                K, rot_y, a["dimensions"], locs
            )

            # When an object is only partly inside the image the computed 2D
            # box extends beyond the image, so the annotated box is used
            # instead for now.
            # NOTE(review): the annotated box is not mirrored when the image
            # was flipped above - confirm whether that is intended.
            box2d = a["bbox"]
            labels[i, 0] = _cls
            labels[i, 1:5] = np.array(box2d)
            labels[i, 5:8] = np.array(a["dimensions"])
            labels[i, 8] = rot_y

        nL = len(labels)
        if nL > 0:
            if resize:
                labels[:, 1] = ratio[0] * labels[:, 1] + pad[0]  # pad width
                labels[:, 2] = ratio[1] * labels[:, 2] + pad[1]  # pad height
                labels[:, 3] = ratio[0] * labels[:, 3] + pad[0]
                labels[:, 4] = ratio[1] * labels[:, 4] + pad[1]

            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

            # Clamp negatives to zero. This applies to every column,
            # including rot_y in column 8, which is not returned below.
            labels[labels < 0] = 0.0
        label_out = torch.zeros((nL, self.out_parms+1))
        if nL > 0:
            label_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # NOTE(review): img is now (C, H, W), so size[1] (original height) is
        # divided by the output width and size[0] by the output height -
        # confirm the intended pairing.
        shapes = (size[1], size[0]), (size[1]/img.shape[2], size[0]/img.shape[1], pad)
        return torch.from_numpy(img), label_out[:, :6], self.files[index], shapes

Example #8

Show file

File: datasets.py Project: ultralytics/xview-docker

    def __next__(self):
        """Return the next batch of augmented square chips and their labels.

        For every readable image in the batch the method applies random
        saturation/value jitter, warps the image with ``random_affine``,
        picks up to eight crop centres (weighted by ``self.class_weights`` of
        the boxes near each candidate when the image has labels), cuts
        ``height`` x ``height`` chips, converts the surviving boxes to
        ``xywh`` divided by ``height`` and randomly flips each chip.

        Returns:
            tuple: ``(images, labels)``. ``images`` is a float32 tensor built
            from the stacked chips with the channel axis reversed and moved
            to position 1, then standardised with ``self.rgb_mean`` and
            ``self.rgb_std``. ``labels`` is a list holding one tensor per
            chip, in the same shuffled order as ``images``.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        height = self.height
        half = height / 2
        crops_per_image = 8

        img_all = []
        labels_all = []
        for files_index in range(ia, ib):
            img_path = '%s/%g.bmp' % (self.path,
                                      self.shuffled_vector[files_index])

            img0 = cv2.imread(img_path)
            if img0 is None:
                # Unreadable or missing image: contribute nothing to the batch.
                continue

            augment_hsv = True
            if augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img0, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    # Only a gain above 1 can push values past 255.
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img0)

            # Load labels: rows of self.mat['targets'] whose id matches the
            # numeric file name of this chip.
            chip = img_path.rsplit('/')[-1]
            i = (self.mat['id'] == float(
                chip.replace('.tif', '').replace('.bmp', ''))).nonzero()[0]
            labels1 = self.mat['targets'][i]

            img1, labels1, M = random_affine(img0,
                                             targets=labels1,
                                             degrees=(-20, 20),
                                             translate=(0.01, 0.01),
                                             scale=(0.70, 1.30))  # RGB

            nL1 = len(labels1)
            border = half + 1

            # Pick 100 random points inside the source image (homogeneous
            # coordinates), map them through the affine matrix and keep the
            # ones far enough from every edge of the warped image for a full
            # chip to fit.
            r = np.ones((100, 3))
            r[:, :2] = np.random.rand(
                100, 2) * (np.array(img0.shape)[[1, 0]] - border * 2) + border
            r = (r @ M.T)[:, :2]
            # Points are (x, y), so the limits are (width, height). The
            # previous check used the height for both axes, which is only
            # valid when the warped image is square.
            upper = np.array(img1.shape)[[1, 0]] - border
            r = r[np.all(r > border, 1) & np.all(r < upper, 1)]

            # Never ask for more crops than there are candidate centres;
            # sampling without replacement and the indexing below would
            # otherwise raise.
            n_crops = min(crops_per_image, len(r))
            if n_crops == 0:
                continue

            if nL1 > 0:
                # Box centres do not depend on the candidate point.
                x = (labels1[:, 1] + labels1[:, 3]) / 2
                y = (labels1[:, 2] + labels1[:, 4]) / 2

                weights = []
                for px, py in r:
                    # Classes of the boxes whose centre falls in the chip
                    # that would be cut around this candidate.
                    c = labels1[(abs(px - x) < half) &
                                (abs(py - y) < half), 0]
                    if len(c) == 0:
                        # Tiny non-zero weight keeps empty chips selectable.
                        weights.append(1e-16)
                    else:
                        weights.append(self.class_weights[c.astype(
                            np.int8)].sum())

                weights = np.array(weights)
                weights /= weights.sum()
                r = r[np.random.choice(len(r),
                                       size=n_crops,
                                       p=weights,
                                       replace=False)]

                # Box areas before cropping, used to drop heavily clipped boxes.
                area0 = (labels1[:, 3] - labels1[:, 1]) * (labels1[:, 4] -
                                                           labels1[:, 2])

            for j in range(n_crops):
                labels = np.array([], dtype=np.float32)

                pad_x = int(r[j, 0] - half)
                pad_y = int(r[j, 1] - half)
                if nL1 > 0:
                    # Shift boxes into chip coordinates and clip to the chip.
                    labels = labels1.copy()
                    labels[:, [1, 3]] -= pad_x
                    labels[:, [2, 4]] -= pad_y
                    np.clip(labels[:, 1:5], 0, height, out=labels[:, 1:5])

                    lw = labels[:, 3] - labels[:, 1]
                    lh = labels[:, 4] - labels[:, 2]
                    area = lw * lh
                    ar = np.maximum(lw / (lh + 1e-16), lh / (lw + 1e-16))

                    # objects must have width and height > 4 pixels
                    labels = labels[(lw > 4) & (lh > 4) & (area > 20) &
                                    (area / area0 > 0.1) & (ar < 10)]

                img = img1[pad_y:pad_y + height, pad_x:pad_x + height]

                nL = len(labels)
                if nL > 0:
                    # convert labels to xywh, normalised by the chip size
                    labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

                # random lr flip
                if random.random() > 0.5:
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random ud flip
                if random.random() > 0.5:
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

                img_all.append(img)
                labels_all.append(torch.from_numpy(labels))

        # Randomize chip order, keeping images and labels aligned
        order = np.random.permutation(len(labels_all))
        img_all = [img_all[j] for j in order]
        labels_all = [labels_all[j] for j in order]

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all -= self.rgb_mean
        img_all /= self.rgb_std

        return torch.from_numpy(img_all), labels_all

Example #9

Show file

def detect(input_image, save_img=True):
    """Run the detector on one frame, publish centroids and return its boxes.

    The frame is letterboxed, passed through the module-level ``model``,
    filtered with non-max suppression and rescaled to the input frame.
    Centroids of 'blemished' / 'unblemished' detections whose x centre is
    greater than 250 are published through the module-level publishers.

    Args:
        input_image: image array for the current frame (converted to float32).
        save_img: whether boxes are drawn on the frame. NOTE: the annotated
            frame is always written to disk because ``save_img`` is forced to
            ``True`` after the drawing step (kept from the original code).

    Returns:
        list: one dict per processed image mapping class name to a list of
        ``[x_top_left, y_top_left, width, height]`` integer boxes.

    Raises:
        StopIteration: if ``cv2.waitKey(1)`` reports the 'q' key.
    """
    out, view_img, save_txt, imgsz = \
        opt.output, opt.view_img, opt.save_txt, opt.img_size

    global frame_num, model
    global pubCentBlem, pubCentUnBlem

    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    print("class names array ", names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        model(img.half() if half else img)  # run once
    bounding_boxes_all_images = []

    img0 = input_image.astype('float32')
    img = letterbox(img0, new_shape=imgsz)[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    # Single-element "dataset" so the loop below keeps the usual
    # (path, img, im0s, vid_cap) shape.
    dataset = [("frame_num" + str(frame_num) + '.jpg', img, img0, None)]

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = torch_utils.time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t3 = torch_utils.time_synchronized()

        # Per-image centroid bookkeeping, keyed by the x centre
        Loc2arrCent = {}
        arrCentBlem = []  # flat [x, y, x, y, ...]
        arrCentUnBlem = []  # flat [x, y, x, y, ...]
        Loc2Cls = {}

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            bounding_boxes = {}

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    cls_name = names[int(cls)]
                    tlx, tly, brx, bry = int(xyxy[0]), int(xyxy[1]), int(
                        xyxy[2]), int(xyxy[3])

                    # Boxes are reported as top-left corner plus size.
                    bounding_boxes.setdefault(cls_name, []).append(
                        [tlx, tly, abs(brx - tlx), abs(bry - tly)])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') %
                                       (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (cls_name, conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=1)

                    # append onion centroid to list of centroids
                    if cls_name in ('blemished', 'unblemished'):
                        centx = (tlx + brx) / 2
                        centy = (tly + bry) / 2

                        if centx > 250:
                            # FOR CAMERA LOOKING FROM FRONT
                            # if center is on the belt
                            is_blemished = cls_name == 'blemished'
                            Loc2Cls[centx] = 0 if is_blemished else 1
                            Loc2arrCent[centx] = (centx, centy)

                            # Choose the list by class first, so a repeated
                            # blemished x can no longer fall through into the
                            # unblemished list. Only the x entries (even
                            # indices) take part in the duplicate check.
                            centroids = (arrCentBlem
                                         if is_blemished else arrCentUnBlem)
                            if centx not in centroids[::2]:
                                centroids.extend((centx, centy))

            # all bounding boxes for this frame: "bounding_boxes"
            bounding_boxes_all_images.append(bounding_boxes)
            print("Frame number = ", frame_num, "Bounding boxes: ",
                  bounding_boxes)
            frame_num += 1
            # Print time (inference + NMS)
            print('%s Inference. (%.3fs)' % (s, t2 - t1))
            print('%s NMS. (%.3fs)' % (s, t3 - t2))

            # Stream results
            # NOTE: hard-coded override; the window itself is not shown, only
            # the key poll is kept.
            view_img = True
            if view_img:
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            # NOTE: hard-coded override of the ``save_img`` argument; remove
            # the assignment to make saving optional again.
            save_img = True
            if save_img:
                cv2.imwrite(save_path, im0)

        # publish centroids for current image
        print("publishing arrCentBlem ", arrCentBlem)
        print("publishing arrCentUnBlem ", arrCentUnBlem)
        pubCentBlem.publish(Float32MultiArray(data=arrCentBlem))
        pubCentUnBlem.publish(Float32MultiArray(data=arrCentUnBlem))

        # Class ids ordered by increasing x centre
        LocOrderedPreds = [Loc2Cls[x] for x in sorted(Loc2Cls)]
        print("publishing LocOrderedPreds ", LocOrderedPreds)
        pubLocOrderedPreds.publish(Int32MultiArray(data=LocOrderedPreds))

        # Centroids ordered by increasing x centre, then flattened to
        # [x, y, x, y, ...]
        arrCentxyOrd = [Loc2arrCent[x] for x in sorted(Loc2arrCent)]
        print("arrCentxyOrd ", arrCentxyOrd)
        arrCentLocOrd = []
        for cent in arrCentxyOrd:
            arrCentLocOrd.append(cent[0])
            arrCentLocOrd.append(cent[1])
        print("publishing arrCentLocOrd ", arrCentLocOrd)
        pubCentLocOrd.publish(Float32MultiArray(data=arrCentLocOrd))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return bounding_boxes_all_images

Example #10

Show file

File: deep_sort.py Project: zyg11/deep_sort_yolov3_pytorch

    def detect(self, outfile=None):
        """Detect and track objects over the video, optionally logging tracks.

        Frames are pulled from ``self.vdo``; every frame whose index is a
        multiple of three is skipped. For the remaining frames the detector
        output is converted with ``xyxy2xywh`` and handed to
        ``self.deepsort.update``; tracked boxes are drawn on the frame, which
        is shown and/or written depending on ``self.args``.

        Args:
            outfile: optional path of a text file. When given, one line per
                tracked box is written as
                ``frame,id,x,y,w,h,1,-1,-1,-1`` with boxes converted by
                ``xyxy2tlwh``.

        The output file is closed even if processing raises.
        """
        frame_cnt = -1

        f = open(outfile, 'w') if outfile is not None else None

        print("begin....")

        try:
            while self.vdo.grab():
                frame_cnt += 1

                # Skip frames 0, 3, 6, ...
                if frame_cnt % 3 == 0:
                    continue

                start = time.time()
                _, ori_im = self.vdo.retrieve()
                im = ori_im

                t1_begin = time.time()
                bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
                t1_end = time.time()

                t2_begin = time.time()
                if bbox_xxyy is not None:
                    bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        # draw boxes
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

                        # frame, id, tlwh(%.2f),1,-1,-1,-1
                        if f is not None:
                            box_xywh = xyxy2tlwh(bbox_xyxy)
                            for identity, box in zip(identities, box_xywh):
                                write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                    frame_cnt + 1, identity,
                                    int(box[0]), int(box[1]),
                                    int(box[2]), int(box[3]))
                                f.write(write_line)

                t2_end = time.time()

                end = time.time()
                print(
                    "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                    % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin),
                       (end - start), ((t1_end - t1_begin) * 100 /
                                       ((end - start))), (1 / (end - start))))
                if self.args.display:
                    cv2.imshow("test", ori_im)
                    cv2.waitKey(1)

                if self.args.save_path:
                    self.output.write(ori_im)
        finally:
            # Release the log file on every exit path so buffered lines are
            # flushed and the handle is not leaked.
            if f is not None:
                f.close()

Example #11

Show file

    def __getitem__(self, index):
        """Fetch sample ``index`` as a letterboxed image tensor with targets.

        Returns:
            tuple: ``(img, labels_out, img_path, shapes)`` where ``img`` is
            the channel-reversed, channel-first image tensor, ``labels_out``
            is an ``(nL, 6)`` tensor whose columns 1-5 hold the class and the
            normalised ``xywh`` box (column 0 is left at zero), ``img_path``
            is the source path and ``shapes`` is
            ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Load image
        img, (h0, w0), (h, w) = load_image(self, index)

        # Letterbox to the batch shape (rectangular mode) or the fixed size
        if self.rect:
            target_shape = self.batch_shapes[self.batch[index]]
        else:
            target_shape = self.img_size
        img, ratio, pad = letterbox(img,
                                    target_shape,
                                    auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    rows = [line.split() for line in f.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                # Normalized xywh to pixel xyxy format, including padding
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2

                labels = raw.copy()
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

        nL = len(labels)  # number of labels

        if nL:
            # Back to xywh, then normalise by the letterboxed image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            img_h, img_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w

        if self.augment:
            # Left-right flip is enabled, up-down flip is switched off
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR to RGB and HWC to CHW, made contiguous for torch
        img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
        return torch.from_numpy(img), labels_out, img_path, shapes

Example #12

Show file

    def __getitem__(self, index):
        """Load, pad or tile, augment and normalise one image with its boxes.

        Depending on ``self.ts`` the image is either scaled, padded and cut
        into a random patch, or padded and resized to
        ``(self.height, self.width)``. Labels follow the same geometry, are
        optionally augmented together with the image, converted to ``xywh``
        and divided by ``self.width`` / ``self.height``.

        Returns:
            tuple: ``(img_uri, img, labels)`` where ``img`` is the result of
            ``to_tensor`` and ``labels`` is zero-padded with extra rows up to
            ``self.num_targets_per_image``.

        Raises:
            Exception: if the image is empty, or if any label value is
                negative after normalisation.
            SystemExit: once ``self.vis_batch`` images have been visualised.
        """
        img_uri = self.img_files[index]
        img_labels = self.labels[index]
        # don't download, since it was already downloaded in the init
        img_path = img_uri
        img_name = ("_".join(map(str, img_path.split("_")[-5:])))
        orig_img = PIL.Image.open(img_path).convert('RGB')
        if orig_img is None:
            raise Exception(
                "Empty image: {img_path}".format(img_path=img_path))

        if self.vis_batch and len(img_labels) > 0:
            # Debug output: original image with its untouched boxes
            vis_orig_img = copy.deepcopy(orig_img)
            labels = add_class_dimension_to_labels(img_labels)
            labels = xyhw2xyxy_corner(labels, skip_class_dimension=True)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + ".jpg")
            visualize_and_save_to_local(vis_orig_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")
            print(f'new image uploaded to {tmp_path}')

        # First, handle image re-shaping
        if self.ts:
            # Scale, pad so patches tile the image, then take one patch
            scale = self.scales[index]
            scaled_img = scale_image(orig_img, scale)
            scaled_img_width, scaled_img_height = scaled_img.size
            patch_width, patch_height = self.width, self.height

            vert_pad, horiz_pad = pre_tile_padding(scaled_img_width,
                                                   scaled_img_height,
                                                   patch_width, patch_height)
            padded_img = torchvision.transforms.functional.pad(
                scaled_img,
                padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                fill=(127, 127, 127),
                padding_mode="constant")
            padded_img_width, padded_img_height = padded_img.size

            _, _, n_patches, _, _ = get_patch_spacings(padded_img_width,
                                                       padded_img_height,
                                                       patch_width,
                                                       patch_height)

            patch_index = random.randint(0, n_patches - 1)
            if self.debug_mode:
                patch_index = 0
            img, boundary = get_patch(padded_img, patch_width, patch_height,
                                      patch_index)
        else:
            # Pad, then resize the whole image to the network size
            orig_img_width, orig_img_height = orig_img.size
            vert_pad, horiz_pad, ratio = calculate_padding(
                orig_img_height, orig_img_width, self.height, self.width)
            img = torchvision.transforms.functional.pad(
                orig_img,
                padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                fill=(127, 127, 127),
                padding_mode="constant")
            img = torchvision.transforms.functional.resize(
                img, (self.height, self.width))

        # If no labels, no need to do augmentation (this should change in the future)
        #   so immediately return with the padded image and empty labels
        if len(img_labels) == 0:
            labels = torch.zeros((len(img_labels), 5))
            img = torchvision.transforms.functional.to_tensor(img)
            labels = F.pad(
                labels,
                pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                mode="constant")
            return img_uri, img, labels

        # Next, handle label re-shaping
        labels = add_class_dimension_to_labels(img_labels)
        labels = xyhw2xyxy_corner(labels)
        if self.ts:
            labels = scale_labels(labels, self.scales[index])
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_scaled.jpg")
                visualize_and_save_to_local(padded_img,
                                            labels,
                                            tmp_path,
                                            box_color="red")

            # Labels for the patch chosen above. This is the value used for
            # training; the loops below are for visualisation/upload only.
            labels_temp = filter_and_offset_labels(labels, boundary)

            if self.vis_batch:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(
                        visualization_tmp_path,
                        img_name[:-4] + "_patch_{}.jpg".format(i))
                    visualize_and_save_to_local(vis_patch_img,
                                                labels,
                                                tmp_path,
                                                box_color="blue")
            # NOTE(review): when vis_batch is also set, ``labels`` and
            # ``boundary`` here are those of the last visualised patch -
            # confirm this is intended for the upload path.
            if self.upload_dataset:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(
                        visualization_tmp_path,
                        img_name[:-4] + "_patch_{}.jpg".format(i))
                    upload_label_and_image_to_gcloud(vis_patch_img, labels,
                                                     tmp_path)

            else:
                labels = filter_and_offset_labels(labels, boundary)
        else:
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            labels = scale_labels(labels, ratio)
            labels_temp = labels

            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_pad_resized.jpg")
                visualize_and_save_to_local(img,
                                            labels,
                                            tmp_path,
                                            box_color="blue")

        # Whatever the branches above left in ``labels``, continue with the
        # labels that belong to ``img``.
        labels = labels_temp
        if self.vis_batch and self.data_aug:
            vis_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_before_aug.jpg")
            visualize_and_save_to_local(vis_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="red")
        if self.augment_hsv or self.data_aug:
            if random.random() > 0.5:
                img = self.jitter(img)
                # no transformation on labels

        # Augment image and labels
        img_width, img_height = img.size
        if self.augment_affine or self.data_aug:
            if random.random() > 0:
                angle = random.uniform(-10, 10)
                translate = (random.uniform(-40, 40),
                             random.uniform(-40, 40))  ## WORKS
                scale = random.uniform(0.9, 1.1)
                shear = random.uniform(-3, 3)
                img = torchvision.transforms.functional.affine(img,
                                                               angle,
                                                               translate,
                                                               scale,
                                                               shear,
                                                               2,
                                                               fillcolor=(127,
                                                                          127,
                                                                          127))
                # Labels get the angle and shear with the opposite sign
                labels = affine_labels(img_height, img_width, labels, -angle,
                                       translate, scale, (-shear, 0))

        if self.bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        # random left-right flip
        if self.lr_flip:
            if random.random() > 0.5:
                img = torchvision.transforms.functional.hflip(img)
                # Mirroring maps x to W - x, so the old right edge becomes
                # the new left edge and vice versa. Compute both mirrored
                # edges before writing so x_min stays <= x_max and the xywh
                # conversion below cannot yield a negative width.
                flipped_x_min = img_width - labels[:, 3]
                flipped_x_max = img_width - labels[:, 1]
                labels[:, 1] = flipped_x_min
                labels[:, 3] = flipped_x_max

        # GaussianBlur, needs further development
        if self.blur:
            if random.random() > 0.2:
                arr = np.asarray(img)
                # Unused value; the draw is kept so the random stream is
                # unchanged.
                angle = random.uniform(40, -40)
                sigma = random.uniform(0, 3.00)
                seq = iaa.Sequential([iaa.GaussianBlur(sigma=sigma)])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #AdditiveGaussianNoise
        if self.noise:
            if random.random() > 0.3:
                arr = np.asarray(img)
                scale = random.uniform(0, 0.03 * 255)
                seq = iaa.Sequential([
                    iaa.AdditiveGaussianNoise(loc=0,
                                              scale=scale,
                                              per_channel=0.5)
                ])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #SigmoidContrast, need further development
        if self.contrast:
            if random.random() > 0.5:
                arr = np.asarray(img)
                cutoff = random.uniform(0.45, 0.75)
                gain = random.randint(5, 10)
                seq = iaa.Sequential(
                    [iaa.SigmoidContrast(gain=gain, cutoff=cutoff)])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #Sharpen, need further development
        if self.sharpen:
            if random.random() > 0.3:
                arr = np.asarray(img)
                alpha = random.uniform(0, 0.5)
                seq = iaa.Sharpen(alpha=alpha)
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        if self.vis_batch and self.data_aug:
            vis_post_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_post_augmentation.jpg")
            visualize_and_save_to_local(vis_post_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")

        if self.vis_batch:
            # vis_batch doubles as the number of images to visualise
            self.vis_counter += 1
            if self.vis_counter > (self.vis_batch - 1):
                sys.exit('Finished visualizing enough images. Exiting!')

        # Corner boxes to centre/size, normalised by the network input size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        labels[:, (1, 3)] /= self.width
        labels[:, (2, 4)] /= self.height

        img = torchvision.transforms.functional.to_tensor(img)
        # Append zero rows so every sample has the same number of label rows
        labels = F.pad(labels,
                       pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                       mode="constant")
        if (labels < 0).sum() > 0:
            raise Exception(f"labels for image {img_uri} have negative values")
        return img_uri, img, labels

Example #13

Show file

    def __getitem__(self, index):
        """Load one image and its labels, letterboxed to ``self.img_size``.

        Args:
            index: Position into ``self.img_files`` / ``self.label_files``.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``:
            ``img`` is a float32 tensor in channel-first RGB order scaled to
            [0, 1]; ``labels_out`` is an ``(nL, 6)`` tensor whose column 0 is
            left at zero and whose remaining columns hold the label row
            (class, then the box as xywh divided by ``self.img_size``);
            ``(h, w)`` is the image size before letterboxing.

        Raises:
            AssertionError: If ``cv2.imread`` cannot read the image.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path
        h, w, _ = img.shape
        # Resize every image to img_size.
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                # One box per line: "class x_center y_center w h", with all
                # coordinates normalized, e.g. "4 0.43 0.36 0.06 0.24".
                lines = file.read().splitlines()
            x = np.array([line.split() for line in lines],
                         dtype=np.float32)  # x: (box_num, 5)
            if x.size > 0:
                # Normalized xywh to pixel xyxy format. The image was resized
                # and padded, so the boxes are rescaled and shifted to match.
                labels = x.copy()
                labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh

        # TODO: image/label augmentation (random affine, flips) is disabled.

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh and normalize by the letterboxed size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        # Column 0 stays zero; the original note suggests it may be reserved
        # for a batch index.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

Example #14

Show file

    def __getitem__(self, index):
        """Fetch, letterbox and (optionally) augment the sample at ``index``.

        Args:
            index: Position into ``self.img_files`` / ``self.label_files``.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``: a float32
            channel-first RGB tensor scaled to [0, 1], an ``(nL, 6)`` label
            tensor whose column 0 is left at zero, the image path, and the
            image size before letterboxing.

        Raises:
            AssertionError: If no image could be obtained for ``index``.
        """
        img_path, label_path = self.img_files[index], self.label_files[index]

        # Prefer the preloaded image list when the dataset has one.
        if hasattr(self, 'imgs'):
            img = self.imgs[index]
        else:
            img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        # Letterbox to either the per-batch rectangle or the square size.
        h, w, *_ = img.shape
        if self.pad_rectangular:
            target_shape = self.batch_shapes[self.batch[index]]
            letterbox_mode = 'rect'
        else:
            target_shape = self.img_size
            letterbox_mode = 'square'
        img, ratio, padw, padh = letterbox(img,
                                           new_shape=target_shape,
                                           mode=letterbox_mode)

        # Labels come from the in-memory cache, gated on the file existing.
        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                scale_w, scale_h = ratio * w, ratio * h
                half_w, half_h = x[:, 3] / 2, x[:, 4] / 2
                labels = x.copy()
                labels[:, 1] = scale_w * (x[:, 1] - half_w) + padw
                labels[:, 2] = scale_h * (x[:, 2] - half_h) + padh
                labels[:, 3] = scale_w * (x[:, 1] + half_w) + padw
                labels[:, 4] = scale_h * (x[:, 2] + half_h) + padh

        # Geometric augmentation of image and boxes together.
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=(-5, 5),
                                        translate=(0.10, 0.10),
                                        scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # Pixel xyxy -> xywh, then scale into the 0 - 1 range.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[:2]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Left-right flip is enabled, up-down flip is switched off.
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR HWC uint8 -> RGB CHW float32 in [0, 1].
        rgb_chw = img[:, :, ::-1].transpose(2, 0, 1)
        rgb_chw = np.ascontiguousarray(rgb_chw, dtype=np.float32)
        rgb_chw /= 255.0

        return torch.from_numpy(rgb_chw), labels_out, img_path, (h, w)

Example #15

Show file

    def detect(self):
        """Run detection and tracking over every frame of ``self.vidCap``.

        For each grabbed frame the detector (``self.yolov3.predict``) produces
        boxes that are handed to ``self.deepsort.update``; tracked boxes are
        drawn onto the frame. Depending on instance settings the frame is
        shown (``self.display``), written to ``self.output`` (when
        ``self.save_path`` is set), and one text line per track is appended to
        ``self.output_file``. Timing statistics are printed every 500 frames.

        Returns:
            None.
        """
        frame_no = -1
        # skip_no = 2

        # The results path lives on the instance; the bare name
        # ``output_file`` is not defined inside this method.
        f = open(self.output_file, "w") if self.output_file else None

        try:
            while self.vidCap.grab():
                frame_no += 1

                # skip frames every n frames
                # if frame_no % skip_no != 0:
                #     continue

                # start time
                total_begin = time.time()

                _, img = self.vidCap.retrieve()

                # YOLOv3 detection
                yolo_begin = time.time()
                bbox_xyxy, cls_conf, cls_ids = self.yolov3.predict(img)
                # [x1,y1,x2,y2]
                yolo_end = time.time()

                # DeepSORT tracking
                ds_begin = time.time()
                if bbox_xyxy is not None:
                    bbox_cxcywh = xyxy2xywh(bbox_xyxy)

                    outputs = self.deepsort.update(bbox_cxcywh, cls_conf, img)

                    if len(outputs) > 0:
                        # [x1,y1,x2,y2] id
                        bbox_xyxy = outputs[:, :4]
                        ids = outputs[:, -1]
                        img = draw_bboxes(img, bbox_xyxy, ids)

                        # frame,id,tlwh,1,-1,-1,-1
                        if f is not None:
                            # NOTE(review): named tlwh but produced by
                            # xyxy2xywh; if that helper returns box centres
                            # the written x/y are centres, not the top-left
                            # corner -- confirm against the helper.
                            bbox_tlwh = xyxy2xywh(bbox_xyxy)
                            for i in range(len(bbox_tlwh)):
                                write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                    frame_no + 1, outputs[i, -1],
                                    int(bbox_tlwh[i][0]), int(bbox_tlwh[i][1]),
                                    int(bbox_tlwh[i][2]), int(bbox_tlwh[i][3]))
                                f.write(write_line)
                ds_end = time.time()

                total_end = time.time()

                if frame_no % 500 == 0:
                    det_time = yolo_end - yolo_begin
                    total_time = total_end - total_begin
                    print("frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" % (
                        frame_no,
                        det_time,
                        (ds_end - ds_begin),
                        total_time,
                        (det_time * 100 / total_time),
                        (1 / total_time)))

                if self.display is True:
                    cv2.imshow("Test", img)
                    cv2.waitKey(1)

                if self.save_path:
                    self.output.write(img)
        finally:
            # Close the results file even when a frame raises.
            if f is not None:
                f.close()

Example #16

Show file

File: datasets.py Project: sleung852/People-Counter-Food-Expo-2019-YOLOV3

    def __getitem__(self, index):
        """Return one letterboxed, optionally augmented sample.

        Images are cached in ``self.imgs`` (only while ``self.n < 1001``) and
        labels in ``self.labels`` after their first read from disk.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``: a float32
            channel-first RGB tensor scaled to [0, 1], an ``(nL, 6)`` label
            tensor whose column 0 is left at zero, the image path, and the
            image size before letterboxing.

        Raises:
            AssertionError: If the image cannot be read from ``img_path``.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Load image
        img = self.imgs[index]
        if img is None:
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'File Not Found ' + img_path
            if self.n < 1001:
                self.imgs[index] = img  # cache image into memory

        # Augment colorspace
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
            V = img_hsv[:, :, 2].astype(np.float32)  # value

            a = (random.random() * 2 - 1) * fraction + 1
            b = (random.random() * 2 - 1) * fraction + 1
            S *= a
            V *= b

            # Gains below 1 cannot exceed 255, so clipping is only needed
            # when the channel was scaled up.
            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
            # Convert into a NEW array. Writing the result with ``dst=img``
            # would overwrite the array stored in ``self.imgs[index]``, so
            # every later fetch would augment an already-augmented image.
            img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
            img, ratiow, ratioh, padw, padh = letterbox(img,
                                                        new_shape=shape,
                                                        mode='rect')
        else:
            shape = self.img_size
            img, ratiow, ratioh, padw, padh = letterbox(img,
                                                        new_shape=shape,
                                                        mode='square')

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    x = np.array(
                        [line.split() for line in f.read().splitlines()],
                        dtype=np.float32)
                    self.labels[index] = x  # save for next time

            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=(-5, 5),
                                        translate=(0.10, 0.10),
                                        scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

Example #17

Show file

File: datasets.py Project: jiacnn/yolo3-omr

    def __next__(self):
        """Build and return the next batch of images and labels.

        Returns:
            Tuple ``(img_all, labels_all)``: ``img_all`` is a float32 tensor
            of stacked images in (batch, channel, height, width) RGB layout
            scaled to [0, 1]; ``labels_all`` is a list with one label tensor
            per loaded image.

        Raises:
            StopIteration: When the batch counter reaches ``self.nB``.
            AssertionError: If no image of the batch could be read.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        multi_scale = False
        if multi_scale and self.augment:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        # The class-name files do not depend on the image, so read them once
        # per batch instead of once per image.
        name_classes = load_classes(
            '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/new_duration.names')
        pitch_classes = load_classes(
            '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/pitch.names')

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(img_path)  # BGR
            if img is None:
                print('nooooooooooimages')
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img,
                                                   height=height,
                                                   color=(127.5, 127.5, 127.5))

            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 7)
                labels = labels0.copy()
                # Columns 1-4 are only scaled and shifted, not converted from
                # centre/size form.
                # NOTE(review): this presumes the files already store pixel
                # xyxy boxes -- confirm against the label writer.
                labels[:, 1] = ratio * labels0[:, 1] + padw
                labels[:, 2] = ratio * labels0[:, 2] + padh
                labels[:, 3] = ratio * labels0[:, 3] + padw
                labels[:, 4] = ratio * labels0[:, 4] + padh

                # Column 5: keep known duration ids, map the rest to 10.
                durations = [
                    int(i) if str(int(i)) in name_classes else 10
                    for i in labels0[:, 5]
                ]

                # Column 6: pitch out of range, or no pitch at all.
                pitchs = []
                for i in labels0[:, 6]:
                    if str(int(i)) in pitch_classes:
                        pitchs.append(int(i))
                    elif int(i) > 15:
                        pitchs.append(15)
                    else:
                        pitchs.append(-5)
                labels[:, 5] = durations
                labels[:, 6] = pitchs
            else:
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, _ = random_affine(img,
                                               labels,
                                               degrees=(-3, 3),
                                               translate=(0.1, 0.1),
                                               scale=(0.9, 1.1))

            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # ``&`` (unlike ``and``) evaluates both operands, so one
                # random number is drawn for each flip even when it is off.
                # random left-right flip
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize
        assert len(img_all) != 0
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0
        return torch.from_numpy(img_all), labels_all

Example #18

Show file

    def __getitem__(self, index):
        """Assemble the training/evaluation sample stored at ``index``.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            Tuple ``(img, labels_out, img_file, shapes)``: the image as a
            channel-first RGB tensor (dtype unchanged), an ``(nL, 6)`` label
            tensor laid out as [batch, cls, x, y, w, h] with column 0 left at
            zero, the image file path, and ``shapes`` -- ``None`` for mosaics,
            otherwise ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        # Per the original note this flag is False during training.
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Mosaic path: image and labels are produced together.
            img, labels = load_mosaic(self, index)  # index is an int
            shapes = None
        else:
            # Per the original note: (h, w) is the resized size, with one
            # side equal to img_size; img is the interpolated BGR image.
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape (height, width).
            if self.rect:
                shape = self.batch_shapes[self.batch[index]]
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # self.labels[index] holds every ground-truth box of this image.
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh -> pixel xyxy, shifted by the padding.
                scale_w, scale_h = ratio[0] * w, ratio[1] * h
                half_w, half_h = x[:, 3] / 2, x[:, 4] / 2
                labels = x.copy()
                labels[:, 1] = scale_w * (x[:, 1] - half_w) + pad[0]  # pad width
                labels[:, 2] = scale_h * (x[:, 2] - half_h) + pad[1]  # pad height
                labels[:, 3] = scale_w * (x[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_h * (x[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics.
            if not self.mosaic:
                img, labels = random_affine(img,
                                            labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Colour-space augmentation.
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        nL = len(labels)  # number of labels
        if nL:
            # Both the incoming xyxy and the resulting xywh are in pixels
            # here; normalization to 0 - 1 follows.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[:2]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Left-right flip is enabled, up-down flip is switched off.
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # [batch, cls, x, y, w, h]
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR HWC -> RGB CHW, made contiguous for torch.
        rgb_chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(rgb_chw), labels_out, self.img_files[index], shapes

Example #19

Show file

File: datasets.py Project: penseesface/RJF_yolo

    def __getitem__(self, index):
        """Return one sample, built as a mosaic when augmenting.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``: a float32
            channel-first RGB tensor scaled to [0, 1], an ``(nL, 6)`` label
            tensor whose column 0 is left at zero, the image path, and the
            size of the mosaic or of the loaded image before letterboxing.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # load 4 images at a time into a mosaic (only during training)
        mosaic = self.augment
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            h, w, _ = img.shape

        else:
            # Load image
            img = load_image(self, index)

            # Letterbox
            h, w, _ = img.shape
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            # Load labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [line.split() for line in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy format
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        if self.augment:
            # Augment colorspace
            augment_hsv(img,
                        hgain=self.hyp['hsv_h'],
                        sgain=self.hyp['hsv_s'],
                        vgain=self.hyp['hsv_v'])

            # Augment imagespace
            g = 0.0 if mosaic else 1.0  # do not augment mosaics
            hyp = self.hyp
            img, labels = random_affine(img,
                                        labels,
                                        degrees=hyp['degrees'] * g,
                                        translate=hyp['translate'] * g,
                                        scale=hyp['scale'] * g,
                                        shear=hyp['shear'] * g)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

Example #20

Show file

File: jde.py Project: HsiuWen/FairMOT

    def get_data(self, img_path, label_path):
        """Load an image and its labels, letterboxed to the dataset size.

        Args:
            img_path: Path of the image to read.
            label_path: Path of the label text file; rows are reshaped to six
                columns with the box stored in columns 2-5 as normalized
                centre-x, centre-y, width, height.

        Returns:
            Tuple ``(img, labels, img_path, (h, w))``: the RGB image (passed
            through ``self.transforms`` when set), the label array with the
            box columns as xywh divided by ``self.width`` / ``self.height``
            (an empty array when the label file is missing), the image path,
            and the image size before letterboxing.

        Raises:
            ValueError: If the image cannot be read.
        """
        height = self.height
        width = self.width
        img = cv2.imread(img_path)  # BGR
        if img is None:
            raise ValueError('File corrupt {}'.format(img_path))
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, a_min=0, a_max=255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, a_min=0, a_max=255, out=V)

            img_hsv[:, :, 1] = S.astype(np.uint8)
            img_hsv[:, :, 2] = V.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height, width=width)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
            labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
            labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
            labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, _ = random_affine(img,
                                           labels,
                                           degrees=(-5, 5),
                                           translate=(0.10, 0.10),
                                           scale=(0.50, 1.20))

        plotFlag = False
        if plotFlag:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            plt.figure(figsize=(50, 50))
            plt.imshow(img[:, :, ::-1])
            # The box occupies columns 2-5 (x1, y1, x2, y2) in this
            # six-column layout, so the outline is drawn from those columns.
            plt.plot(labels[:, [2, 4, 4, 2, 2]].T,
                     labels[:, [3, 3, 5, 5, 3]].T, '.-')
            plt.axis('off')
            plt.savefig('test.jpg')
            time.sleep(10)

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, then normalize x/w by width, y/h by height
            labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())
            labels[:, 2] /= width
            labels[:, 3] /= height
            labels[:, 4] /= width
            labels[:, 5] /= height
        if self.augment:
            # random left-right flip
            # ``&`` evaluates both operands, so a random number is always
            # drawn here.
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB

        if self.transforms is not None:
            img = self.transforms(img)

        return img, labels, img_path, (h, w)

Example #21

Show file

    def __getitem__(self, index):
        """Load one sample and return ``(image, targets, path, (h, w))``.

        When ``self.augment`` is truthy the image and labels come from
        ``load_mosaic``; otherwise the image is loaded, letterboxed, and its
        labels are mapped from normalized xywh to pixel xyxy.  Boxes are then
        converted back to xywh and divided by the output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the float32 CHW image tensor scaled to 0-1, an
            ``(nL, 6)`` tensor whose columns 1-5 hold the label rows (column
            0 stays zero), the image path, and ``(h, w)`` of the image as
            loaded (before letterboxing) or of the mosaic.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Mosaic loading (4 images at a time) is tied to augmentation.
        use_mosaic = self.augment
        if use_mosaic:
            img, labels = load_mosaic(self, index)
            h, w, _ = img.shape
        else:
            img = load_image(self, index)
            h, w, _ = img.shape  # size before letterboxing

            if self.rect:
                target, mode = self.batch_shapes[self.batch[index]], 'rect'
            else:
                target, mode = self.img_size, 'square'
            img, ratio, padw, padh = letterbox(img, target, mode=mode)

            labels = []
            if os.path.isfile(label_path):
                raw = self.labels[index]
                if raw is None:  # labels were not preloaded: parse the file
                    with open(label_path, 'r') as f:
                        rows = [line.split() for line in f.read().splitlines()]
                        raw = np.array(rows, dtype=np.float32)

                if raw.size > 0:
                    # Normalized xywh -> pixel xyxy in the letterboxed image
                    sx, sy = ratio[0] * w, ratio[1] * h
                    half_w, half_h = raw[:, 3] / 2, raw[:, 4] / 2
                    labels = raw.copy()
                    labels[:, 1] = sx * (raw[:, 1] - half_w) + padw
                    labels[:, 2] = sy * (raw[:, 2] - half_h) + padh
                    labels[:, 3] = sx * (raw[:, 1] + half_w) + padw
                    labels[:, 4] = sy * (raw[:, 2] + half_h) + padh

        if self.augment:
            # Mosaics get a zero gain, i.e. the affine parameters are nulled.
            gain = 0.0 if use_mosaic else 1.0
            hyp = self.hyp
            affine_args = {
                key: hyp[key] * gain
                for key in ('degrees', 'translate', 'scale', 'shear')
            }
            img, labels = random_affine(img, labels, **affine_args)

        n_labels = len(labels)
        if n_labels:
            # Pixel xyxy -> xywh, then scale into the 0-1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Horizontal flip is enabled; vertical flip is switched off.
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if n_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if n_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((n_labels, 6))
        if n_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel axis (BGR to RGB), HWC -> CHW, uint8 -> float32
        chw = img[:, :, ::-1].transpose(2, 0, 1)
        chw = np.ascontiguousarray(chw, dtype=np.float32)
        chw /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(chw), targets, img_path, (h, w)

Example #22

Show file

File: datasets.py Project: orange-eng/internship

    def __getitem__(self, index):
        """Load one sample and return ``(image, targets, path, shapes)``.

        With ``self.mosaic`` set the image and labels come from
        ``load_mosaic``.  Otherwise the image is loaded and letterboxed and
        the cached labels are mapped from normalized xywh to pixel xyxy.
        Boxes are finally stored as xywh divided by the output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the CHW image tensor (channel axis reversed, dtype
            unchanged), an ``(nL, 6)`` tensor whose columns 1-5 hold the
            label rows, ``self.img_files[index]``, and ``shapes`` which is
            ``None`` for mosaics and ``((h0, w0), ((h / h0, w / w0), pad))``
            otherwise.
        """
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if not self.mosaic:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape
            if self.rect:
                target_shape = self.batch_shapes[self.batch[index]]
            else:
                target_shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        target_shape,
                                        auto=False,
                                        scaleup=self.augment)
            # Kept for COCO mAP rescaling
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw.size > 0:
                # Normalized xywh -> pixel xyxy; pad[0] is the width padding,
                # pad[1] the height padding.
                sx, sy = ratio[0] * w, ratio[1] * h
                half_w, half_h = raw[:, 3] / 2, raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = sx * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = sy * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = sx * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = sy * (raw[:, 2] + half_h) + pad[1]
        else:
            img, labels = load_mosaic(self, index)
            shapes = None

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                affine_args = {
                    key: hyp[key]
                    for key in ('degrees', 'translate', 'scale', 'shear')
                }
                img, labels = random_affine(img, labels, **affine_args)

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        n_labels = len(labels)
        if n_labels:
            # Pixel xyxy -> xywh, then scale into the 0-1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Horizontal flip is enabled; vertical flip is switched off.
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if n_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if n_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((n_labels, 6))
        if n_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel axis (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes

Example #23

Show file

File: datasets.py Project: mozpp/yolov3

    def augment_collection(self, index):
        """Load, augment and normalize one sample as NumPy arrays.

        When ``self.augment`` is truthy a mosaic is loaded with probability
        0.5; otherwise (or in the other half of the cases) a single image is
        loaded, letterboxed, and its labels are mapped from normalized xywh
        to pixel xyxy.  Boxes are finally stored as xywh divided by the
        output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the float32 CHW image array scaled to 0-1 (and
            optionally mean/std normalized), an ``(nL, 7)`` float32 array
            with the class in column 1, a constant ``gt_score`` of 1 in
            column 2 and the box in columns 3-6 (column 0 stays zero), the
            image path, and ``(h, w)`` of the image as loaded (before
            letterboxing) or of the mosaic.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        mosaic = True and self.augment  # load 4 images at a time into a mosaic (only during training)
        if mosaic and random.random() < 0.5:  # mosaic is applied to half of the samples
            # Load mosaic
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]

        else:
            # Load image
            img = load_image(self, index)

            # Letterbox
            h, w = img.shape[:2]
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            # Load labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy format
                    labels = x.copy()
                    labels[:,
                           1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:,
                           2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:,
                           3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:,
                           4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        if self.augment:
            # Augment colorspace (return value is not used)
            augment_hsv(img,
                        hgain=self.hyp['hsv_h'],
                        sgain=self.hyp['hsv_s'],
                        vgain=self.hyp['hsv_v'])

            # Augment imagespace.  The gain is fixed at 1, so the affine
            # hyper-parameters are applied at full strength to mosaics and
            # single images alike (the former mosaic-dependent gain was a
            # dead store that this value always overwrote).
            g = 1
            hyp = self.hyp
            img, labels = random_affine(img,
                                        labels,
                                        degrees=hyp['degrees'] * g,
                                        translate=hyp['translate'] * g,
                                        scale=hyp['scale'] * g,
                                        shear=hyp['shear'] * g)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False  # activate for top-view data
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Seven columns instead of six: a gt_score column is inserted
        # between the class and the box.
        labels_out = np.zeros((nL, 7), dtype='float32')
        if nL:
            labels_out[:, 1] = labels[:, 0]  # cls
            labels_out[:, 2] = 1  # gt_score
            labels_out[:, 3:] = labels[:, 1:]  # [x y w h]

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # NOTE(review): norm_with_mean_std is not defined in this method;
        # presumably a module-level flag - confirm it exists at import time.
        if norm_with_mean_std:
            img[0] = (img[0] - 0.485) / 0.229
            img[1] = (img[1] - 0.456) / 0.224
            img[2] = (img[2] - 0.406) / 0.225

        return img, labels_out, img_path, (h, w)

Example #24

Show file

    def __getitem__(self, index):
        """Load one sample and return ``(image, targets, path, shapes)``.

        When ``self.augment`` is truthy the image and labels come from
        ``load_mosaic``; otherwise the image is loaded, letterboxed, and its
        labels are mapped from normalized xywh to pixel xyxy.  Boxes are
        finally stored as xywh divided by the output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the CHW image tensor (channel axis reversed, dtype
            unchanged), an ``(nL, 6)`` tensor whose columns 1-5 hold the
            label rows, the image path, and ``((h, w), (ratio, pad))`` where
            ``ratio`` and ``pad`` are ``None`` for mosaics.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]
        hyp = self.hyp

        # Mosaic loading (4 images at a time) is tied to augmentation.
        use_mosaic = self.augment
        if use_mosaic:
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]
            ratio, pad = None, None
        else:
            img = load_image(self, index)
            h, w = img.shape[:2]  # size before letterboxing

            # Final letterboxed shape
            if self.rect:
                target_shape = self.batch_shapes[self.batch[index]]
            else:
                target_shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        target_shape,
                                        auto=False,
                                        scaleup=self.augment)

            labels = []
            if os.path.isfile(label_path):
                raw = self.labels[index]
                if raw is None:  # labels were not preloaded: parse the file
                    with open(label_path, 'r') as f:
                        rows = [line.split() for line in f.read().splitlines()]
                        raw = np.array(rows, dtype=np.float32)

                if raw.size > 0:
                    # Normalized xywh -> pixel xyxy; pad[0] is the width
                    # padding, pad[1] the height padding.
                    sx, sy = ratio[0] * w, ratio[1] * h
                    half_w, half_h = raw[:, 3] / 2, raw[:, 4] / 2
                    labels = raw.copy()
                    labels[:, 1] = sx * (raw[:, 1] - half_w) + pad[0]
                    labels[:, 2] = sy * (raw[:, 2] - half_h) + pad[1]
                    labels[:, 3] = sx * (raw[:, 1] + half_w) + pad[0]
                    labels[:, 4] = sy * (raw[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation only applies to non-mosaic samples.
            # NOTE(review): use_mosaic mirrors self.augment, so this branch
            # looks unreachable as written - confirm that is intended.
            if not use_mosaic:
                affine_args = {
                    key: hyp[key]
                    for key in ('degrees', 'translate', 'scale', 'shear')
                }
                img, labels = random_affine(img, labels, **affine_args)

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        n_labels = len(labels)
        if n_labels:
            # Pixel xyxy -> xywh, then scale into the 0-1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Horizontal flip is enabled; vertical flip is switched off.
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if n_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if n_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((n_labels, 6))
        if n_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel axis (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        shapes = ((h, w), (ratio, pad))
        return torch.from_numpy(chw), targets, img_path, shapes

Example #25

Show file

File: datasets.py Project: sunjieee/yolov3

    def __getitem__(self, index):
        """Load one sample with box and point labels.

        With ``self.mosaic`` set the image and labels come from
        ``load_mosaic``.  Otherwise the image is loaded and letterboxed, the
        box in columns 1-4 is mapped from normalized xywh to pixel xyxy, and
        the sixteen extra columns 5-20 are scaled as alternating x/y pixel
        coordinates.  Everything is finally divided by the output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the CHW image tensor (channel axis reversed, dtype
            unchanged), an ``(nL, 22)`` tensor whose columns 1-21 hold the
            label rows, ``self.img_files[index]``, and ``shapes`` which is
            ``None`` for mosaics and ``((h0, w0), ((h / h0, w / w0), pad))``
            otherwise.
        """
        # Column indices of the extra point coordinates: x in the odd
        # columns 5..19, y in the even columns 6..20.
        pt_x_cols = list(range(5, 21, 2))
        pt_y_cols = [col + 1 for col in pt_x_cols]

        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if not self.mosaic:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape
            if self.rect:
                target_shape = self.batch_shapes[self.batch[index]]
            else:
                target_shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        target_shape,
                                        auto=False,
                                        scaleup=self.augment)
            # Kept for COCO mAP rescaling
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw is not None and raw.size > 0:
                # Normalized xywh -> pixel xyxy; pad[0] is the width padding,
                # pad[1] the height padding.
                sx, sy = ratio[0] * w, ratio[1] * h
                half_w, half_h = raw[:, 3] / 2, raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = sx * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = sy * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = sx * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = sy * (raw[:, 2] + half_h) + pad[1]
                # Extra points: normalized -> pixel coordinates
                labels[:, pt_x_cols] = sx * raw[:, pt_x_cols] + pad[0]
                labels[:, pt_y_cols] = sy * raw[:, pt_y_cols] + pad[1]
        else:
            img, labels = load_mosaic(self, index)
            shapes = None

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                affine_args = {
                    key: hyp[key]
                    for key in ('degrees', 'translate', 'scale', 'shear')
                }
                img, labels = random_affine(img, labels, **affine_args)

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        n_labels = len(labels)
        if n_labels:
            # Pixel xyxy -> xywh, then scale everything into the 0-1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w
            labels[:, pt_y_cols] /= out_h
            labels[:, pt_x_cols] /= out_w

        if self.augment:
            # Horizontal flip is enabled; vertical flip is switched off.
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if n_labels:
                    labels[:, 1] = 1 - labels[:, 1]
                    labels[:, pt_x_cols] = 1 - labels[:, pt_x_cols]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if n_labels:
                    labels[:, 2] = 1 - labels[:, 2]
                    labels[:, pt_y_cols] = 1 - labels[:, pt_y_cols]

        # 6 standard columns plus the 16 point coordinates
        targets = torch.zeros((n_labels, 6 + 16))
        if n_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel axis (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes

Example #26

Show file

    def __getitem__(self, index):  # overrides the method of the Dataset parent class
        """Load one sample and return ``(image, targets, path, shapes, index)``.

        With ``self.mosaic`` set the image and labels come from
        ``load_mosaic``.  Otherwise the image is loaded and letterboxed and
        the cached labels (class, x, y, w, h) are mapped from normalized xywh
        to pixel xyxy.  Boxes are finally stored as xywh divided by the
        output image size.

        Args:
            index: Sample index.

        Returns:
            A tuple of: the CHW image tensor (channel axis reversed, dtype
            unchanged), an ``(nL, 6)`` tensor whose columns 1-5 hold the
            label rows (column 0 stays zero), ``self.img_files[index]``,
            ``shapes`` (``None`` for mosaics, otherwise
            ``((h0, w0), ((h / h0, w / w0), pad))``), and ``index`` itself.
        """
        hyp = self.hyp
        if not self.mosaic:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape
            if self.rect:
                target_shape = self.batch_shapes[self.batch[index]]
            else:
                target_shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        target_shape,
                                        auto=False,
                                        scale_up=self.augment)
            # Kept for COCO mAP rescaling
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw.size > 0:
                # Normalized xywh -> pixel xyxy; pad[0] is the width padding,
                # pad[1] the height padding.
                sx, sy = ratio[0] * w, ratio[1] * h
                half_w, half_h = raw[:, 3] / 2, raw[:, 4] / 2
                labels = raw.copy()  # rows: class, x, y, w, h
                labels[:, 1] = sx * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = sy * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = sx * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = sy * (raw[:, 2] + half_h) + pad[1]
        else:
            img, labels = load_mosaic(self, index)
            shapes = None

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                affine_args = {
                    key: hyp[key]
                    for key in ("degrees", "translate", "scale", "shear")
                }
                img, labels = random_affine(img, labels, **affine_args)

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        h_gain=hyp["hsv_h"],
                        s_gain=hyp["hsv_s"],
                        v_gain=hyp["hsv_v"])

        n_labels = len(labels)
        if n_labels:
            # Pixel xyxy -> xywh, then scale into the 0-1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            out_h, out_w = img.shape[0], img.shape[1]
            labels[:, [2, 4]] /= out_h
            labels[:, [1, 3]] /= out_w

        if self.augment:
            # Horizontal flip is enabled; vertical flip is switched off.
            flip_lr, flip_ud = True, False

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if n_labels:
                    labels[:, 1] = 1 - labels[:, 1]  # mirror x_center

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if n_labels:
                    labels[:, 2] = 1 - labels[:, 2]  # mirror y_center

        targets = torch.zeros((n_labels, 6))
        if n_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse the channel axis (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes, index

Example #27

Show file

File: datasets_bak.py Project: Ronales/Detection-bbox-labelme-assist-annotation-by-using-yolo3-detector-

    def __getitem__(self, index):
        """Load one sample and return ``(image, targets, path, (h, w))``.

        When ``self.augment`` is truthy the image and labels come from
        ``load_mosaic``; otherwise the image is loaded, letterboxed, and its
        labels are mapped from normalized xywh to pixel xyxy.  The channel
        axis is then reversed and one randomly chosen ``iaa`` colour
        augmenter is applied.  Boxes are finally stored as xywh divided by
        the output image size.

        Args:
            index: Sample index; remapped through ``self.indices`` when
                ``self.image_weights`` is set.

        Returns:
            A tuple of: the float32 CHW image tensor scaled to 0-1, an
            ``(nL, 6)`` tensor whose columns 1-5 hold the label rows (column
            0 stays zero), the image path, and ``(h, w)`` of the image as
            loaded (before letterboxing) or of the mosaic.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        mosaic = True and self.augment  # load 4 images at a time into a mosaic (only during training)
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]

        else:
            # Load image
            img = load_image(self, index)

            # Letterbox
            h, w = img.shape[:2]
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            # Load labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy format
                    labels = x.copy()
                    labels[:,
                           1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:,
                           2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:,
                           3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:,
                           4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        # This condition holds for augment=True and augment=False alike, so
        # the channel swap below also runs at test time; the final transpose
        # relies on it because it no longer reverses the channel axis.
        # NOTE(review): the random colour augmenter therefore also runs when
        # augment is False - confirm that is intended.
        if self.augment or self.augment is False:
            # Augment colorspace
            img = img[:, :, (2, 1, 0)]  # bgr(cv2) to rgb(plt)
            p = [
                iaa.Multiply([1, 2.5, 0.5, 1.5]),
                iaa.SigmoidContrast(gain=10, cutoff=[0.75, 1, 0.5]),
            ]
            # Apply exactly one of the candidate augmenters, chosen at random
            sequence_iaa = iaa.Sequential([random.choice(p)])
            img = sequence_iaa.augment_image(img)

            # Augment imagespace: called with every parameter set to zero
            img, labels = random_affine(img,
                                        labels,
                                        degrees=0,
                                        translate=0,
                                        scale=0,
                                        shear=0)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize.  The channel axis was already reversed above, so only
        # the HWC -> CHW transpose is needed here.
        img = img[:, :, :].transpose(2, 0, 1)  # RGB, to 3x416x416

        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)