Code example #1
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
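
The docstring above mentions loc_targets and cls_targets, but the method itself returns raw boxes and labels; in kuangliu-style pytorch-retinanet code the encoding step normally lives in the dataset's collate_fn. A minimal sketch of that pattern, assuming the module imports torch and the dataset holds a DataEncoder as self.encoder with an encode(boxes, labels, input_size) method:

    def collate_fn(self, batch):
        '''Stack images and encode boxes/labels into loc/cls targets.

        Sketch only: assumes self.encoder is a DataEncoder as in the
        kuangliu/pytorch-retinanet repo.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(
                boxes[i], labels[i], input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)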
Code example #2
File: datagen.py  Project: hopstone/pytorch-retinanet
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
Code example #3
def data_augmentation(img_name, idx):
    '''
    Data augmentation on the segmented image. Adapted from retinanet_dataset.py
    in pytorch_retinanet; all transformation helpers are imported from
    transform.py (originally from pytorch_retinanet.utils).

    Note:
    For now, the image is augmented randomly.
    '''
    img = Image.open(os.path.join(SEG_IMG_DIR, img_name))
    if img.mode != 'RGB':
        img = img.convert('RGB')

    size = 600  # the desired image size

    img = random_flip(img)
    img = random_crop(img)
    img = resize(img, (size, size))
    if random.random() > 0.5:
        img = ImageEnhance.Color(img).enhance(random.uniform(0, 1))
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.5, 2))
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.5, 1.5))
        img = ImageEnhance.Sharpness(img).enhance(random.uniform(0.5, 1.5))
        # im1 = img.filter(ImageFilter.BLUR)  # Gaussian blur (disabled)
    else:
        # No-op branch: the image was already resized to (size, size) above.
        img = resize(img, (size, size))
        # img, boxes = center_crop(img, boxes, (size, size))

    filename = img_name[:-4] + "_" + str(idx) + ".png"
    img.save(os.path.join(SEG_IMG_DIR + "/transformed", filename), "PNG")
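
A hedged usage sketch for the function above: generate several augmented copies of each segmented image. N_COPIES and the directory scan are illustrative, not part of the original project:

import os

N_COPIES = 5  # hypothetical: number of augmented variants per image

for img_name in os.listdir(SEG_IMG_DIR):
    if not img_name.lower().endswith(('.png', '.jpg')):
        continue
    for idx in range(N_COPIES):
        data_augmentation(img_name, idx)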
Code example #4
    def __getitem__(self, index):
        fname = os.path.join(self.im_pth, self.fnames[index])
        img = Image.open(fname)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        boxes = self.boxes[index].clone()
        size = self.size
        #print(img.size)
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, size)
        else:
            img, boxes = center_crop(img, boxes, size)
            img, boxes = resize(img, boxes, size)
        if self.transform is not None:
            img = self.transform(img)

        dense_map = torch.zeros([1, img.size()[1], img.size()[2]], dtype=torch.float32)
        #print(dense_map.size())
        box_num = 0
        for box in boxes:
            area = (box[2] - box[0]) * (box[3] - box[1])
            #print(box[0], box[1], box[2], box[3], area)
            if area < 100.:
                continue
            box_num += 1
            y0, y1 = int(box[1]), int(box[3])
            x0, x1 = int(box[0]), int(box[2])
            try:
                dense_map[:, y0:y1, x0:x1] += 1 / area
            except Exception:
                print(fname, dense_map.size())
                print(y0, y1, x0, x1, area)
        return img, dense_map, box_num
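
Since every kept box adds 1/area over roughly area pixels, the density map should integrate to about the returned box count, which gives a cheap sanity check. A sketch, assuming a constructed dataset instance:

img, dense_map, box_num = dataset[0]  # hypothetical dataset instance
# Each box contributes area * (1/area) = 1, modulo clipping at image
# borders and integer truncation of the box coordinates.
print(float(dense_map.sum()), box_num)  # the two values should be close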
Code example #5
File: datagen.py  Project: lizhe960118/find-star
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]

        prefix_name = fname[:2]

        # Train and test images share the same directory layout.
        image_path = self.root + '/' + prefix_name + '/' + fname

        # img = Image.open(os.path.join(self.root, fname))

        img_a = Image.open(image_path + '_a.jpg')
        img_b = Image.open(image_path + '_b.jpg')
        img_c = Image.open(image_path + '_c.jpg')
        img = Image.merge('RGB', (img_a, img_b, img_c))

        # if img.mode != 'RGB':
        #     img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        # if self.transforms is not None:
        #     # if img is a byte or uint8 array, it will convert from 0-255 to 0-1
        #     # this converts from (HxWxC) to (CxHxW) as well
        #     img_a, img_b, img_c = image
        #     img_a = self.transforms(img_a)
        #     img_b = self.transforms(img_b)
        #     img_c = self.transforms(img_c)
        #     img = (img_a, img_b, img_c)

        return img, boxes, labels
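
Image.merge('RGB', ...) requires three single-band ('L' mode) images, so the merge above only works if the _a/_b/_c files decode as grayscale. A defensive variant (a sketch, not the project's code) converts each band explicitly:

bands = [Image.open(image_path + suffix).convert('L')
         for suffix in ('_a.jpg', '_b.jpg', '_c.jpg')]
img = Image.merge('RGB', bands)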
Code example #6
File: datagen.py  Project: nealeaf/pytorch-retinanet
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')
        width, height = img.size

        flabel = fname.replace('images/',
                               'labels/').replace('.jpg', '.txt').replace(
                                   '.png', '.txt').replace('.jpeg', '.txt')
        box = []
        label = []
        with open(flabel) as f:
            lines = f.readlines()
            for line in lines:
                # Each line is "class cx cy w h" with coordinates normalized
                # to [0, 1]; convert to absolute (xmin, ymin, xmax, ymax).
                ls = line.strip().split()
                x = float(ls[1]) * width
                y = float(ls[2]) * height
                w = float(ls[3]) * width
                h = float(ls[4]) * height
                box.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2])
                label.append(int(ls[0]))

        boxes = torch.Tensor(box)
        labels = torch.LongTensor(label)
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels
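
The parsing loop above converts YOLO-normalized (class, cx, cy, w, h) rows into absolute corner boxes. A worked example with illustrative numbers:

width, height = 640, 480                   # illustrative image size
cls, cx, cy, w, h = 0, 0.5, 0.5, 0.2, 0.4  # one YOLO label row
box = [cx * width - w * width / 2, cy * height - h * height / 2,
       cx * width + w * width / 2, cy * height + h * height / 2]
print(box)  # [256.0, 144.0, 384.0, 336.0]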
Code example #7
    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
          att_map: (tensor) attention map over the box regions.
          img_path: (str) path of the loaded image.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img_path = os.path.join(self.root, fname)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        src_shape = self.shape_list[idx]

        att_map = np.zeros([src_shape[0], src_shape[1]])

        for att_box in boxes:
            # Boxes are (xmin, ymin, xmax, ymax); assuming src_shape is
            # (height, width), rows are indexed by y and columns by x.
            att_map[int(att_box[1]):int(att_box[3]),
                    int(att_box[0]):int(att_box[2])] = 1

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))
        att_map = Image.fromarray(att_map)
        att_map = att_map.resize((size // 2, size // 2), Image.BILINEAR)

        #img.save('test_in_datagen.jpg')

        img = self.transform(img)
        att_map = self.transform(att_map)

        return img, boxes, labels, att_map, img_path
Code example #8
    def __getitem__(self, idx):
        '''Load image.
        Args:
          idx: (int) image index.
        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
          fname: (str) image filename.
          In debug mode, returns img and a fixed-size box tensor instead.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            # img, boxes = center_crop(img, boxes, (size, size))
        img = self.transform(img)

        if self.is_debug:
            # Pad boxes to a fixed-size (max_objects, 4) tensor so that
            # batches stack cleanly.
            filled_labels = np.zeros((self.max_objects, 4), dtype=np.float32)
            if boxes is not None:
                n = min(len(boxes), self.max_objects)
                filled_labels[:n] = boxes[:n]
            else:
                print('no object')
            filled_labels = torch.from_numpy(filled_labels)
            return img, filled_labels
        else:
            #error_indices_1 = np.where(labels<0)
            #error_indices_2 = np.where(labels>19)
            #print('_______:',labels[error_indices_1])
            #print(labels.shape)
            return img, boxes, labels, fname
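
Padding to a fixed (max_objects, 4) tensor in debug mode gives every sample the same shape, so PyTorch's default collate can stack batches without a custom collate_fn. A sketch, assuming a dataset instance built with is_debug=True:

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=8, shuffle=True)  # hypothetical dataset
imgs, filled_labels = next(iter(loader))
print(imgs.shape, filled_labels.shape)  # (8, 3, size, size), (8, max_objects, 4)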
Code example #9
File: datagen.py  Project: Gmy12138/RetinaNet
    def __getitem__(self, index):
        '''Load image.

        Args:
          index: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
          fname: (str) image filename stem.
        '''
        # Load image and boxes.

        img_path = self._imgpath[index].rstrip()
        fname = img_path.split('/')[-1].split('.')[0]

        # print(img_path)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        label_path = self._labpath[index].rstrip()
        # print(label_path)

        targets = np.loadtxt(label_path).reshape(-1, 5)
        # targets = np.array(targets)
        # print(targets)
        boxes = torch.Tensor(targets[:, 1:])
        labels = torch.LongTensor(targets[:, 0])

        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            # img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels, fname
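
np.loadtxt returns a 1-D array of shape (5,) when a label file holds a single row, which is why the reshape(-1, 5) above is needed. A quick illustration:

import io
import numpy as np

one_row = np.loadtxt(io.StringIO('3 0.1 0.2 0.5 0.6'))  # shape (5,)
print(one_row.reshape(-1, 5).shape)                      # (1, 5)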
Code example #10
File: get_data.py  Project: apletea/machine_learning
    def read_from_disk(self, queue):
        # allocating memory
        max_tag_len = 128
        image       = np.zeros((self.input_size[0], self.input_size[1],3), dtype=np.float32)
        heatmaps_tl = np.zeros((self.output_size[0], self.output_size[1],self.categories), dtype=np.float32)
        heatmaps_br = np.zeros((self.output_size[0], self.output_size[1],self.categories), dtype=np.float32)
        offsets_tl    = np.zeros((max_tag_len, 2), dtype=np.float32)
        offsets_br    = np.zeros((max_tag_len, 2), dtype=np.float32)
        tags_tl     = np.zeros((max_tag_len), dtype=np.int64)
        tags_br     = np.zeros((max_tag_len), dtype=np.int64)
        tags_mask   = np.zeros((max_tag_len), dtype=np.float32)
        boxes       = np.zeros((max_tag_len,4), dtype=np.int64)
        ratio       = np.ones((max_tag_len,2), dtype=np.float32)
        tag_lens    = 0

        # reading image (this overwrites the pre-allocated buffer above)
        image = self.coco.read_img(queue[0])

        # reading detections
        detections = self.coco.detections(queue[0])

        # cropping an image randomly
        if self.rand_crop:
            image, detections = random_crop(image, detections, self.rand_scales, self.input_size, border=self.border)
        else:
            image, detections = full_image_crop(image, detections)

        image, detections = resize_image(image, detections, self.input_size)
        detections = clip_detections(image, detections)

        width_ratio  = self.output_size[1] / self.input_size[1]
        height_ratio = self.output_size[0] / self.input_size[0]

        # flipping an image randomly
        if np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width    = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1


        image = image.astype(np.float32) / 255.
        # if rand_color:
        #     color_jittering_(data_rng, image)
        #     if lighting:
        #         lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)

        #normalize_(image, self.coco.mean, self.coco.std)

        for ind, detection in enumerate(detections):
            category = int(detection[-1]) - 1

            xtl_ori, ytl_ori = detection[0], detection[1]
            xbr_ori, ybr_ori = detection[2], detection[3]

            fxtl = (xtl_ori * width_ratio)
            fytl = (ytl_ori * height_ratio)
            fxbr = (xbr_ori * width_ratio)
            fybr = (ybr_ori * height_ratio)


            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)


            if self.gaussian_bump:
                width  = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width  = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if self.gaussian_rad == -1:
                    radius = gaussian_radius((height, width), self.gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = self.gaussian_rad

                draw_gaussian(heatmaps_tl[:,:,category], [xtl, ytl], radius)
                draw_gaussian(heatmaps_br[:,:,category], [xbr, ybr], radius)
            else:
                heatmaps_tl[ytl, xtl, category] = 1
                heatmaps_br[ybr, xbr, category] = 1

            tag_ind = tag_lens
            offsets_tl[tag_ind, :] = [fxtl - xtl, fytl - ytl]
            offsets_br[tag_ind, :] = [fxbr - xbr, fybr - ybr]
            tags_tl[tag_ind] = ytl * self.output_size[1] + xtl
            tags_br[tag_ind] = ybr * self.output_size[1] + xbr
            boxes[tag_ind] = [xtl_ori, ytl_ori, xbr_ori, ybr_ori]
            ratio[tag_ind] = [width_ratio, height_ratio]
            tag_lens += 1
        tags_mask[:tag_lens] = 1
        return image, tags_tl, tags_br, heatmaps_tl, heatmaps_br, tags_mask, offsets_tl, offsets_br, boxes, ratio
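
gaussian_radius and draw_gaussian come from CornerNet-style utilities that are not shown on this page. A minimal sketch of what draw_gaussian typically looks like; the names and the sigma = diameter / 6 choice follow the common CornerNet implementation and are an assumption about this project:

import numpy as np

def gaussian2d(shape, sigma=1.0):
    # Unnormalized 2-D Gaussian on an odd-sized grid centered at the middle.
    m, n = [(s - 1) / 2 for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    return np.exp(-(x * x + y * y) / (2 * sigma * sigma))

def draw_gaussian(heatmap, center, radius):
    # Splat a Gaussian bump at `center`, clipped to the heatmap borders,
    # keeping the element-wise maximum where bumps overlap.
    diameter = 2 * radius + 1
    gaussian = gaussian2d((diameter, diameter), sigma=diameter / 6)
    x, y = center
    h, w = heatmap.shape
    left, right = min(x, radius), min(w - x, radius + 1)
    top, bottom = min(y, radius), min(h - y, radius + 1)
    masked = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gauss = gaussian[radius - top:radius + bottom,
                            radius - left:radius + right]
    np.maximum(masked, masked_gauss, out=masked)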