コード例 #1
0
ファイル: dataset_davis.py プロジェクト: ChiangSH/DPMN
def get_one(sample, new_size, args):
    """Load and preprocess one training sample.

    Args:
        sample: Sequence of file paths. With exactly four entries it is
            (guide image, guide label, input image, ground-truth label) and
            the guide label doubles as the spatial reference label. Otherwise
            it is (guide image, guide label, reference label, input image,
            ground-truth label), optionally followed by an object id at
            index 5.
        new_size: Size handed to PIL ``resize`` for the input image, its
            label and the reference label.
        args: Options object. This function reads ``guide_size``,
            ``data_aug_flip``, ``vg_keep_aspect_ratio``,
            ``vg_random_crop_ratio``, ``vg_random_rotate_angle``,
            ``vg_color_aug``, ``use_original_mask``,
            ``sg_center_perturb_ratio``, ``sg_std_perturb_ratio``,
            ``dilate_structure``, ``mean_value`` and ``scale_value``.

    Returns:
        Tuple ``(guide_image_data, gb_image, image_data, label_data)`` where
        ``label_data`` is the boolean array ``label > 0``.
    """
    # The first two entries are the visual guide in both sample layouts.
    guide_image = Image.open(sample[0])
    guide_label = Image.open(sample[1])
    if len(sample) == 4:
        # guide label is used for both appearance and location guidance
        ref_label = guide_label
        image = Image.open(sample[2])
        label = Image.open(sample[3])
    else:
        # a separate reference label provides the location guidance
        ref_label = Image.open(sample[2])
        image = Image.open(sample[3])
        label = Image.open(sample[4])
    label_id = sample[5] if len(sample) > 5 else 0

    image = image.resize(new_size, Image.BILINEAR)
    label = label.resize(new_size, Image.NEAREST)
    ref_label = ref_label.resize(new_size, Image.NEAREST)
    # Bring the guide label to the guide image resolution before cropping.
    guide_label = guide_label.resize(guide_image.size, Image.NEAREST)
    if label_id > 0:
        guide_label = _get_obj_mask(guide_label, label_id)
        ref_label = _get_obj_mask(ref_label, label_id)
        label = _get_obj_mask(label, label_id)

    # Crop the visual guide to the bounding box of its mask.
    bbox = get_mask_bbox(np.array(guide_label))
    guide_image = guide_image.crop(bbox)
    guide_label = guide_label.crop(bbox)
    guide_image, guide_label = data_augmentation(
        guide_image,
        guide_label,
        args.guide_size,
        data_aug_flip=args.data_aug_flip,
        keep_aspect_ratio=args.vg_keep_aspect_ratio,
        random_crop_ratio=args.vg_random_crop_ratio,
        random_rotate_angle=args.vg_random_rotate_angle,
        color_aug=args.vg_color_aug)

    if args.use_original_mask:
        gb_image = perturb_mask(np.array(ref_label))
        # ndimage.binary_dilation is the same function as the deprecated
        # ndimage.morphology.binary_dilation alias.
        gb_image = ndimage.binary_dilation(
            gb_image, structure=args.dilate_structure) * 255
    else:
        gb_image = get_gb_image(np.array(ref_label),
                                center_perturb=args.sg_center_perturb_ratio,
                                std_perturb=args.sg_std_perturb_ratio)

    image_data = np.array(image, dtype=np.float32)
    label_data = np.array(label, dtype=np.uint8) > 0
    image_data = to_bgr(image_data)
    image_data = (image_data - args.mean_value) * args.scale_value

    guide_label_data = np.array(guide_label, dtype=np.uint8)
    guide_image_data = np.array(guide_image, dtype=np.float32)
    guide_image_data = to_bgr(guide_image_data)
    guide_image_data = (guide_image_data - args.mean_value) * args.scale_value
    guide_image_data = mask_image(guide_image_data, guide_label_data)
    return guide_image_data, gb_image, image_data, label_data
コード例 #2
0
ファイル: dataset_coco.py プロジェクト: ChiangSH/DPMN
    def prefilter(self, dataset):
        """Select the annotations whose mask covers enough of the image.

        Crowd annotations are dropped. For every remaining annotation the
        mask is rendered with ``dataset.annToMask``; it is kept only when the
        fraction of non-zero mask pixels exceeds ``self.fg_thresh``. Kept
        annotations have their ``'bbox'`` entry overwritten in place with the
        result of ``get_mask_bbox`` on the mask.

        Args:
            dataset: Object exposing ``dataset['annotations']`` and
                ``annToMask(anno)``.

        Returns:
            List of the annotation dicts that passed the filter.
        """
        kept = []
        for entry in dataset.dataset['annotations']:
            # crowd annotations are never used
            if entry['iscrowd']:
                continue

            mask = dataset.annToMask(entry)
            total_pixels = float(mask.shape[0] * mask.shape[1])
            fg_fraction = np.count_nonzero(mask) / total_pixels
            if not fg_fraction > self.fg_thresh:
                continue

            entry['bbox'] = get_mask_bbox(mask)
            kept.append(entry)
        return kept
コード例 #3
0
                for prev_frame, frame in zip(test_frames[1:-1], test_frames[2:])]
                    
for name in train_seq_names:
    train_frames = sorted(os.listdir(os.path.join(baseDirImg, name)))
    label_fds = os.listdir(os.path.join(baseDirLabel, name)) if data_version == 2017 else \
            [os.path.join(baseDirLabel, name)]
    for label_id in label_fds:
        # each sample: visual guide image, visual guide mask, spatial guide mask, input image, ground truth mask
        if randomize_guide:
            # filter images to get good quality visual guide images
            valid_label_idx = []
            nonblank_label_idx = []
            for frame in train_frames:
                label = Image.open(os.path.join(baseDirLabel, name, label_id, frame[:-4] + '.png'))
                label_data = np.array(label) > 0
                bbox = get_mask_bbox(label_data, border_pixels=0)
                if np.sum(label_data) > 0:
                    nonblank_label_idx.append(frame)
                if np.sum(label_data) > label_data.size * args.label_valid_ratio and \
                        np.sum(label_data) > (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) * args.bbox_valid_ratio:
                    valid_label_idx.append(frame[:-4])
            train_frames = nonblank_label_idx
            if len(valid_label_idx) > 0:
                # randomly select guide image for each frame
                random_guide_idx = np.random.randint(0, len(valid_label_idx),(len(train_frames)))
            else:
                # default to use the first frame
                valid_label_idx = [train_frames[0][:-4]]
                random_guide_idx = np.zeros((len(train_frames)), dtype=np.int32)
            # use random frame as visual guide and ground truth of previous frame as spatial guide
            train_imgs_with_guide += [(os.path.join(baseDirImg, name, valid_label_idx[guide_id]+'.jpg'),
コード例 #4
0
ファイル: dataset_davis.py プロジェクト: ChiangSH/DPMN
    def next_batch(self, batch_size, phase):
        """Fetch the next training batch or the next test sample.

        Args:
            batch_size: Number of samples to fetch. Must be 1 when ``phase``
                is 'test'.
            phase: 'train' or 'test'.

        Returns:
            For 'train': ``(guide_images, gb_images, images, labels)`` as
            numpy arrays stacked from the per-sample ``get_one`` results;
            ``gb_images`` and ``labels`` get a trailing singleton axis.

            For 'test': ``(guide_images, gb_images, images, image_paths)``.
            When the sample has a visual guide (``sample[0]`` is not None)
            only ``guide_images`` is populated and ``gb_images`` / ``images``
            are None; otherwise ``guide_images`` is None and the other two are
            populated. ``image_paths`` is a one-element list holding the
            relative output name of the frame.

            For any other phase: ``(None, None, None, None)``.
        """
        if phase == 'train':
            if self.train_ptr + batch_size <= self.train_size:
                idx = np.array(self.train_idx[self.train_ptr:self.train_ptr +
                                              batch_size])
                self.train_ptr += batch_size
            else:
                # Not enough samples left: reshuffle and restart from the top.
                np.random.shuffle(self.train_idx)
                idx = np.array(self.train_idx[:batch_size])
                self.train_ptr = batch_size

            # Fall back to the unscaled size when no augmentation scales are
            # configured; previously new_size was left undefined in that case.
            scale = (random.choice(self.data_aug_scales)
                     if self.data_aug_scales else 1)
            new_size = (int(self.size[0] * scale), int(self.size[1] * scale))

            if self.args.num_loader == 1:
                batch = [
                    get_one(self.train_list[i], new_size, self.args)
                    for i in idx
                ]
            else:
                # pool.apply() blocks until each result is ready, which made
                # the workers run one at a time. Submit everything first and
                # collect afterwards; result order still follows idx.
                # Assumes self.pool is a multiprocessing-style pool -- TODO confirm.
                pending = [
                    self.pool.apply_async(get_one,
                                          args=(self.train_list[i], new_size,
                                                self.args)) for i in idx
                ]
                batch = [job.get() for job in pending]

            guide_images = []
            gb_images = []
            images = []
            labels = []
            for guide_image_data, gb_image, image_data, label_data in batch:
                guide_images.append(guide_image_data)
                gb_images.append(gb_image)
                images.append(image_data)
                labels.append(label_data)
            images = np.array(images)
            gb_images = np.array(gb_images)[..., np.newaxis]
            labels = np.array(labels)[..., np.newaxis]
            guide_images = np.array(guide_images)
            return guide_images, gb_images, images, labels
        elif phase == 'test':
            image_paths = []
            self.crop_boxes = []
            self.images = []
            assert batch_size == 1, "Only allow batch size = 1 for testing"
            if self.test_ptr + batch_size < self.test_size:
                idx = np.array(self.test_idx[self.test_ptr:self.test_ptr +
                                             batch_size])
                self.test_ptr += batch_size
            else:
                # Wrap around to the beginning of the test list.
                new_ptr = (self.test_ptr + batch_size) % self.test_size
                idx = np.hstack(
                    (self.test_idx[self.test_ptr:], self.test_idx[:new_ptr]))
                self.test_ptr = new_ptr
            i = idx[0]
            sample = self.test_list[i]
            label_id = sample[4] if len(sample) > 4 else 0

            if sample[0] is None:
                # visual guide image / mask is none, only read spatial guide and input image
                first_frame = False
                ref_label = Image.open(sample[2])
                image = Image.open(sample[3])
                frame_name = sample[3].split('/')[-1].split('.')[0] + '.png'
                if len(sample) > 5:
                    # vid_path/label_id/frame_name
                    ref_name = os.path.join(sample[5], frame_name)
                elif self.multiclass:
                    # seq_name/label_id/frame_name
                    ref_name = os.path.join(*(sample[2].split('/')[-3:-1] +
                                              [frame_name]))
                else:
                    # seq_name/frame_name
                    ref_name = os.path.join(sample[2].split('/')[-2],
                                            frame_name)
            else:
                # only process visual guide image / mask
                first_frame = True
                guide_image = Image.open(sample[0])
                guide_label = Image.open(sample[1])
                if len(sample) > 5:
                    # vid_path/label_id/frame_name
                    ref_name = os.path.join(sample[5],
                                            sample[1].split('/')[-1])
                elif self.multiclass:
                    # seq_name/label_id/frame_name
                    ref_name = os.path.join(*(sample[1].split('/')[-3:]))
                else:
                    # seq_name/frame_name
                    ref_name = os.path.join(*(sample[1].split('/')[-2:]))

            if not first_frame:
                if len(self.size) == 2:
                    self.new_size = self.size
                else:
                    # resize short size of image to self.size[0]
                    resize_ratio = max(
                        float(self.size[0]) / image.size[0],
                        float(self.size[0]) / image.size[1])
                    self.new_size = (int(resize_ratio * image.size[0]),
                                     int(resize_ratio * image.size[1]))
                ref_label = ref_label.resize(self.new_size, Image.NEAREST)
                if label_id > 0:
                    ref_label = _get_obj_mask(ref_label, label_id)
                ref_label_data = np.array(ref_label)
                # One resize serves both the stored raw frame in self.images
                # and the network input (the original resized twice with
                # identical arguments).
                image = image.resize(self.new_size, Image.BILINEAR)
                self.images.append(np.array(image))
                if self.use_original_mask:
                    # ndimage.binary_dilation is the same function as the
                    # deprecated ndimage.morphology.binary_dilation alias.
                    gb_image = ndimage.binary_dilation(
                        ref_label_data,
                        structure=self.args.dilate_structure) * 255
                else:
                    gb_image = get_gb_image(ref_label_data,
                                            center_perturb=0,
                                            std_perturb=0)
                image_data = np.array(image, dtype=np.float32)
                image_data = to_bgr(image_data)
                image_data = (image_data - self.mean_value) * self.scale_value
                images = np.array([image_data])
                gb_images = np.array([gb_image])[..., np.newaxis]
                guide_images = None
            else:
                # process visual guide images
                # resize to same size of guide_image first, in case of full resolution input
                guide_label = guide_label.resize(guide_image.size,
                                                 Image.NEAREST)
                if label_id > 0:
                    guide_label = _get_obj_mask(guide_label, label_id)
                bbox = get_mask_bbox(np.array(guide_label))
                guide_image = guide_image.crop(bbox)
                guide_label = guide_label.crop(bbox)
                guide_image, guide_label = data_augmentation(
                    guide_image,
                    guide_label,
                    self.args.guide_size,
                    data_aug_flip=False,
                    pad_ratio=self.vg_pad_ratio,
                    keep_aspect_ratio=self.vg_keep_aspect_ratio)

                guide_image_data = np.array(guide_image, dtype=np.float32)
                guide_image_data = to_bgr(guide_image_data)
                guide_image_data = (guide_image_data -
                                    self.mean_value) * self.scale_value
                guide_label_data = np.array(guide_label, dtype=np.uint8)
                if not self.bbox_sup:
                    guide_image_data = mask_image(guide_image_data,
                                                  guide_label_data)
                guide_images = np.array([guide_image_data])
                images = None
                gb_images = None
            image_paths.append(ref_name)
            return guide_images, gb_images, images, image_paths
        else:
            return None, None, None, None
コード例 #5
0
    def next_batch(self, batch_size, phase):
        """Fetch the next training batch or the next test sample.

        Args:
            batch_size: Number of samples to fetch. Must be 1 when ``phase``
                is 'test'.
            phase: 'train' or 'test'.

        Returns:
            For 'train': ``(guide_images, gb_images, images, labels)`` as
            numpy arrays stacked over the batch; ``gb_images`` and ``labels``
            get a trailing singleton axis.

            For 'test': ``(guide_images, gb_images, images, image_paths)``.
            When the sample has a visual guide (``sample[0]`` is not None)
            only ``guide_images`` is populated and ``gb_images`` / ``images``
            are None; otherwise ``guide_images`` is None and the other two are
            populated. ``image_paths`` is a one-element list holding the
            relative output name of the frame.

            For any other phase: ``(None, None, None, None)``.
        """
        if phase == 'train':
            if self.train_ptr + batch_size <= self.train_size:
                idx = np.array(self.train_idx[self.train_ptr:self.train_ptr +
                                              batch_size])
                self.train_ptr += batch_size
            else:
                # Not enough samples left: reshuffle and restart from the top.
                np.random.shuffle(self.train_idx)
                idx = np.array(self.train_idx[:batch_size])
                self.train_ptr = batch_size
            guide_images = []
            gb_images = []
            images = []
            labels = []
            # Fall back to the unscaled size when no augmentation scales are
            # configured; previously new_size was left undefined in that case.
            scale = (random.choice(self.data_aug_scales)
                     if self.data_aug_scales else 1)
            new_size = (int(self.size[0] * scale), int(self.size[1] * scale))
            for i in idx:
                sample = self.train_list[i]
                # The first two entries are the visual guide in both layouts.
                guide_image = Image.open(sample[0])
                guide_label = Image.open(sample[1])
                if len(sample) == 4:
                    # guide image is both for appearance and location guidance
                    ref_label = guide_label
                    image = Image.open(sample[2])
                    label = Image.open(sample[3])
                else:
                    # guide image is only for appearance guidance, ref label is only for location guidance
                    ref_label = Image.open(sample[2])
                    image = Image.open(sample[3])
                    label = Image.open(sample[4])
                image = image.resize(new_size, Image.BILINEAR)
                label = label.resize(new_size, Image.NEAREST)
                ref_label = ref_label.resize(new_size, Image.NEAREST)
                guide_label = guide_label.resize(guide_image.size,
                                                 Image.NEAREST)
                # Crop the visual guide to the bounding box of its mask.
                bbox = get_mask_bbox(np.array(guide_label))
                guide_image = guide_image.crop(bbox)
                guide_label = guide_label.crop(bbox)
                guide_image, guide_label = data_augmentation(
                    guide_image,
                    guide_label,
                    self.guide_size,
                    data_aug_flip=self.data_aug_flip,
                    keep_aspect_ratio=self.vg_keep_aspect_ratio,
                    random_crop_ratio=self.vg_random_crop_ratio,
                    random_rotate_angle=self.vg_random_rotate_angle,
                    color_aug=self.vg_color_aug)
                if self.use_original_mask:
                    gb_image = perturb_mask(np.array(ref_label))
                    # ndimage.binary_dilation is the same function as the
                    # deprecated ndimage.morphology.binary_dilation alias.
                    gb_image = ndimage.binary_dilation(
                        gb_image, structure=self.dilate_structure) * 255
                else:
                    gb_image = get_gb_image(
                        np.array(ref_label),
                        center_perturb=self.sg_center_perturb_ratio,
                        std_perturb=self.sg_std_perturb_ratio)
                image_data = np.array(image, dtype=np.float32)
                label_data = np.array(label, dtype=np.uint8) > 0
                image_data = to_bgr(image_data)
                image_data -= self.mean_value
                guide_label_data = np.array(guide_label, dtype=np.uint8)
                guide_image_data = np.array(guide_image, dtype=np.float32)
                guide_image_data = to_bgr(guide_image_data)
                guide_image_data -= self.mean_value
                if not self.bbox_sup:
                    guide_image_data = mask_image(guide_image_data,
                                                  guide_label_data)
                guide_images.append(guide_image_data)
                gb_images.append(gb_image)
                images.append(image_data)
                labels.append(label_data)
            images = np.array(images)
            gb_images = np.array(gb_images)[..., np.newaxis]
            labels = np.array(labels)[..., np.newaxis]
            guide_images = np.array(guide_images)
            return guide_images, gb_images, images, labels
        elif phase == 'test':
            image_paths = []
            self.crop_boxes = []
            self.images = []
            assert batch_size == 1, "Only allow batch size = 1 for testing"
            if self.test_ptr + batch_size < self.test_size:
                idx = np.array(self.test_idx[self.test_ptr:self.test_ptr +
                                             batch_size])
                self.test_ptr += batch_size
            else:
                # Wrap around to the beginning of the test list.
                new_ptr = (self.test_ptr + batch_size) % self.test_size
                idx = np.hstack(
                    (self.test_idx[self.test_ptr:], self.test_idx[:new_ptr]))
                self.test_ptr = new_ptr
            i = idx[0]
            sample = self.test_list[i]
            if sample[0] is None:
                # visual guide image / mask is none, only read spatial guide and input image
                first_frame = False
                ref_label = Image.open(sample[2])
                image = Image.open(sample[3])
                frame_name = sample[3].split('/')[-1].split('.')[0] + '.png'
                if self.multiclass:
                    # seq_name/label_id/frame_name
                    ref_name = os.path.join(*(sample[2].split('/')[-3:-1] +
                                              [frame_name]))
                else:
                    # seq_name/frame_name
                    ref_name = os.path.join(sample[2].split('/')[-2],
                                            frame_name)
            else:
                # only process visual guide image / mask
                first_frame = True
                guide_image = Image.open(sample[0])
                guide_label = Image.open(sample[1])
                if self.multiclass:
                    # seq_name/label_id/frame_name
                    ref_name = os.path.join(*(sample[1].split('/')[-3:]))
                else:
                    # seq_name/frame_name
                    ref_name = os.path.join(*(sample[1].split('/')[-2:]))
            if not first_frame:
                if len(self.size) == 2:
                    self.new_size = self.size
                else:
                    # resize short size of image to self.size[0]
                    resize_ratio = max(
                        float(self.size[0]) / image.size[0],
                        float(self.size[0]) / image.size[1])
                    self.new_size = (int(resize_ratio * image.size[0]),
                                     int(resize_ratio * image.size[1]))
                ref_label = ref_label.resize(self.new_size, Image.NEAREST)
                ref_label_data = np.array(ref_label) / 255
                # One resize serves both the stored raw frame in self.images
                # and the network input (the original resized twice with
                # identical arguments).
                image = image.resize(self.new_size, Image.BILINEAR)
                self.images.append(np.array(image))
                # Build only the spatial guide that is actually used; the
                # original always computed get_gb_image and then discarded it
                # when use_original_mask was set.
                if self.use_original_mask:
                    gb_image = ndimage.binary_dilation(
                        ref_label_data, structure=self.dilate_structure) * 255
                else:
                    gb_image = get_gb_image(ref_label_data,
                                            center_perturb=0,
                                            std_perturb=0)
                image_data = np.array(image, dtype=np.float32)
                image_data = to_bgr(image_data)
                image_data -= self.mean_value
                images = np.array([image_data])
                gb_images = np.array([gb_image])[..., np.newaxis]
                guide_images = None
            else:
                # process visual guide images
                guide_label = guide_label.resize(guide_image.size,
                                                 Image.NEAREST)
                bbox = get_mask_bbox(np.array(guide_label))
                guide_image = guide_image.crop(bbox)
                guide_label = guide_label.crop(bbox)
                guide_image, guide_label = data_augmentation(
                    guide_image,
                    guide_label,
                    self.guide_size,
                    data_aug_flip=False,
                    pad_ratio=self.vg_pad_ratio,
                    keep_aspect_ratio=self.vg_keep_aspect_ratio)

                guide_image_data = np.array(guide_image, dtype=np.float32)
                guide_image_data = to_bgr(guide_image_data)
                guide_image_data -= self.mean_value
                guide_label_data = np.array(guide_label, dtype=np.uint8)
                if not self.bbox_sup:
                    guide_image_data = mask_image(guide_image_data,
                                                  guide_label_data)
                guide_images = np.array([guide_image_data])
                images = None
                gb_images = None
            image_paths.append(ref_name)
            return guide_images, gb_images, images, image_paths
        else:
            return None, None, None, None