Ejemplos de update en Python, ejemplos de torch.utils.data.update en Python

Ejemplo n.º 1

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the parent sample extended with bin and softmax labels.

        The sample produced by the parent class must contain ``mask_v``,
        ``mask_h``, ``weights`` and ``angle_range_label``.  For each of the
        two masks the outputs of ``create_bin_label`` and
        ``create_softmax_label`` are stored back into the same dict, which is
        then returned.
        """
        data = super(OriDatasetHist, self).__getitem__(index)
        vertical_mask = data["mask_v"]
        horizontal_mask = data["mask_h"]
        pixel_weights = data["weights"]
        angle_idx = data["angle_range_label"]

        # Binary labels (and their weights) for each of the two masks.
        bin_v, bin_weights_v = self.create_bin_label(
            vertical_mask, angle_idx, pixel_weights)
        bin_h, bin_weights_h = self.create_bin_label(
            horizontal_mask, angle_idx, pixel_weights)

        # Softmax-style labels for each of the two masks.
        softmax_v = self.create_softmax_label(vertical_mask, angle_idx)
        softmax_h = self.create_softmax_label(horizontal_mask, angle_idx)

        data.update({
            "bin_label_v": bin_v,
            "weights_v": bin_weights_v,
            "bin_label_h": bin_h,
            "weights_h": bin_weights_h,
            "softmax_label_v": softmax_v,
            "softmax_label_h": softmax_h,
        })
        return data

Ejemplo n.º 2

0

Mostrar archivo

Archivo: hum36.py Proyecto: nazarblch/brule2

    def __getitem__(self, index):
        """Build one sample: normalized keypoints plus, optionally, images.

        Keypoints come from ``self.dataset`` (paired) and from
        ``self.skeleton_dataset`` (only its last returned element is used).
        All keypoint arrays are passed through ``utils.normalize_points``
        with ``self.fineSize`` for both size arguments and converted with
        ``torch.from_numpy``.  The transformed images ``A`` / ``cond_A`` are
        added only when ``self.load_images`` is truthy.
        """
        (cond_A_img, A_img, cond_A_path, A_paths,
         paired_cond_B, paired_B) = self._get_sample(self.dataset, index)

        # Only the final element of the skeleton sample is needed.
        _, _, _, _, _, B = self._get_sample(
            self.skeleton_dataset, index, load_image=False)

        side = self.fineSize
        paired_cond_B, paired_B, B = (
            utils.normalize_points(points, side, side)
            for points in (paired_cond_B, paired_B, B))

        image_entries = {}
        if self.load_images:
            image_entries['A'] = self.A_transform(A_img)
            image_entries['cond_A'] = self.A_transform(cond_A_img)

        data = {
            'B': torch.from_numpy(B),
            'paired_cond_B': torch.from_numpy(paired_cond_B),
            'paired_B': torch.from_numpy(paired_B),
            'A_paths': A_paths,
            'cond_A_path': cond_A_path,
        }
        # Empty when images are not loaded, so this is a no-op in that case.
        data.update(image_entries)
        return data

Ejemplo n.º 3

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the parent sample extended with mask arrays.

        ``mask_v``, ``mask_h`` and ``mask`` come from ``create_mask`` when
        ``label_test`` contains at least one value equal to 1; otherwise they
        are float32 zero arrays shaped like ``label_test``.  ``mask_test`` is
        always produced by ``create_mask_test``.
        """
        data = super(OriDataset, self).__getitem__(index)
        weight = data['weights']
        label_test = data['label_test']
        blank_shape = label_test.shape

        if np.any(label_test == 1):
            mask_v, mask_h, mask = self.create_mask(
                label_test, weight, width=16)
        else:
            # Nothing labelled 1: fall back to all-zero masks.
            mask, mask_v, mask_h = (
                np.zeros(blank_shape, dtype=np.float32) for _ in range(3))

        mask_test = self.create_mask_test(
            label_test, weight, width1=24, width2=8)

        data.update({
            'mask_v': mask_v,
            'mask_h': mask_h,
            'mask': mask,
            'mask_test': mask_test,
        })
        return data

Ejemplo n.º 4

0

Mostrar archivo

Archivo: nli_02_models_trials2.py Proyecto: abgoswam/cs224u

def predict_annotated_example(ann, experiment_results):
    """Predict the label of one annotated example and summarize the outcome.

    Args:
        ann: Mapping with an ``'example'`` object (exposing
            ``sentence1_parse``, ``sentence2_parse`` and ``gold_label``) and
            an iterable ``'annotations'`` of category names.
        experiment_results: Mapping with a ``'model'`` offering
            ``predict(list_of_features)`` and a feature function ``'phi'``
            called as ``phi(premise, hypothesis)``.

    Returns:
        A dict mapping every annotation category to ``True``, plus the keys
        ``'gold'``, ``'prediction'`` and ``'correct'``.
    """
    model = experiment_results['model']
    featurize = experiment_results['phi']
    example = ann['example']

    features = featurize(example.sentence1_parse, example.sentence2_parse)
    # The model works on batches; wrap the single example and unwrap after.
    predicted = model.predict([features])[0]
    gold = example.gold_label

    summary = dict.fromkeys(ann['annotations'], True)
    summary['gold'] = gold
    summary['prediction'] = predicted
    summary['correct'] = gold == predicted
    return summary

Ejemplo n.º 5

0

Mostrar archivo

Archivo: attribute_reid_dataset.py Proyecto: kilianyp/person-multi-task-dataset

 def __getitem__(self, index):
     """Return the re-id sample at ``index`` merged with attribute data.

     The sample's ``pid`` is translated through ``self.label_dic`` before
     indexing ``self.attribute_dataset``.  A ``pid`` of -1 or 0 is looked up
     at index 0 instead.  The re-id sample dict is updated in place and
     returned.
     """
     sample = self.reid_dataset[index]
     pid = sample['pid']
     # For pid -1 / 0 some attribute data is still needed; per the original
     # author it is ignored later, so entry 0 is used as a placeholder.
     mapped = 0 if pid in [-1, 0] else self.label_dic[pid]
     sample.update(self.attribute_dataset[mapped])
     return sample

Ejemplo n.º 6

0

Mostrar archivo

    def load_dataset(self, partition, size=(84, 84)):
        """Load pickled mini-imagenet data and the label encoder, then resize.

        Args:
            partition: Suffix of the pickle to load.  The special value
                ``'train_val'`` loads the ``train`` pickle and merges the
                ``val`` pickle into it.
            size: ``(height, width)`` every image is resized to.

        Returns:
            ``(data, label_encoder)`` where ``data`` maps each class to its
            images, each stored as a mean-shifted, scaled float32 array in
            channel-first layout.
        """
        print("Loading dataset")
        dataset_dir = os.path.join(self.root, 'compacted_datasets')

        splits = ('train', 'val') if partition == 'train_val' else (partition,)
        data = None
        for split in splits:
            split_path = os.path.join(dataset_dir,
                                      'mini_imagenet_%s.pickle' % split)
            with open(split_path, 'rb') as handle:
                loaded = pickle.load(handle)
            if data is None:
                data = loaded
            else:
                data.update(loaded)
            del loaded

        encoder_path = os.path.join(dataset_dir,
                                    'mini_imagenet_label_encoder.pickle')
        with open(encoder_path, 'rb') as handle:
            label_encoder = pickle.load(handle)

        # Resize images and normalize
        channel_means = (120.45, 115.74, 104.65)  # R, G, B
        target_wh = (size[1], size[0])  # PIL expects (width, height)
        for class_ in data:
            images = data[class_]
            for i in range(len(images)):
                resized = pil_image.fromarray(np.uint8(images[i]))
                resized = resized.resize(target_wh)
                pixels = np.array(resized, dtype='float32')

                # Channel-first, then per-channel mean shift and scaling.
                pixels = np.transpose(pixels, (2, 0, 1))
                for channel, mean in enumerate(channel_means):
                    pixels[channel, :, :] -= mean
                pixels /= 127.5

                images[i] = pixels

        print("Num classes " + str(len(data)))
        num_images = sum(len(data[class_]) for class_ in data)
        print("Num images " + str(num_images))
        return data, label_encoder

Ejemplo n.º 7

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the parent sample extended with derived label arrays.

        Adds to the sample dict:
          * ``label_3c``: copy of ``label`` where zeros are first set to 1 and
            every entry different from 255 is then decremented by one.
          * ``label_2c``: float32 indicator of ``label == 1``.
          * ``weights``: float32 indicator of ``label`` being 0 or 1.
        """
        data = super(MultiTaskDataset_v2, self).__getitem__(index)
        label = data['label']

        is_one = (label == 1)
        is_zero = (label == 0)
        keep = (label != 255)  # entries equal to 255 are left untouched

        shifted = label.copy()
        shifted[is_zero] = 1
        shifted[keep] -= 1

        data.update({
            'label_3c': shifted,
            'label_2c': is_one.astype(np.float32),
            'weights': np.logical_or(is_one, is_zero).astype(np.float32),
        })
        return data

Ejemplo n.º 8

0

Mostrar archivo

Archivo: dataloader.py Proyecto: salesforce/speech-datasets

 def collate_fn(self, items):
     """Flatten groups of ``(uttid, data)`` pairs into a list of sample dicts.

     For every utterance the auxiliary info registered under its id in
     ``self.aux_utt_info`` (minus the ``"length"`` entry) is merged into the
     sample, ``data["x"]`` is converted to a float tensor, ``data["labels"]``
     is produced from ``data["text"]`` when a tokenizer is configured,
     ``"rate"`` is dropped and ``"uttid"`` is stored.

     Args:
         items: Iterable of iterables of ``(uttid, data)`` pairs, where
             ``data`` is a dict whose ``"x"`` entry is a NumPy array.

     Returns:
         A list with the (in-place updated) ``data`` dicts.
     """
     batch = []
     for uttid, data in itertools.chain.from_iterable(items):
         # Build a filtered copy instead of popping "length" from the stored
         # dict: the entry in self.aux_utt_info is shared across calls and
         # must not be modified as a side effect of collating a batch.
         aux_info = {
             key: value
             for key, value in self.aux_utt_info.get(uttid, {}).items()
             if key != "length"
         }
         data.update(aux_info)
         data["x"] = torch.from_numpy(data["x"]).float()
         if self.tokenizer is not None:
             data["labels"] = torch.tensor(
                 self.tokenizer.text2ids(data["text"]))
         data.pop("rate", None)
         data["uttid"] = uttid
         batch.append(data)
     return batch

Ejemplo n.º 9

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the entry at ``index`` merged with its processed data.

        With ``self.config.use_cache`` enabled the processed data is read
        from the pickle at ``self.get_cache_name(data)``, which is first
        created through ``self.save_cache(data)`` if it does not exist yet.
        Otherwise it is computed by ``self.preprocess(data)``.  The entry from
        ``self.filtered_data`` is updated in place and returned.
        """
        data = self.filtered_data[index]

        if not self.config.use_cache:
            processed_data = self.preprocess(data)
        else:
            cache_path = self.get_cache_name(data)
            if not os.path.exists(cache_path):
                self.save_cache(data)

            # The context manager closes the handle; no explicit close needed.
            with open(cache_path, 'rb') as handle:
                processed_data = pickle.load(handle)

        data.update(processed_data)
        return data

Ejemplo n.º 10

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the parent sample extended with ``label_3c`` and ``label_2c``.

        ``label_3c`` is a copy of ``label`` where zeros are set to 1 and every
        entry different from 255 is decremented by one.  ``label_2c`` is a
        copy of ``label`` where values 2 and 3 are replaced by 255.  In both
        arrays any remaining 255 is finally replaced by ``self.ignore_label``.
        """
        data = super(MultiTaskDataset, self).__getitem__(index)
        label = data['label']

        three_class = label.copy()
        three_class[label == 0] = 1
        three_class[label != 255] -= 1

        two_class = label.copy()
        two_class[np.logical_or(label == 2, label == 3)] = 255

        # Swap the 255 marker for the configured ignore value.
        for derived in (three_class, two_class):
            derived[derived == 255] = self.ignore_label

        data.update({'label_3c': three_class, 'label_2c': two_class})
        return data

Ejemplo n.º 11

0

Mostrar archivo

Archivo: static_detmot.py Proyecto: reinforcementdriving/MOTR

 def __getitem__(self, idx):
     """Return images and per-frame ground-truth instances for ``idx``.

     Frames and targets come from ``self.pre_continuous_frames`` and are run
     through ``self._transforms`` when one is set.  The result dict holds
     ``'imgs'`` and ``'gt_instances'``, plus ``'ori_img'`` (taken from each
     target) when ``self.args.vis`` is truthy.
     """
     images, targets = self.pre_continuous_frames(idx)
     if self._transforms is not None:
         images, targets = self._transforms(images, targets)

     # One instances object per frame, built from that frame's targets and
     # dimensions 1 and 2 of the image shape.
     gt_instances = [
         self._targets_to_instances(frame_targets, frame.shape[1:3])
         for frame, frame_targets in zip(images, targets)
     ]

     data = {
         'imgs': images,
         'gt_instances': gt_instances,
     }
     if self.args.vis:
         data['ori_img'] = [target_i['ori_img'] for target_i in targets]
     return data

Ejemplo n.º 12

0

Mostrar archivo

def read_dir(data_dir):
    """Merge the user data of every ``.json`` file found in ``data_dir``.

    Files are processed in sorted name order so that the order of ``groups``
    and the winner among duplicate user keys are reproducible
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        data_dir: Directory containing JSON files.  Each file must have a
            ``'user_data'`` mapping and may have a ``'hierarchies'`` list.

    Returns:
        A tuple ``(clients, groups, data)``:
          * ``clients``: sorted list of all keys in ``data``.
          * ``groups``: concatenation of every file's ``'hierarchies'``.
          * ``data``: ``defaultdict`` (missing keys yield ``None``) holding
            the merged ``'user_data'`` of all files.
    """
    groups = []
    data = defaultdict(lambda: None)

    json_files = sorted(
        name for name in os.listdir(data_dir) if name.endswith('.json'))
    for name in json_files:
        with open(os.path.join(data_dir, name), 'r') as inf:
            cdata = json.load(inf)
        if 'hierarchies' in cdata:
            groups.extend(cdata['hierarchies'])
        data.update(cdata['user_data'])

    # The client list is derived from the merged data, so the per-file
    # 'users' lists do not need to be accumulated.
    clients = sorted(data.keys())
    return clients, groups, data

Ejemplo n.º 13

0

Mostrar archivo

 def __getitem__(self, idx):
     """Return images and per-frame ground-truth instances for ``idx``.

     The frame range is obtained from ``self._get_sample_range`` and loaded
     with ``self.pre_continuous_frames``.  The transform is selected from
     ``self.dataset2transform`` using the ``'dataset'`` entry of the first
     target and applied when it is not ``None``.  The result dict holds
     ``'imgs'`` and ``'gt_instances'``, plus ``'ori_img'`` when
     ``self.args.vis`` is truthy.
     """
     sample_start, sample_end, sample_interval = self._get_sample_range(idx)
     images, targets = self.pre_continuous_frames(
         sample_start, sample_end, sample_interval)

     transform = self.dataset2transform[targets[0]['dataset']]
     if transform is not None:
         images, targets = transform(images, targets)

     gt_instances = [
         self._targets_to_instances(frame_targets, frame.shape[1:3])
         for frame, frame_targets in zip(images, targets)
     ]

     data = {
         'imgs': images,
         'gt_instances': gt_instances,
     }
     if self.args.vis:
         data['ori_img'] = [target_i['ori_img'] for target_i in targets]
     return data

Ejemplo n.º 14

0

Mostrar archivo

Archivo: data.py Proyecto: guoyurong0104/EGNN_ATRM

    def load_dataset(self):
        """Load every pickle shard of ``self.partition``, then resize images.

        The number of shards read is fixed per partition (train: 18, val: 5,
        test: 8) and shard files are numbered starting at 1.  Every image is
        resized to ``(self.data_size[2], self.data_size[1])`` (PIL
        width/height order), converted to channel-first float32, shifted by
        per-channel constants and divided by 127.5.

        Returns:
            Dict mapping each class to its list of processed images.

        Raises:
            ValueError: If ``self.partition`` is not ``'train'``, ``'val'``
                or ``'test'``.
        """
        print("Loading dataset")
        if self.partition == 'train':
            num_partition = 18
        elif self.partition == 'val':
            num_partition = 5
        elif self.partition == 'test':
            num_partition = 8
        else:
            # Without this branch num_partition would be unbound below and
            # the failure would surface as an opaque UnboundLocalError.
            raise ValueError(
                "Unknown partition %r; expected 'train', 'val' or 'test'"
                % (self.partition,))

        data = {}
        for shard in range(1, num_partition + 1):
            shard_path = os.path.join(
                self.root, 'tiered-imagenet/compacted_datasets',
                'tiered_imagenet_{}_{}.pickle'.format(self.partition, shard))
            with open(shard_path, 'rb') as handle:
                data.update(pickle.load(handle))

        # Resize images and normalize
        for class_ in data:
            for i in range(len(data[class_])):
                image2resize = pil_image.fromarray(np.uint8(data[class_][i]))
                image_resized = image2resize.resize(
                    (self.data_size[2], self.data_size[1]))
                image_resized = np.array(image_resized, dtype='float32')

                # Normalize
                image_resized = np.transpose(image_resized, (2, 0, 1))
                image_resized[0, :, :] -= 120.45  # R
                image_resized[1, :, :] -= 115.74  # G
                image_resized[2, :, :] -= 104.65  # B
                image_resized /= 127.5

                data[class_][i] = image_resized

        print("Num classes " + str(len(data)))
        num_images = sum(len(data[class_]) for class_ in data)
        print("Num images " + str(num_images))
        return data

Ejemplo n.º 15

0

Mostrar archivo

Archivo: roads.py Proyecto: davidgj94/pytorch-tools

    def __getitem__(self, index):
        """Load, augment and normalize one sample; return it as a dict.

        The dict holds ``image_id``, the normalized ``image`` as a NumPy
        array, the float32 ``label``, all-ones float32 ``weights`` shaped like
        the label and ``vis_image`` (a copy of the augmented image taken
        before tensor conversion).  When ``self.training`` is truthy the
        outputs of ``compute_junction_gt(label)`` are added as
        ``junction_gt`` and ``junction_weights``.
        """
        image_id, raw_image, raw_label = self._load_data(index)
        augmented, raw_label = self.augmentations(raw_image, raw_label)
        vis_image = augmented.copy()

        # NOTE(review): self.var is passed where TF.normalize takes its
        # second statistic - confirm it holds the intended values.
        tensor = TF.normalize(TF.to_tensor(augmented), self.mean, self.var)

        label = raw_label.astype(np.float32)

        data = {
            'image_id': image_id,
            'image': tensor.numpy(),
            'label': label,
            'weights': np.ones_like(label, dtype=np.float32),
            'vis_image': vis_image,
        }

        if self.training:
            junction_gt, junction_weights = compute_junction_gt(label)
            data['junction_gt'] = junction_gt
            data['junction_weights'] = junction_weights

        return data

Ejemplo n.º 16

0

Mostrar archivo

	def __getitem__(self, index):
		"""Return the parent sample extended with per-angle line labels.

		When ``angle_range_label`` equals 255 the parent sample is returned
		unchanged.  Otherwise lines are extracted from ``label_test == 1`` for
		``self.angle_range_v`` and ``self.angle_range_h``, rasterized with
		``lines.create_grid`` (width 16) and restricted to ``label == 0``.
		Three entries are then written to the sample:

		  * ``bin_label``: stack (v, h) of float32 arrays with one plane per
		    angle bin; only plane ``idx`` holds the line mask.
		  * ``weights``: stack (v, h) of the original weights repeated per
		    angle bin, multiplied by the inverted line mask everywhere except
		    plane ``idx`` (this replaces the original ``weights`` entry).
		  * ``softmax_label``: stack (v, h) of int64 arrays filled with 255,
		    set to ``angle_range_label`` on the line mask.
		"""
		data = super(HistDataset, self).__getitem__(index)
		label_test = data['label_test']
		label = data['label']
		base_weights = data['weights']
		angle_range_label = data['angle_range_label']

		if angle_range_label == 255:
			return data

		# Line extraction and rasterization for both orientations.
		lines_v, rot_angle_rad = lines.extract_lines((label_test == 1), self.angle_range_v)
		lines_h, _ = lines.extract_lines((label_test == 1), self.angle_range_h)

		orientation_masks = [
			lines.create_grid(label.shape, found, width=16) * (label == 0).astype(int)
			for found in (lines_v, lines_h)
		]

		rot_angle_deg = np.rad2deg(rot_angle_rad)
		angle_dist = np.abs(self.rot_angles - rot_angle_deg)

		# Choose the angle bin that receives the line mask.
		if self.combine:
			idx = angle_range_label
			n_angles = len(self.rot_angles) - 1
		else:
			idx = np.argmin(angle_dist)
			n_angles = len(self.rot_angles)

		plane_shape = (n_angles,) + label.shape

		# Binary labels: all-zero planes except for the selected bin.
		bin_planes = []
		for line_mask in orientation_masks:
			planes = np.zeros(plane_shape, dtype=np.float32)
			planes[idx] = line_mask.astype(np.float32)
			bin_planes.append(planes)
		bin_label = np.stack(bin_planes, 0)

		# Weights: line pixels are zeroed in every bin but the selected one.
		weight_planes = []
		for line_mask in orientation_masks:
			off_line = (line_mask != 1).astype(np.float32)
			planes = np.repeat(base_weights[np.newaxis, ...] * off_line, n_angles, 0)
			planes[idx] = base_weights
			weight_planes.append(planes)
		stacked_weights = np.stack(weight_planes, 0)

		# Softmax labels: 255 everywhere, the angle label on line pixels.
		softmax_planes = []
		for line_mask in orientation_masks:
			plane = np.full(label.shape, 255, dtype=np.int64)
			plane[line_mask.astype(bool)] = angle_range_label
			softmax_planes.append(plane)
		softmax_label = np.stack(softmax_planes, 0)

		data['bin_label'] = bin_label
		data['softmax_label'] = softmax_label
		data['weights'] = stacked_weights

		return data

Ejemplo n.º 17

0

Mostrar archivo

Archivo: robochunk.py Proyecto: utiasSTARS/matchable-image-transforms

    def __getitem__(self, idx):
        """Return a pair of images as RGB, grayscale and log-RGB tensors.

        Both images of the pair go through the same resize/crop pipeline.
        When ``self.opts.compute_matches`` is truthy the three match counts
        (1-1, 1-2, 2-2) are added, read from or written to the per-index
        caches if ``self.cache_matches`` is enabled.
        """
        opts = self.opts

        # Get images
        first, second = self.dataset.get_rgb_spatial_pair(idx)
        rgb1 = Image.fromarray(first)
        rgb2 = Image.fromarray(second)

        # NOTE(review): these two values are not used anywhere below in this
        # method; kept so that behavior is unchanged.
        resize_scale = min(opts.image_load_size) / min(rgb1.size)
        resize_offset = 0.5 * (max(rgb1.size) * resize_scale -
                               max(opts.image_load_size))

        stages = [
            transforms.Resize(min(opts.image_load_size)),
            transforms.CenterCrop(opts.image_load_size),
        ]
        if self.random_crop:
            stages.append(
                custom_transforms.StatefulRandomCrop(opts.image_final_size))
        else:
            stages.append(transforms.Resize(opts.image_final_size))
        resize = transforms.Compose(stages)

        to_gray = transforms.Grayscale()
        to_normalized_rgb = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(opts.image_mean, opts.image_std),
        ])
        to_normalized_gray = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((opts.image_mean[0], ),
                                 (opts.image_std[0], )),
        ])
        # Clamp at the minimum to avoid computing log(0) = -inf
        to_log = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda tensor: tensor.clamp(1e-3, 1.)),
            transforms.Lambda(lambda tensor: tensor.log()),
        ])

        rgb1 = resize(rgb1)
        rgb2 = resize(rgb2)
        gray1 = to_gray(rgb1)
        gray2 = to_gray(rgb2)

        if opts.compute_matches:
            if self.cache_matches and self.matches12[idx] is not None:
                matches11 = self.matches11[idx]
                matches12 = self.matches12[idx]
                matches22 = self.matches22[idx]
            else:
                matches11 = self._get_match_count(gray1, gray1)
                matches12 = self._get_match_count(gray1, gray2)
                matches22 = self._get_match_count(gray2, gray2)
                if self.cache_matches:
                    self.matches11[idx] = matches11
                    self.matches12[idx] = matches12
                    self.matches22[idx] = matches22

        # Log tensors must be derived from the PIL images, i.e. before rgb1
        # and rgb2 are rebound to normalized tensors.
        logrgb1 = to_log(rgb1)
        logrgb2 = to_log(rgb2)
        rgb1 = to_normalized_rgb(rgb1)
        rgb2 = to_normalized_rgb(rgb2)
        gray1 = to_normalized_gray(gray1)
        gray2 = to_normalized_gray(gray2)

        data = {
            'rgb1': rgb1,
            'rgb2': rgb2,
            'gray1': gray1,
            'gray2': gray2,
            'logrgb1': logrgb1,
            'logrgb2': logrgb2,
        }
        if opts.compute_matches:
            data['matches11'] = matches11
            data['matches12'] = matches12
            data['matches22'] = matches22

        return data

Ejemplo n.º 18

0

Mostrar archivo

Archivo: vkitti.py Proyecto: utiasSTARS/matchable-image-transforms

    def __getitem__(self, idx1):
        """Return an image pair as RGB, grayscale and log-RGB tensors.

        The first image is ``self.dataset1`` at ``idx1``; the second is
        ``self.dataset2`` at ``idx1`` plus a random offset drawn from
        ``[-max_interval, max_interval]`` and clamped to the bounds of
        ``self.dataset2``.  Match counts (1-1, 1-2, 2-2) are added when
        ``self.opts.compute_matches`` is truthy.
        """
        opts = self.opts

        interval = np.random.randint(
            low=-opts.max_interval, high=opts.max_interval+1)

        idx2 = idx1 + interval
        n_second = len(self.dataset2)
        if idx2 >= n_second:
            idx2 = n_second - 1
        elif idx2 < 0:
            idx2 = 0

        # Get images
        rgb1 = Image.fromarray(self.dataset1.get_rgb(idx1))
        rgb2 = Image.fromarray(self.dataset2.get_rgb(idx2))

        # NOTE(review): these two values are not used anywhere below in this
        # method; kept so that behavior is unchanged.
        resize_scale = min(opts.image_load_size) / min(rgb1.size)
        resize_offset = 0.5 * (max(rgb1.size) * resize_scale -
                               max(opts.image_load_size))

        stages = [
            transforms.Resize(min(opts.image_load_size)),
            transforms.CenterCrop(opts.image_load_size),
        ]
        if self.random_crop:
            stages.append(
                custom_transforms.StatefulRandomCrop(opts.image_final_size))
        else:
            stages.append(transforms.Resize(opts.image_final_size))
        resize = transforms.Compose(stages)

        to_gray = transforms.Grayscale()
        to_normalized_rgb = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(opts.image_mean, opts.image_std),
        ])
        to_normalized_gray = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                (opts.image_mean[0],), (opts.image_std[0],)),
        ])
        # Clamp at the minimum to avoid computing log(0) = -inf
        to_log = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda tensor: tensor.clamp(1e-3, 1.)),
            transforms.Lambda(lambda tensor: tensor.log()),
        ])

        rgb1 = resize(rgb1)
        rgb2 = resize(rgb2)
        gray1 = to_gray(rgb1)
        gray2 = to_gray(rgb2)

        if opts.compute_matches:
            matches11 = self._get_match_count(gray1, gray1)
            matches12 = self._get_match_count(gray1, gray2)
            matches22 = self._get_match_count(gray2, gray2)

        # Log tensors must be derived from the PIL images, i.e. before rgb1
        # and rgb2 are rebound to normalized tensors.
        logrgb1 = to_log(rgb1)
        logrgb2 = to_log(rgb2)
        rgb1 = to_normalized_rgb(rgb1)
        rgb2 = to_normalized_rgb(rgb2)
        gray1 = to_normalized_gray(gray1)
        gray2 = to_normalized_gray(gray2)

        data = {
            'rgb1': rgb1,
            'rgb2': rgb2,
            'gray1': gray1,
            'gray2': gray2,
            'logrgb1': logrgb1,
            'logrgb2': logrgb2,
        }
        if opts.compute_matches:
            data['matches11'] = matches11
            data['matches12'] = matches12
            data['matches22'] = matches22

        return data

Ejemplo n.º 19

0

Mostrar archivo

    def __getitem__(self, index):
        """Return the parent sample extended with line proposals and their GT.

        Always adds ``label_multiclass``.  When ``angle_range_label`` is not
        255, also adds line endpoints, proposed lines (positives followed by
        negatives) for the "v" and "h" angle ranges, the concatenated
        ``lines_gt`` and replaces ``vis_image`` with its margin-cropped
        version.
        """

        data = super(AngleDetectDatataset_v2, self).__getitem__(index)

        label = data['label']
        label_test = (data['label_test'] == 1)

        # Shift every label except 255 down by one and clip it to [0, 2];
        # entries equal to 255 are left untouched.
        not_ignore = (label != 255)
        label_multiclass = label.copy()
        label_multiclass[not_ignore] = np.clip(label_multiclass[not_ignore] -
                                               1,
                                               a_min=0,
                                               a_max=2)
        data.update(label_multiclass=label_multiclass)

        # Crop self.margin pixels from every side of the first two axes.
        # NOTE(review): with margin == 0 the slice `0:-0` is empty - confirm
        # that margin is always positive.
        label = label_multiclass[self.margin:-self.margin,
                                 self.margin:-self.margin]
        label_test = label_test[self.margin:-self.margin,
                                self.margin:-self.margin]
        vis_image = data['vis_image'][self.margin:-self.margin,
                                      self.margin:-self.margin]

        idx = data['angle_range_label']
        if idx != 255:

            edges_coords = self.extract_edges(label)
            sz = label_test.shape

            # The "v" range starts at the selected rotation angle and spans
            # one angle step; the "h" range is the same range shifted by 90.
            _rot_angle = self.rot_angles[idx]
            angle_range_v = np.array(
                (_rot_angle, _rot_angle + self.angle_step))
            angle_range_h = angle_range_v + 90.0

            # Proposals, extracted reference lines and their IoU ("v" range).
            proposed_lines_v, lines_endpoints_v = lines.get_line_proposals(
                angle_range_v,
                sz,
                angle_step=self.theta_step,
                rho_step=self.rho_step,
                edges_coords=edges_coords,
                label=label)
            true_lines_v, _ = lines.extract_lines(label_test, angle_range_v)
            lines_v_iou = self.get_lines_iou(true_lines_v, proposed_lines_v,
                                             label)

            # Same steps for the "h" range.
            proposed_lines_h, lines_endpoints_h = lines.get_line_proposals(
                angle_range_h,
                sz,
                angle_step=self.theta_step,
                rho_step=self.rho_step,
                edges_coords=edges_coords,
                label=label)
            true_lines_h, _ = lines.extract_lines(label_test, angle_range_h)
            lines_h_iou = self.get_lines_iou(true_lines_h, proposed_lines_h,
                                             label)

            lines_endpoints_v, lines_gt_v, (is_positive_v,
                                            is_negative_v) = self.get_lines_gt(
                                                np.array(lines_endpoints_v),
                                                lines_v_iou,
                                                return_is=True)
            # NOTE(review): leftover debugger breakpoint - it drops into pdb
            # whenever no "v" endpoints remain. Decide how this case should
            # be handled in non-interactive runs.
            if lines_endpoints_v.shape[0] == 0:
                pdb.set_trace()
            if self.debug:
                self.plot_gt(
                    true_lines_v,
                    np.array(proposed_lines_v)[is_positive_v].tolist(), label)
                self.plot_gt(
                    true_lines_v,
                    np.array(proposed_lines_v)[is_negative_v].tolist(), label)
                plt.show()
            # Reorder proposals: positives first, then negatives.
            proposed_lines_v = np.array(proposed_lines_v)
            proposed_lines_v = np.vstack((proposed_lines_v[is_positive_v],
                                          proposed_lines_v[is_negative_v]))

            lines_endpoints_h, lines_gt_h, (is_positive_h,
                                            is_negative_h) = self.get_lines_gt(
                                                np.array(lines_endpoints_h),
                                                lines_h_iou,
                                                return_is=True)
            # NOTE(review): leftover debugger breakpoint, as for "v" above.
            if lines_endpoints_h.shape[0] == 0:
                pdb.set_trace()
            if self.debug:
                self.plot_gt(
                    true_lines_h,
                    np.array(proposed_lines_h)[is_positive_h].tolist(), label)
                self.plot_gt(
                    true_lines_h,
                    np.array(proposed_lines_h)[is_negative_h].tolist(), label)
                plt.show()
            # Reorder proposals: positives first, then negatives.
            proposed_lines_h = np.array(proposed_lines_h)
            proposed_lines_h = np.vstack((proposed_lines_h[is_positive_h],
                                          proposed_lines_h[is_negative_h]))

            # "v" ground truth followed by "h" ground truth in one 1-D array.
            lines_gt = np.append(lines_gt_v, lines_gt_h)
            # vis_image here is the cropped version and replaces the entry
            # provided by the parent sample.
            data.update(
                lines_endpoints_v=lines_endpoints_v,
                lines_endpoints_h=lines_endpoints_h,
                lines_gt=lines_gt,
                proposed_lines_v=proposed_lines_v,
                proposed_lines_h=proposed_lines_h,
                vis_image=vis_image,
            )

        return data

Ejemplo n.º 20

0

Mostrar archivo

    def load_dataset(self, partition, size=(84, 84)):
        """Load the pickled mini-imagenet data for ``partition``.

        The ``train`` pickle is read through ``pickle_load``; every other
        pickle is read with ``pickle.load``.  ``'train_val'`` loads ``train``
        and merges ``val`` into it.

        Args:
            partition: Pickle suffix to load, or ``'train_val'``.
            size: Unused in this method (image resizing and normalization
                are not performed here).

        Returns:
            ``(data, label_encoder)`` where ``label_encoder`` is always
            ``None``.
        """
        print('Loading dataset ({})'.format(partition))

        def _split_path(split):
            return os.path.join(self.root, 'compacted_datasets',
                                'mini_imagenet_%s.pickle' % split)

        if partition == 'train_val':
            data = pickle_load(_split_path('train'))
            with open(_split_path('val'), 'rb') as handle:
                data_val = pickle.load(handle)
            data.update(data_val)
            del data_val
        elif partition == 'train':
            data = pickle_load(_split_path('train'))
        else:
            with open(_split_path(partition), 'rb') as handle:
                data = pickle.load(handle)

        # The label encoder pickle is not loaded in this variant.
        label_encoder = None

        # fixme: images are returned as stored; no resize or normalization
        # is applied here.

        print("Num classes " + str(len(data)))
        num_images = sum(len(data[class_]) for class_ in data)
        print("Num images " + str(num_images))
        return data, label_encoder

Ejemplo n.º 21

0

Mostrar archivo

Archivo: attribute_reid_dataset.py Proyecto: kilianyp/person-multi-task-dataset

 def __getitem__(self, index):
     """Return the re-id sample at ``index`` merged with attribute data.

     The attribute entry is looked up directly by the sample's ``pid``; the
     re-id sample dict is updated in place and returned.
     """
     sample = self.reid_dataset[index]
     sample.update(self.attribute_dataset[sample['pid']])
     return sample

Ejemplo n.º 22

0

Mostrar archivo

Archivo: pan_ic15.py Proyecto: whai362/pan_pp.pytorch

    def prepare_train_data(self, index):
        img_path = self.img_paths[index]
        gt_path = self.gt_paths[index]

        img = get_img(img_path, self.read_type)
        bboxes, words = get_ann(img, gt_path)

        if bboxes.shape[0] > self.max_word_num:
            bboxes = bboxes[:self.max_word_num]
            words = words[:self.max_word_num]

        gt_words = np.full((self.max_word_num + 1, self.max_word_len),
                           self.char2id['PAD'],
                           dtype=np.int32)
        word_mask = np.zeros((self.max_word_num + 1, ), dtype=np.int32)
        for i, word in enumerate(words):
            if word == '###':
                continue
            word = word.lower()
            gt_word = np.full((self.max_word_len, ),
                              self.char2id['PAD'],
                              dtype=np.int)
            for j, char in enumerate(word):
                if j > self.max_word_len - 1:
                    break
                if char in self.char2id:
                    gt_word[j] = self.char2id[char]
                else:
                    gt_word[j] = self.char2id['UNK']
            if len(word) > self.max_word_len - 1:
                gt_word[-1] = self.char2id['EOS']
            else:
                gt_word[len(word)] = self.char2id['EOS']
            gt_words[i + 1] = gt_word
            word_mask[i + 1] = 1

        if self.is_transform:
            img = random_scale(img, self.short_size)

        gt_instance = np.zeros(img.shape[0:2], dtype='uint8')
        training_mask = np.ones(img.shape[0:2], dtype='uint8')
        if bboxes.shape[0] > 0:
            bboxes = np.reshape(bboxes * ([img.shape[1], img.shape[0]] * 4),
                                (bboxes.shape[0], -1, 2)).astype('int32')
            for i in range(bboxes.shape[0]):
                cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
                if words[i] == '###':
                    cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)

        gt_kernels = []
        for rate in [self.kernel_scale]:
            gt_kernel = np.zeros(img.shape[0:2], dtype='uint8')
            kernel_bboxes = shrink(bboxes, rate)
            for i in range(bboxes.shape[0]):
                cv2.drawContours(gt_kernel, [kernel_bboxes[i]], -1, 1, -1)
            gt_kernels.append(gt_kernel)

        if self.is_transform:
            imgs = [img, gt_instance, training_mask]
            imgs.extend(gt_kernels)

            if not self.with_rec:
                imgs = random_horizontal_flip(imgs)
            imgs = random_rotate(imgs)
            gt_instance_before_crop = imgs[1].copy()
            imgs = random_crop_padding(imgs, self.img_size)
            img, gt_instance, training_mask, gt_kernels = imgs[0], imgs[
                1], imgs[2], imgs[3:]
            word_mask = update_word_mask(gt_instance, gt_instance_before_crop,
                                         word_mask)

        gt_text = gt_instance.copy()
        gt_text[gt_text > 0] = 1
        gt_kernels = np.array(gt_kernels)

        max_instance = np.max(gt_instance)
        gt_bboxes = np.zeros((self.max_word_num + 1, 4), dtype=np.int32)
        for i in range(1, max_instance + 1):
            ind = gt_instance == i
            if np.sum(ind) == 0:
                continue
            points = np.array(np.where(ind)).transpose((1, 0))
            tl = np.min(points, axis=0)
            br = np.max(points, axis=0) + 1
            gt_bboxes[i] = (tl[0], tl[1], br[0], br[1])

        img = Image.fromarray(img)
        img = img.convert('RGB')
        if self.is_transform:
            img = transforms.ColorJitter(brightness=32.0 / 255,
                                         saturation=0.5)(img)

        img = transforms.ToTensor()(img)
        img = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225])(img)
        gt_text = torch.from_numpy(gt_text).long()
        gt_kernels = torch.from_numpy(gt_kernels).long()
        training_mask = torch.from_numpy(training_mask).long()
        gt_instance = torch.from_numpy(gt_instance).long()
        gt_bboxes = torch.from_numpy(gt_bboxes).long()
        gt_words = torch.from_numpy(gt_words).long()
        word_mask = torch.from_numpy(word_mask).long()

        data = dict(
            imgs=img,
            gt_texts=gt_text,
            gt_kernels=gt_kernels,
            training_masks=training_mask,
            gt_instances=gt_instance,
            gt_bboxes=gt_bboxes,
        )
        if self.with_rec:
            data.update(dict(gt_words=gt_words, word_masks=word_mask))

        return data

Ejemplo n.º 23

0

Mostrar archivo

Archivo: hist_dataset_combine.py Proyecto: davidgj94/pytorch-tools

    def __getitem__(self, index):
        """Return the parent sample extended with per-angle-bin line targets.

        Three entries are written into the parent's sample dict
        (``weights`` replaces the parent's value):

        * ``bin_label``: float32 array stacking the vertical and horizontal
          targets on axis 0. Each target has one channel per entry of
          ``self.rot_angles``; only channel ``angle_range_label`` is filled
          with the corresponding line mask.
        * ``softmax_label``: int64 array stacking the vertical and horizontal
          maps, holding ``angle_range_label`` where the line mask is set and
          255 elsewhere.
        * ``weights``: the parent's weights repeated once per angle bin, with
          pixels covered by the line mask zeroed in the bins adjacent to
          ``angle_range_label``.

        Args:
            index: Sample index forwarded to the parent dataset.

        Returns:
            dict: The extended sample. The parent's sample is returned
            unchanged when ``angle_range_label`` is 255.
        """
        data = super(HistDataset, self).__getitem__(index)
        label_test = data['label_test']
        label = data['label']
        weights = data['weights']
        angle_range_label = data['angle_range_label']

        # 255 means "no angle bin" for this sample (presumably the ignore
        # label -- confirm against the parent dataset).
        if angle_range_label == 255:
            return data

        ###############################################################################

        # The rotation angle returned alongside the lines is not needed here.
        lines_v, _ = lines.extract_lines((label_test == 1),
                                         self.angle_range_v)
        lines_h, _ = lines.extract_lines((label_test == 1),
                                         self.angle_range_h)

        lines_v_mask = lines.create_grid(label.shape, lines_v, width=16)
        lines_h_mask = lines.create_grid(label.shape, lines_h, width=16)

        ###############################################################################

        n_bins = len(self.rot_angles)
        sz = (n_bins, ) + label.shape

        bin_label_v = np.zeros(sz, dtype=np.float32)
        bin_label_v[angle_range_label] = lines_v_mask.astype(np.float32)

        bin_label_h = np.zeros(sz, dtype=np.float32)
        bin_label_h[angle_range_label] = lines_h_mask.astype(np.float32)

        bin_label = np.stack((bin_label_v, bin_label_h), 0)

        # np.repeat allocates a fresh array each time, so the in-place
        # masking below never touches the parent's ``weights``.
        weights_v = np.repeat(weights[np.newaxis, ...], n_bins, 0)
        weights_h = np.repeat(weights[np.newaxis, ...], n_bins, 0)

        close_idx_0 = angle_range_label - 1
        close_idx_1 = angle_range_label + 1

        # Bin 0 is a valid lower neighbour; only -1 (which would wrap around
        # to the last bin) has to be rejected.
        if close_idx_0 >= 0:
            weights_v[close_idx_0] *= (lines_v_mask == 0)
            weights_h[close_idx_0] *= (lines_h_mask == 0)

        if close_idx_1 < n_bins:
            weights_v[close_idx_1] *= (lines_v_mask == 0)
            weights_h[close_idx_1] *= (lines_h_mask == 0)

        weights = np.stack((weights_v, weights_h), 0)

        ###############################################################################

        softmax_label_v = 255 * np.ones(label.shape, dtype=np.int64)
        softmax_label_v[lines_v_mask.astype(bool)] = angle_range_label

        softmax_label_h = 255 * np.ones(label.shape, dtype=np.int64)
        softmax_label_h[lines_h_mask.astype(bool)] = angle_range_label

        softmax_label = np.stack((softmax_label_v, softmax_label_h), 0)

        ###############################################################################

        data.update(bin_label=bin_label,
                    softmax_label=softmax_label,
                    weights=weights)

        return data

Ejemplo n.º 24

0

Mostrar archivo

Archivo: cape.py Proyecto: taconite/PTF

    def __getitem__(self, idx):
        ''' Returns an item of the dataset.

        Loads the stored body data for the sample, applies the rotation
        augmentation to the bone transforms, re-poses the A-pose mesh with
        linear blend skinning, samples query points (uniformly in a padded
        box and around the mesh surface), computes their occupancies and
        skinning indices, and maps them back to A-pose space.

        Args:
            idx (int): ID of data point

        Returns:
            dict: The query points under ``'points'`` (modes ``train`` and
            ``test``) or ``'points_iou'`` (any other mode), every further
            per-point field under ``'<field>.<name>'``, plus ``'inputs'``
            and ``'idx'``.

        Raises:
            ValueError: If ``self.input_type`` is neither ``'pointcloud'``
                nor ``'voxel'``.
        '''
        data_path = self.data[idx]['data_path']
        subject = self.data[idx]['subject']
        gender = self.data[idx]['gender']

        aug_rot = self.augm_params().astype(np.float32)

        points_dict = np.load(data_path)

        # 3D models and points. The stored 'loc' and 'scale' are not read:
        # both are recomputed below from the augmented posed mesh.
        trans = points_dict['trans'].astype(np.float32)
        root_loc = points_dict['Jtr'][0].astype(np.float32)

        # Also get GT SMPL poses
        pose_body = points_dict['pose_body']
        pose_hand = points_dict['pose_hand']
        pose = np.concatenate([pose_body, pose_hand], axis=-1)
        pose = R.from_rotvec(pose.reshape([-1, 3]))

        body_mesh_a_pose = points_dict['a_pose_mesh_points']
        # Break symmetry if given in float16:
        if body_mesh_a_pose.dtype == np.float16:
            body_mesh_a_pose = body_mesh_a_pose.astype(np.float32)
            body_mesh_a_pose += 1e-4 * np.random.randn(*body_mesh_a_pose.shape)
        else:
            body_mesh_a_pose = body_mesh_a_pose.astype(np.float32)

        n_smpl_points = body_mesh_a_pose.shape[0]

        bone_transforms = points_dict['bone_transforms'].astype(np.float32)
        # Apply rotation augmentation to bone transformations
        bone_transforms_aug = np.matmul(np.expand_dims(aug_rot, axis=0), bone_transforms)
        bone_transforms_aug[:, :3, -1] += root_loc - trans - np.dot(aug_rot[:3, :3], root_loc - trans)
        bone_transforms = bone_transforms_aug
        # Get augmented posed-mesh
        skinning_weights = self.skinning_weights[gender]
        if self.use_abs_bone_transforms:
            J_regressor = self.J_regressors[gender]

        # Per-vertex 4x4 transform: skinning-weighted blend of bone transforms
        T = np.dot(skinning_weights, bone_transforms.reshape([-1, 16])).reshape([-1, 4, 4])

        homogen_coord = np.ones([n_smpl_points, 1], dtype=np.float32)
        a_pose_homo = np.concatenate([body_mesh_a_pose - trans, homogen_coord], axis=-1).reshape([n_smpl_points, 4, 1])
        body_mesh = np.matmul(T, a_pose_homo)[:, :3, 0].astype(np.float32) + trans

        # Get extents of model.
        bb_min = np.min(body_mesh, axis=0)
        bb_max = np.max(body_mesh, axis=0)
        total_size = (bb_max - bb_min).max()
        # Scales all dimensions equally.
        scale = max(1.6, total_size)    # 1.6 is the magic number from IPNet
        loc = np.array(
            [(bb_min[0] + bb_max[0]) / 2,
             (bb_min[1] + bb_max[1]) / 2,
             (bb_min[2] + bb_max[2]) / 2],
            dtype=np.float32
        )

        posed_trimesh = trimesh.Trimesh(vertices=body_mesh, faces=self.faces)

        n_points_uniform = int(self.points_size * self.points_uniform_ratio)
        n_points_surface = self.points_size - n_points_uniform

        boxsize = 1 + self.points_padding
        points_uniform = np.random.rand(n_points_uniform, 3)
        points_uniform = boxsize * (points_uniform - 0.5)
        # Scale points in (padded) unit box back to the original space
        points_uniform *= scale
        points_uniform += loc
        # Sample points around posed-mesh surface
        n_points_surface_cloth = n_points_surface // 2 if self.double_layer else n_points_surface
        points_surface = posed_trimesh.sample(n_points_surface_cloth + self.input_pointcloud_n)
        if self.input_type == 'pointcloud':
            # The samples beyond the surface budget become the noisy input cloud
            input_pointcloud = points_surface[n_points_surface_cloth:]
            noise = self.input_pointcloud_noise * np.random.randn(*input_pointcloud.shape)
            input_pointcloud = (input_pointcloud + noise).astype(np.float32)

        points_surface = points_surface[:n_points_surface_cloth]
        points_surface += np.random.normal(scale=self.points_sigma, size=points_surface.shape)

        if self.double_layer:
            # Take the remainder rather than a second `// 2`, so that the
            # total number of query points is exactly self.points_size even
            # when n_points_surface is odd (the code below relies on that).
            n_points_surface_minimal = n_points_surface - n_points_surface_cloth

            posedir = self.posedirs[gender]
            minimal_shape_path = os.path.join(self.cape_path, 'cape_release', 'minimal_body_shape', subject, subject + '_minimal.npy')
            minimal_shape = np.load(minimal_shape_path)
            pose_mat = pose.as_matrix()
            ident = np.eye(3)
            pose_feature = (pose_mat - ident).reshape([207, 1])
            pose_offsets = np.dot(posedir.reshape([-1, 207]), pose_feature).reshape([6890, 3])
            minimal_shape += pose_offsets

            if self.use_abs_bone_transforms:
                Jtr_cano = np.dot(J_regressor, minimal_shape)
                Jtr_cano = Jtr_cano[IPNET2SMPL_IDX, :]

            a_pose_homo = np.concatenate([minimal_shape, homogen_coord], axis=-1).reshape([n_smpl_points, 4, 1])
            minimal_body_mesh = np.matmul(T, a_pose_homo)[:, :3, 0].astype(np.float32) + trans
            minimal_posed_trimesh = trimesh.Trimesh(vertices=minimal_body_mesh, faces=self.faces)

            # Sample points around minimally clothed posed-mesh surface
            points_surface_minimal = minimal_posed_trimesh.sample(n_points_surface_minimal)
            points_surface_minimal += np.random.normal(scale=self.points_sigma, size=points_surface_minimal.shape)

            points_surface = np.vstack([points_surface, points_surface_minimal])

        # Check occupancy values for sampled points
        query_points = np.vstack([points_uniform, points_surface]).astype(np.float32)
        if self.double_layer:
            # Double-layer occupancies, as was done in IPNet
            # 0: outside, 1: between body and cloth, 2: inside body mesh
            occupancies_cloth = check_mesh_contains(posed_trimesh, query_points)
            occupancies_minimal = check_mesh_contains(minimal_posed_trimesh, query_points)
            occupancies = occupancies_cloth.astype(np.int64)
            occupancies[occupancies_minimal] = 2
        else:
            occupancies = check_mesh_contains(posed_trimesh, query_points).astype(np.float32)

        # Skinning inds by querying nearest SMPL vertex on the clothed mesh
        # NOTE(review): minimal_body_mesh only exists when self.double_layer
        # is set; query_on_clothed=False without double_layer fails here with
        # an unbound-local error.
        kdtree = KDTree(body_mesh if self.query_on_clothed else minimal_body_mesh)
        _, p_idx = kdtree.query(query_points)
        pts_W = skinning_weights[p_idx, :]
        skinning_inds_ipnet = self.part_labels[p_idx] # skinning inds (14 parts)
        skinning_inds_smpl = pts_W.argmax(1)   # full skinning inds (24 parts)
        if self.num_joints == 14:
            skinning_inds = skinning_inds_ipnet
        else:
            skinning_inds = skinning_inds_smpl

        # Invert LBS to get query points in A-pose space
        T = np.dot(pts_W, bone_transforms.reshape([-1, 16])).reshape([-1, 4, 4])
        T = np.linalg.inv(T)

        homogen_coord = np.ones([self.points_size, 1], dtype=np.float32)
        posed_homo = np.concatenate([query_points - trans, homogen_coord], axis=-1).reshape([self.points_size, 4, 1])
        query_points_a_pose = np.matmul(T, posed_homo)[:, :3, 0].astype(np.float32) + trans

        if self.use_abs_bone_transforms:
            assert (not self.use_v_template and self.num_joints == 24)
            # NOTE(review): Jtr_cano is only computed in the double_layer
            # branch above.
            query_points_a_pose -= Jtr_cano[SMPL2IPNET_IDX[skinning_inds], :]

        if self.use_v_template:
            # NOTE(review): minimal_shape is only loaded in the double_layer
            # branch above.
            v_template = self.v_templates[gender]
            pose_shape_offsets = v_template - minimal_shape
            query_points_template = query_points_a_pose + pose_shape_offsets[p_idx, :]

        sc_factor = 1.0 / scale * 1.5 if self.normalized_scale else 1.0 # 1.5 is the magic number from IPNet
        offset = loc

        bone_transforms_inv = bone_transforms.copy()
        bone_transforms_inv[:, :3, -1] += trans - loc
        bone_transforms_inv = np.linalg.inv(bone_transforms_inv)
        bone_transforms_inv[:, :3, -1] *= sc_factor

        # The None key marks the main field; it is renamed to the bare field
        # name when the output dict is assembled below.
        data = {
            None: (query_points - offset) * sc_factor,
            'occ': occupancies,
            'trans': trans,
            'root_loc': root_loc,
            'pts_a_pose': (query_points_a_pose - (trans if self.use_global_trans else offset)) * sc_factor,
            'skinning_inds': skinning_inds,
            'skinning_inds_ipnet': skinning_inds_ipnet,
            'skinning_inds_smpl': skinning_inds_smpl,
            'loc': loc,
            'scale': scale,
            'bone_transforms': bone_transforms,
            'bone_transforms_inv': bone_transforms_inv,
        }

        if self.use_v_template:
            data.update({'pts_template': (query_points_template - (trans if self.use_global_trans else offset)) * sc_factor})

        if self.mode in ['test']:
            data.update({'smpl_vertices': body_mesh, 'smpl_a_pose_vertices': body_mesh_a_pose})
            if self.double_layer:
                data.update({'minimal_smpl_vertices': minimal_body_mesh})

        data_out = {}
        field_name = 'points' if self.mode in ['train', 'test'] else 'points_iou'
        for k, v in data.items():
            if k is None:
                data_out[field_name] = v
            else:
                data_out['%s.%s' % (field_name, k)] = v

        if self.input_type == 'pointcloud':
            data_out.update(
                {'inputs': (input_pointcloud - offset) * sc_factor,
                 'idx': idx,
                }
            )
        elif self.input_type == 'voxel':
            voxels = np.unpackbits(points_dict['voxels_occ']).astype(np.float32)
            voxels = np.reshape(voxels, [self.voxel_res] * 3)
            data_out.update(
                {'inputs': voxels,
                 'idx': idx,
                }
            )
        else:
            raise ValueError('Unsupported input type: {}'.format(self.input_type))

        return data_out

Ejemplo n.º 25

0

Mostrar archivo

Archivo: ade20k.py Proyecto: wdeng/SOGNet

    def __getitem__(self, index):
        """Build the network inputs and targets for one roidb entry.

        Args:
            index: Position of the entry in ``self.roidb``.

        Returns:
            tuple: ``(data, label, index)``. ``data`` holds the image blob
            and ``im_info`` (plus whatever ``self.build_crf_graph`` returns
            when the CRF is enabled); ``label`` holds the roidb entry and,
            depending on the configuration and phase, the RPN targets and
            the segmentation / mask ground truth.

        Raises:
            NotImplementedError: If ``config.network.has_rpn`` is false.
        """
        entry = self.roidb[index]
        is_test = self.phase == 'test'

        blob = defaultdict(list)
        im_blob, im_scales = self.get_image_blob([entry])

        if not config.network.has_rpn:
            raise NotImplementedError

        if is_test:
            im_info = np.array(
                [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]],
                np.float32)
            data = {'data': im_blob, 'im_info': im_info}
            label = {'roidb': entry}
        else:
            add_rpn_blobs(blob, im_scales, [entry])
            data = {'data': im_blob, 'im_info': blob['im_info']}
            label = {'roidb': blob['roidb'][0]}
            # Copy the per-stride RPN targets out of the blob under their
            # shorter label names.
            for stride in config.network.rpn_feat_stride:
                label['rpn_labels_fpn{}'.format(stride)] = blob[
                    'rpn_labels_int32_wide_fpn{}'.format(stride)].astype(
                        np.int64)
                label['rpn_bbox_targets_fpn{}'.format(stride)] = blob[
                    'rpn_bbox_targets_wide_fpn{}'.format(stride)]
                label['rpn_bbox_inside_weights_fpn{}'.format(stride)] = blob[
                    'rpn_bbox_inside_weights_wide_fpn{}'.format(stride)]
                label['rpn_bbox_outside_weights_fpn{}'.format(stride)] = blob[
                    'rpn_bbox_outside_weights_wide_fpn{}'.format(stride)]

        if config.network.has_fcn_head:
            im_scale = im_scales[0]

            def rescale(array):
                # Nearest-neighbour resize by the image scale factor.
                return cv2.resize(array,
                                  None,
                                  None,
                                  fx=im_scale,
                                  fy=im_scale,
                                  interpolation=cv2.INTER_NEAREST)

            # The annotation map is loaded the same way in every phase.
            annotation_path = entry['image'].replace('images', 'annotations')
            seg_gt = np.array(Image.open(annotation_path))
            if entry['flipped']:
                seg_gt = np.fliplr(seg_gt)
            seg_gt = rescale(seg_gt)
            label['seg_gt'] = seg_gt

            if not is_test:
                roidb = label['roidb']
                segms = roidb['segms']
                blob_h, blob_w = im_blob.shape[-2], im_blob.shape[-1]

                label['gt_classes'] = roidb['gt_classes']
                label['mask_gt'] = np.zeros(
                    (len(label['gt_classes']), blob_h, blob_w))

                for i in range(len(label['gt_classes'])):
                    segm = segms[i]
                    if type(segm) is list and type(segm[0]) is list:
                        # List of polygons: rasterize at the unscaled image
                        # size, then rescale like the image.
                        canvas = Image.new(
                            'L', (int(np.round(blob_w / im_scale)),
                                  int(np.round(blob_h / im_scale))), 0)
                        painter = ImageDraw.Draw(canvas)
                        for polygon in segm:
                            painter.polygon(tuple(polygon), outline=1, fill=1)
                        instance_mask = rescale(np.array(canvas))
                    else:
                        assert type(segm) is dict or type(segm[0]) is dict
                        if type(segm) is dict:
                            encoded = mask_util.frPyObjects(
                                [segm], segm['size'][0], segm['size'][1])
                            decoded = mask_util.decode(encoded)
                        else:
                            assert len(segm) == 1
                            decoded = mask_util.decode(segm)
                        instance_mask = rescale(decoded[:, :, 0])
                    label['mask_gt'][i] = instance_mask

                if config.train.fcn_with_roi_loss:
                    mask_size = config.network.mask_size
                    foreground = np.where(roidb['gt_classes'] > 0)[0]
                    gt_boxes = np.around(roidb['boxes'][foreground] *
                                         im_scale).astype(np.int32)
                    label['seg_roi_gt'] = np.zeros(
                        (len(gt_boxes), mask_size, mask_size), dtype=np.int64)
                    for i, box in enumerate(gt_boxes):
                        # Widen degenerate boxes by one pixel so the crop
                        # below is never empty along that axis.
                        if box[3] == box[1]:
                            box[3] += 1
                        if box[2] == box[0]:
                            box[2] += 1
                        crop = seg_gt[box[1]:box[3], box[0]:box[2]]
                        label['seg_roi_gt'][i] = cv2.resize(
                            crop, (mask_size, mask_size),
                            interpolation=cv2.INTER_NEAREST)

        if config.network.has_crf:
            data.update(self.build_crf_graph(im_blob))

        return data, label, index