def __getitem__(self, index):
    """Return the parent sample extended with per-direction histogram labels.

    For both the ``v`` and ``h`` masks of the parent sample this adds the
    outputs of ``create_bin_label`` (label and weights) and
    ``create_softmax_label``.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place with ``bin_label_*``,
        ``weights_*`` and ``softmax_label_*`` entries.
    """
    sample = super(OriDatasetHist, self).__getitem__(index)
    masks = {"v": sample["mask_v"], "h": sample["mask_h"]}
    base_weights = sample["weights"]
    angle_idx = sample["angle_range_label"]

    # Same call order as before: both bin labels first, then both softmax labels.
    bins = {
        name: self.create_bin_label(mask, angle_idx, base_weights)
        for name, mask in masks.items()
    }
    softmaxes = {
        name: self.create_softmax_label(mask, angle_idx)
        for name, mask in masks.items()
    }

    sample.update(
        bin_label_v=bins["v"][0],
        weights_v=bins["v"][1],
        bin_label_h=bins["h"][0],
        weights_h=bins["h"][1],
        softmax_label_v=softmaxes["v"],
        softmax_label_h=softmaxes["h"],
    )
    return sample
def __getitem__(self, index):
    """Build one training sample of paired and unpaired keypoints.

    Args:
        index: Position used for both ``self.dataset`` and
            ``self.skeleton_dataset``.

    Returns:
        dict with tensors ``B``, ``paired_cond_B``, ``paired_B`` and the
        paths ``A_paths`` / ``cond_A_path``; when ``self.load_images`` is
        set, the transformed images ``A`` and ``cond_A`` are included too.
    """
    # Paired sample: images, their paths and the matching keypoints.
    (cond_img, target_img, cond_path, target_paths,
     cond_points, target_points) = self._get_sample(self.dataset, index)

    # Unpaired keypoints from the skeleton-only dataset (no image loading).
    _, _, _, _, _, free_points = self._get_sample(
        self.skeleton_dataset, index, load_image=False)

    # Normalise every keypoint set with the (square) output size.
    cond_points, target_points, free_points = [
        utils.normalize_points(points, self.fineSize, self.fineSize)
        for points in (cond_points, target_points, free_points)
    ]

    if self.load_images:
        target_tensor = self.A_transform(target_img)
        cond_tensor = self.A_transform(cond_img)

    sample = {
        'B': torch.from_numpy(free_points),
        'paired_cond_B': torch.from_numpy(cond_points),
        'paired_B': torch.from_numpy(target_points),
        'A_paths': target_paths,
        'cond_A_path': cond_path,
    }
    if self.load_images:
        sample['A'] = target_tensor
        sample['cond_A'] = cond_tensor
    return sample
def __getitem__(self, index):
    """Return the parent sample extended with orientation masks.

    ``mask_v``, ``mask_h`` and ``mask`` come from ``create_mask`` when the
    test label contains at least one pixel equal to 1, otherwise they are
    all-zero float32 arrays with the label's shape. ``mask_test`` is always
    produced by ``create_mask_test``.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place.
    """
    sample = super(OriDataset, self).__getitem__(index)
    weight = sample['weights']
    label_test = sample['label_test']

    if np.any(label_test == 1):
        mask_v, mask_h, mask = self.create_mask(label_test, weight, width=16)
    else:
        # Nothing to draw: fall back to three independent empty masks.
        mask, mask_v, mask_h = (
            np.zeros(label_test.shape, dtype=np.float32) for _ in range(3)
        )

    mask_test = self.create_mask_test(label_test, weight, width1=24, width2=8)
    sample.update(mask_v=mask_v, mask_h=mask_h, mask=mask, mask_test=mask_test)
    return sample
def predict_annotated_example(ann, experiment_results):
    """Run a trained model on one annotated example and summarise the result.

    Args:
        ann: Mapping with an ``'example'`` (object exposing
            ``sentence1_parse``, ``sentence2_parse`` and ``gold_label``) and
            an iterable of ``'annotations'`` category names.
        experiment_results: Mapping holding the fitted ``'model'`` (with a
            ``predict`` method) and the feature function ``'phi'``.

    Returns:
        dict mapping every annotation category to ``True`` plus the keys
        ``'gold'``, ``'prediction'`` and ``'correct'``.
    """
    classifier = experiment_results['model']
    featurize = experiment_results['phi']
    example = ann['example']

    features = featurize(example.sentence1_parse, example.sentence2_parse)
    predicted = classifier.predict([features])[0]
    expected = example.gold_label

    record = dict.fromkeys(ann['annotations'], True)
    record['gold'] = expected
    record['prediction'] = predicted
    record['correct'] = expected == predicted
    return record
def __getitem__(self, index):
    """Merge a re-id sample with the attributes of its (remapped) identity.

    Args:
        index: Position in ``self.reid_dataset``.

    Returns:
        The re-id sample dict, updated in place with the attribute entry.
    """
    sample = self.reid_dataset[index]
    pid = sample['pid']

    # The attribute dataset is indexed by rewritten pids. Samples with pid
    # -1 or 0 have no mapping, so they receive placeholder attributes from
    # entry 0, which are ignored later on.
    attribute_index = 0 if pid in (-1, 0) else self.label_dic[pid]

    sample.update(self.attribute_dataset[attribute_index])
    return sample
def load_dataset(self, partition, size=(84, 84)):
    """Load a mini-ImageNet partition and normalise every image.

    Args:
        partition: Partition name used in the pickle file name; the special
            value ``'train_val'`` merges the ``train`` and ``val`` pickles.
        size: Target ``(height, width)`` of the resized images.

    Returns:
        Tuple ``(data, label_encoder)`` where ``data`` maps each class to its
        images, each replaced by a float32 channel-first array.
    """
    print("Loading dataset")
    compacted_dir = os.path.join(self.root, 'compacted_datasets')

    def read_pickle(file_name):
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(os.path.join(compacted_dir, file_name), 'rb') as handle:
            return pickle.load(handle)

    if partition == 'train_val':
        data = read_pickle('mini_imagenet_%s.pickle' % 'train')
        data.update(read_pickle('mini_imagenet_%s.pickle' % 'val'))
    else:
        data = read_pickle('mini_imagenet_%s.pickle' % partition)
    label_encoder = read_pickle('mini_imagenet_label_encoder.pickle')

    # Resize images and normalize
    channel_means = (120.45, 115.74, 104.65)  # R, G, B
    target_wh = (size[1], size[0])  # PIL expects (width, height)
    for images in data.values():
        for position, raw in enumerate(images):
            resized = pil_image.fromarray(np.uint8(raw)).resize(target_wh)
            chw = np.transpose(np.array(resized, dtype='float32'), (2, 0, 1))
            for channel, mean in enumerate(channel_means):
                chw[channel, :, :] -= mean
            chw /= 127.5
            images[position] = chw

    print("Num classes " + str(len(data)))
    num_images = sum(len(images) for images in data.values())
    print("Num images " + str(num_images))
    return data, label_encoder
def __getitem__(self, index):
    """Return the parent sample extended with derived multi-task targets.

    Adds ``label_3c`` (label 0 first set to 1, then every non-255 pixel
    decremented), ``label_2c`` (float mask of ``label == 1``) and
    ``weights`` (float mask of ``label`` being 0 or 1).

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place.
    """
    sample = super(MultiTaskDataset_v2, self).__getitem__(index)
    label = sample['label']
    valid = label != 255

    three_class = label.copy()
    three_class[label == 0] = 1
    three_class[valid] -= 1

    two_class = (label == 1).astype(np.float32)
    pixel_weights = np.logical_or(label == 1, label == 0).astype(np.float32)

    sample.update(label_3c=three_class, label_2c=two_class, weights=pixel_weights)
    return sample
def collate_fn(self, items):
    """Flatten nested ``(uttid, data)`` items into a list of sample dicts.

    Each sample dict is enriched in place with the utterance's auxiliary
    info (minus ``"length"``), gets ``"x"`` converted to a float tensor,
    optionally gets tokenised ``"labels"``, loses ``"rate"`` and gains
    ``"uttid"``.

    Args:
        items: Iterable of iterables of ``(uttid, data)`` pairs.

    Returns:
        list of the updated ``data`` dicts, in input order.
    """
    batch = []
    for uttid, data in itertools.chain.from_iterable(items):
        # Build a filtered copy instead of popping from the stored dict:
        # popping would permanently delete "length" from self.aux_utt_info.
        aux_info = {
            key: value
            for key, value in self.aux_utt_info.get(uttid, {}).items()
            if key != "length"
        }
        data.update(aux_info)
        data["x"] = torch.from_numpy(data["x"]).float()
        if self.tokenizer is not None:
            data["labels"] = torch.tensor(
                self.tokenizer.text2ids(data["text"]))
        data.pop("rate", None)
        data["uttid"] = uttid
        batch.append(data)
    return batch
def __getitem__(self, index):
    """Return a record merged with its (optionally cached) preprocessing.

    When ``self.config.use_cache`` is set, the preprocessed values are read
    from the pickle file named by ``get_cache_name``; the file is created
    via ``save_cache`` first if it does not exist. Otherwise ``preprocess``
    is called directly.

    Args:
        index: Position in ``self.filtered_data``.

    Returns:
        The stored record dict, updated in place with the processed values.
    """
    data = self.filtered_data[index]
    if self.config.use_cache:
        cache_path = self.get_cache_name(data)
        if not os.path.exists(cache_path):
            self.save_cache(data)
        # The context manager closes the file; no explicit close() needed.
        # NOTE: pickle executes arbitrary code on load; cache must be trusted.
        with open(cache_path, 'rb') as file:
            processed_data = pickle.load(file)
    else:
        processed_data = self.preprocess(data)
    # NOTE(review): this mutates the record held in self.filtered_data, so
    # processed values stay resident after the first access.
    data.update(processed_data)
    return data
def __getitem__(self, index):
    """Return the parent sample extended with 3-class and 2-class labels.

    ``label_3c``: label 0 is first set to 1, then every non-255 pixel is
    decremented. ``label_2c``: a copy of the label in which classes 2 and 3
    become 255. In both outputs the value 255 is finally replaced by
    ``self.ignore_label``.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place.
    """
    sample = super(MultiTaskDataset, self).__getitem__(index)
    label = sample['label']
    valid = label != 255

    three_class = label.copy()
    three_class[label == 0] = 1
    three_class[valid] -= 1

    two_class = label.copy()
    two_class[np.logical_or(label == 2, label == 3)] = 255

    # Translate the 255 marker into the configured ignore label.
    for target in (three_class, two_class):
        target[target == 255] = self.ignore_label

    sample.update(label_3c=three_class, label_2c=two_class)
    return sample
def __getitem__(self, idx):
    """Return a clip of consecutive frames with per-frame ground truth.

    Args:
        idx: Index forwarded to ``pre_continuous_frames``.

    Returns:
        dict with ``'imgs'`` and ``'gt_instances'`` (one entry per frame)
        and, when ``self.args.vis`` is set, the ``'ori_img'`` list taken
        from the targets.
    """
    frames, frame_targets = self.pre_continuous_frames(idx)
    if self._transforms is not None:
        frames, frame_targets = self._transforms(frames, frame_targets)

    # Each frame's instances are built with dims 1 and 2 of that frame.
    instances = [
        self._targets_to_instances(target, frame.shape[1:3])
        for frame, target in zip(frames, frame_targets)
    ]

    sample = {'imgs': frames, 'gt_instances': instances}
    if self.args.vis:
        sample['ori_img'] = [target['ori_img'] for target in frame_targets]
    return sample
def read_dir(data_dir):
    """Merge the user data of every ``.json`` file in a directory.

    Files are processed in sorted name order so that the order of ``groups``
    and the resolution of duplicate user keys are deterministic
    (``os.listdir`` order is arbitrary).

    Args:
        data_dir: Directory containing JSON files, each with a
            ``'user_data'`` mapping and optionally a ``'hierarchies'`` list.

    Returns:
        Tuple ``(clients, groups, data)``: the sorted user keys, the
        concatenated hierarchies, and a ``defaultdict`` (default ``None``)
        mapping each user to its data.
    """
    groups = []
    data = defaultdict(lambda: None)

    json_files = sorted(
        name for name in os.listdir(data_dir) if name.endswith('.json'))
    for name in json_files:
        with open(os.path.join(data_dir, name), 'r') as inf:
            cdata = json.load(inf)
        if 'hierarchies' in cdata:
            groups.extend(cdata['hierarchies'])
        data.update(cdata['user_data'])

    # The client list is derived from the merged data; the per-file 'users'
    # lists were previously collected and then discarded.
    clients = sorted(data.keys())
    return clients, groups, data
def __getitem__(self, idx):
    """Return a clip of frames with ground truth, using a per-dataset transform.

    The transform is looked up in ``self.dataset2transform`` by the
    ``'dataset'`` entry of the first target.

    Args:
        idx: Index forwarded to ``_get_sample_range``.

    Returns:
        dict with ``'imgs'`` and ``'gt_instances'`` (one entry per frame)
        and, when ``self.args.vis`` is set, the ``'ori_img'`` list taken
        from the targets.
    """
    first, last, step = self._get_sample_range(idx)
    frames, frame_targets = self.pre_continuous_frames(first, last, step)

    source_name = frame_targets[0]['dataset']
    transform = self.dataset2transform[source_name]
    if transform is not None:
        frames, frame_targets = transform(frames, frame_targets)

    # Each frame's instances are built with dims 1 and 2 of that frame.
    instances = [
        self._targets_to_instances(target, frame.shape[1:3])
        for frame, target in zip(frames, frame_targets)
    ]

    sample = {'imgs': frames, 'gt_instances': instances}
    if self.args.vis:
        sample['ori_img'] = [target['ori_img'] for target in frame_targets]
    return sample
def load_dataset(self):
    """Load every shard of the configured tiered-ImageNet partition.

    The shards ``tiered_imagenet_<partition>_<k>.pickle`` (k starting at 1)
    are merged into one dict, then every image is resized to
    ``(self.data_size[1], self.data_size[2])`` (height, width), converted to
    channel-first float32, mean-subtracted per channel and divided by 127.5.

    Returns:
        dict mapping each class to its list of normalised images.

    Raises:
        ValueError: If ``self.partition`` is not ``'train'``, ``'val'`` or
            ``'test'`` (previously this surfaced as an UnboundLocalError).
    """
    print("Loading dataset")
    shard_counts = {'train': 18, 'val': 5, 'test': 8}
    if self.partition not in shard_counts:
        raise ValueError(
            "Unknown partition {!r}; expected one of {}".format(
                self.partition, sorted(shard_counts)))

    data = {}
    for shard in range(1, shard_counts[self.partition] + 1):
        shard_path = os.path.join(
            self.root, 'tiered-imagenet/compacted_datasets',
            'tiered_imagenet_{}_{}.pickle'.format(self.partition, shard))
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(shard_path, 'rb') as handle:
            data.update(pickle.load(handle))

    # Resize images and normalize
    channel_means = (120.45, 115.74, 104.65)  # R, G, B
    target_wh = (self.data_size[2], self.data_size[1])  # PIL: (width, height)
    for images in data.values():
        for position, raw in enumerate(images):
            resized = pil_image.fromarray(np.uint8(raw)).resize(target_wh)
            chw = np.transpose(np.array(resized, dtype='float32'), (2, 0, 1))
            for channel, mean in enumerate(channel_means):
                chw[channel, :, :] -= mean
            chw /= 127.5
            images[position] = chw

    print("Num classes " + str(len(data)))
    num_images = sum(len(images) for images in data.values())
    print("Num images " + str(num_images))
    return data
def __getitem__(self, index):
    """Load, augment and normalise one image/label pair.

    Args:
        index: Index forwarded to ``_load_data``.

    Returns:
        dict with ``image_id``, the normalised ``image`` as a NumPy array,
        the float32 ``label``, all-ones ``weights`` shaped like the label
        and ``vis_image`` (a copy of the augmented image before
        normalisation). In training mode ``junction_gt`` and
        ``junction_weights`` from ``compute_junction_gt`` are added.
    """
    image_id, raw_image, raw_label = self._load_data(index)
    augmented, raw_label = self.augmentations(raw_image, raw_label)

    # Keep an untouched copy for visualisation before tensor conversion.
    vis_image = augmented.copy()
    normalized = TF.normalize(TF.to_tensor(augmented), self.mean, self.var)
    label = raw_label.astype(np.float32)

    sample = {
        'image_id': image_id,
        'image': normalized.numpy(),
        'label': label,
        'weights': np.ones_like(label, dtype=np.float32),
        'vis_image': vis_image,
    }
    if self.training:
        junction_gt, junction_weights = compute_junction_gt(label)
        sample['junction_gt'] = junction_gt
        sample['junction_weights'] = junction_weights
    return sample
def __getitem__(self, index):
    """Return the parent sample extended with per-angle line targets.

    Samples whose ``angle_range_label`` is 255 are returned unchanged.
    Otherwise line masks for the ``v`` and ``h`` angle ranges are rendered
    (restricted to pixels where ``label == 0``) and turned into:

    * ``bin_label``: stacked (v, h) arrays with the mask in the selected bin;
    * ``weights``: stacked (v, h) arrays where the selected bin holds the
      parent weights and the other bins hold them zeroed on the lines;
    * ``softmax_label``: stacked (v, h) int64 maps holding
      ``angle_range_label`` on the lines and 255 elsewhere.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place (``weights`` is replaced).
    """
    sample = super(HistDataset, self).__getitem__(index)
    label_test = sample['label_test']
    label = sample['label']
    base_weights = sample['weights']
    angle_range_label = sample['angle_range_label']
    if angle_range_label == 255:
        return sample

    # Line extraction and rasterisation for both directions.
    found_v, rot_angle = lines.extract_lines((label_test == 1), self.angle_range_v)
    found_h, _ = lines.extract_lines((label_test == 1), self.angle_range_h)
    grid_v = lines.create_grid(label.shape, found_v, width=16) * (label == 0).astype(int)
    grid_h = lines.create_grid(label.shape, found_h, width=16) * (label == 0).astype(int)

    rot_angle = np.rad2deg(rot_angle)
    angle_dist = np.abs(self.rot_angles - rot_angle)

    # Choose the target bin and the number of bins.
    if self.combine:
        bin_idx = angle_range_label
        n_angles = len(self.rot_angles) - 1
    else:
        bin_idx = np.argmin(angle_dist)
        n_angles = len(self.rot_angles)
    target_shape = (n_angles,) + label.shape

    def binary_target(grid):
        target = np.zeros(target_shape, dtype=np.float32)
        target[bin_idx] = grid.astype(np.float32)
        return target

    def bin_weights(grid):
        off_line = (grid != 1).astype(np.float32)
        stacked = np.repeat(base_weights[np.newaxis, ...] * off_line, n_angles, 0)
        stacked[bin_idx] = base_weights
        return stacked

    def softmax_target(grid):
        target = 255 * np.ones(label.shape, dtype=np.int64)
        target[grid.astype(bool)] = angle_range_label
        return target

    bin_label = np.stack((binary_target(grid_v), binary_target(grid_h)), 0)
    weights = np.stack((bin_weights(grid_v), bin_weights(grid_h)), 0)
    softmax_label = np.stack((softmax_target(grid_v), softmax_target(grid_h)), 0)

    sample.update(bin_label=bin_label, softmax_label=softmax_label, weights=weights)
    return sample
def __getitem__(self, idx):
    """Return a spatial image pair as normalised RGB, gray and log-RGB tensors.

    Args:
        idx: Index forwarded to ``self.dataset.get_rgb_spatial_pair``.

    Returns:
        dict with ``rgb1``, ``rgb2``, ``gray1``, ``gray2``, ``logrgb1`` and
        ``logrgb2``; when ``self.opts.compute_matches`` is set, also
        ``matches11``, ``matches12`` and ``matches22`` (served from the
        per-index cache when ``self.cache_matches`` is enabled and filled).
    """
    # Get images
    rgb1, rgb2 = self.dataset.get_rgb_spatial_pair(idx)
    rgb1 = Image.fromarray(rgb1)
    rgb2 = Image.fromarray(rgb2)

    # The unused resize_scale / resize_offset locals were dropped; nothing
    # below ever read them.
    resize = transforms.Compose([
        transforms.Resize(min(self.opts.image_load_size)),
        transforms.CenterCrop(self.opts.image_load_size),
        custom_transforms.StatefulRandomCrop(self.opts.image_final_size)
        if self.random_crop
        else transforms.Resize(self.opts.image_final_size)
    ])
    make_grayscale = transforms.Grayscale()
    make_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(self.opts.image_mean, self.opts.image_std)
    ])
    # Grayscale images are normalised with the first channel's statistics.
    make_normalized_gray_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((self.opts.image_mean[0], ),
                             (self.opts.image_std[0], ))
    ])
    # Clamp to at the minimum to avoid computing log(0) = -inf
    make_log_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda tensor: tensor.clamp(1e-3, 1.)),
        transforms.Lambda(lambda tensor: tensor.log())
    ])

    rgb1 = resize(rgb1)
    rgb2 = resize(rgb2)
    gray1 = make_grayscale(rgb1)
    gray2 = make_grayscale(rgb2)

    if self.opts.compute_matches:
        if self.cache_matches and self.matches12[idx] is not None:
            matches11 = self.matches11[idx]
            matches12 = self.matches12[idx]
            matches22 = self.matches22[idx]
        else:
            matches11 = self._get_match_count(gray1, gray1)
            matches12 = self._get_match_count(gray1, gray2)
            matches22 = self._get_match_count(gray2, gray2)
            if self.cache_matches:
                self.matches11[idx] = matches11
                self.matches12[idx] = matches12
                self.matches22[idx] = matches22

    logrgb1 = make_log_tensor(rgb1)
    logrgb2 = make_log_tensor(rgb2)
    rgb1 = make_normalized_tensor(rgb1)
    rgb2 = make_normalized_tensor(rgb2)
    gray1 = make_normalized_gray_tensor(gray1)
    gray2 = make_normalized_gray_tensor(gray2)

    data = {
        'rgb1': rgb1,
        'rgb2': rgb2,
        'gray1': gray1,
        'gray2': gray2,
        'logrgb1': logrgb1,
        'logrgb2': logrgb2
    }
    if self.opts.compute_matches:
        data.update({
            'matches11': matches11,
            'matches12': matches12,
            'matches22': matches22
        })
    return data
def __getitem__(self, idx1):
    """Return a temporally offset image pair as normalised tensors.

    The second index is ``idx1`` plus a random interval in
    ``[-max_interval, max_interval]``, clamped to the valid range of
    ``self.dataset2``.

    Args:
        idx1: Index of the first image in ``self.dataset1``.

    Returns:
        dict with ``rgb1``, ``rgb2``, ``gray1``, ``gray2``, ``logrgb1`` and
        ``logrgb2``; when ``self.opts.compute_matches`` is set, also
        ``matches11``, ``matches12`` and ``matches22``.
    """
    interval = np.random.randint(
        low=-self.opts.max_interval, high=self.opts.max_interval + 1)
    idx2 = idx1 + interval
    if idx2 >= len(self.dataset2):
        idx2 = len(self.dataset2) - 1
    elif idx2 < 0:
        idx2 = 0

    # Get images
    rgb1 = Image.fromarray(self.dataset1.get_rgb(idx1))
    rgb2 = Image.fromarray(self.dataset2.get_rgb(idx2))

    # The unused resize_scale / resize_offset locals were dropped; nothing
    # below ever read them.
    resize = transforms.Compose([
        transforms.Resize(min(self.opts.image_load_size)),
        transforms.CenterCrop(self.opts.image_load_size),
        custom_transforms.StatefulRandomCrop(self.opts.image_final_size)
        if self.random_crop
        else transforms.Resize(self.opts.image_final_size)
    ])
    make_grayscale = transforms.Grayscale()
    make_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(self.opts.image_mean, self.opts.image_std)
    ])
    # Grayscale images are normalised with the first channel's statistics.
    make_normalized_gray_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            (self.opts.image_mean[0],), (self.opts.image_std[0],))
    ])
    # Clamp to at the minimum to avoid computing log(0) = -inf
    make_log_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda tensor: tensor.clamp(1e-3, 1.)),
        transforms.Lambda(lambda tensor: tensor.log())
    ])

    rgb1 = resize(rgb1)
    rgb2 = resize(rgb2)
    gray1 = make_grayscale(rgb1)
    gray2 = make_grayscale(rgb2)

    if self.opts.compute_matches:
        matches11 = self._get_match_count(gray1, gray1)
        matches12 = self._get_match_count(gray1, gray2)
        matches22 = self._get_match_count(gray2, gray2)

    logrgb1 = make_log_tensor(rgb1)
    logrgb2 = make_log_tensor(rgb2)
    rgb1 = make_normalized_tensor(rgb1)
    rgb2 = make_normalized_tensor(rgb2)
    gray1 = make_normalized_gray_tensor(gray1)
    gray2 = make_normalized_gray_tensor(gray2)

    data = {'rgb1': rgb1, 'rgb2': rgb2,
            'gray1': gray1, 'gray2': gray2,
            'logrgb1': logrgb1, 'logrgb2': logrgb2}
    if self.opts.compute_matches:
        data.update({'matches11': matches11,
                     'matches12': matches12,
                     'matches22': matches22})
    return data
def __getitem__(self, index):
    """Return the parent sample extended with line proposals and their labels.

    ``label_multiclass`` is always added: non-255 labels are decremented and
    clipped to ``[0, 2]``. When ``angle_range_label`` is not 255, line
    proposals are generated for the vertical range (starting at the selected
    rotation angle) and the horizontal range (vertical + 90 degrees) on the
    margin-cropped label, matched against the true lines, and stored as
    endpoints, ground truth and proposals (positives stacked before
    negatives). ``vis_image`` is replaced by its margin-cropped version.

    An empty proposal set used to drop into ``pdb``; it now emits a
    ``RuntimeWarning`` and continues, so non-interactive runs are not
    blocked.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place.
    """
    import warnings

    data = super(AngleDetectDatataset_v2, self).__getitem__(index)
    label = data['label']
    label_test = (data['label_test'] == 1)
    not_ignore = (label != 255)
    label_multiclass = label.copy()
    label_multiclass[not_ignore] = np.clip(
        label_multiclass[not_ignore] - 1, a_min=0, a_max=2)
    data.update(label_multiclass=label_multiclass)

    # Everything below works on the margin-cropped views.
    label = label_multiclass[self.margin:-self.margin,
                             self.margin:-self.margin]
    label_test = label_test[self.margin:-self.margin,
                            self.margin:-self.margin]
    vis_image = data['vis_image'][self.margin:-self.margin,
                                  self.margin:-self.margin]

    idx = data['angle_range_label']
    if idx != 255:
        edges_coords = self.extract_edges(label)
        sz = label_test.shape
        _rot_angle = self.rot_angles[idx]
        angle_range_v = np.array(
            (_rot_angle, _rot_angle + self.angle_step))
        angle_range_h = angle_range_v + 90.0

        proposed_lines_v, lines_endpoints_v = lines.get_line_proposals(
            angle_range_v, sz, angle_step=self.theta_step,
            rho_step=self.rho_step, edges_coords=edges_coords, label=label)
        true_lines_v, _ = lines.extract_lines(label_test, angle_range_v)
        lines_v_iou = self.get_lines_iou(
            true_lines_v, proposed_lines_v, label)

        proposed_lines_h, lines_endpoints_h = lines.get_line_proposals(
            angle_range_h, sz, angle_step=self.theta_step,
            rho_step=self.rho_step, edges_coords=edges_coords, label=label)
        true_lines_h, _ = lines.extract_lines(label_test, angle_range_h)
        lines_h_iou = self.get_lines_iou(
            true_lines_h, proposed_lines_h, label)

        lines_endpoints_v, lines_gt_v, (is_positive_v, is_negative_v) = \
            self.get_lines_gt(np.array(lines_endpoints_v), lines_v_iou,
                              return_is=True)
        if lines_endpoints_v.shape[0] == 0:
            warnings.warn(
                "No vertical line proposals selected for sample "
                "{}".format(index), RuntimeWarning)
        if self.debug:
            self.plot_gt(
                true_lines_v,
                np.array(proposed_lines_v)[is_positive_v].tolist(), label)
            self.plot_gt(
                true_lines_v,
                np.array(proposed_lines_v)[is_negative_v].tolist(), label)
            plt.show()
        proposed_lines_v = np.array(proposed_lines_v)
        proposed_lines_v = np.vstack((proposed_lines_v[is_positive_v],
                                      proposed_lines_v[is_negative_v]))

        lines_endpoints_h, lines_gt_h, (is_positive_h, is_negative_h) = \
            self.get_lines_gt(np.array(lines_endpoints_h), lines_h_iou,
                              return_is=True)
        if lines_endpoints_h.shape[0] == 0:
            warnings.warn(
                "No horizontal line proposals selected for sample "
                "{}".format(index), RuntimeWarning)
        if self.debug:
            self.plot_gt(
                true_lines_h,
                np.array(proposed_lines_h)[is_positive_h].tolist(), label)
            self.plot_gt(
                true_lines_h,
                np.array(proposed_lines_h)[is_negative_h].tolist(), label)
            plt.show()
        proposed_lines_h = np.array(proposed_lines_h)
        proposed_lines_h = np.vstack((proposed_lines_h[is_positive_h],
                                      proposed_lines_h[is_negative_h]))

        lines_gt = np.append(lines_gt_v, lines_gt_h)
        data.update(
            lines_endpoints_v=lines_endpoints_v,
            lines_endpoints_h=lines_endpoints_h,
            lines_gt=lines_gt,
            proposed_lines_v=proposed_lines_v,
            proposed_lines_h=proposed_lines_h,
            vis_image=vis_image,
        )
    return data
def load_dataset(self, partition, size=(84, 84)):
    """Load a mini-ImageNet partition without resizing or normalising it.

    Args:
        partition: Partition name used in the pickle file name; the special
            value ``'train_val'`` merges the ``train`` and ``val`` pickles.
        size: Unused here; kept for interface compatibility.

    Returns:
        Tuple ``(data, label_encoder)`` where ``data`` maps each class to its
        images and ``label_encoder`` is always ``None``.
    """
    print('Loading dataset ({})'.format(partition))
    compacted_dir = os.path.join(self.root, 'compacted_datasets')

    def read_pickle(file_name):
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(os.path.join(compacted_dir, file_name), 'rb') as handle:
            return pickle.load(handle)

    # The train split goes through the project's pickle_load helper; every
    # other split is read with the standard pickle module.
    if partition == 'train_val':
        data = pickle_load(
            os.path.join(compacted_dir, 'mini_imagenet_%s.pickle' % 'train'))
        data.update(read_pickle('mini_imagenet_%s.pickle' % 'val'))
    elif partition == 'train':
        data = pickle_load(
            os.path.join(compacted_dir, 'mini_imagenet_train.pickle'))
    else:
        data = read_pickle('mini_imagenet_%s.pickle' % partition)

    label_encoder = None

    print("Num classes " + str(len(data)))
    num_images = sum(len(images) for images in data.values())
    print("Num images " + str(num_images))
    return data, label_encoder
def __getitem__(self, index):
    """Merge a re-id sample with the attribute entry addressed by its pid.

    Args:
        index: Position in ``self.reid_dataset``.

    Returns:
        The re-id sample dict, updated in place with the attribute entry.
    """
    sample = self.reid_dataset[index]
    # The sample's pid is used directly as the attribute dataset index.
    sample.update(self.attribute_dataset[sample['pid']])
    return sample
def prepare_train_data(self, index):
    """Build one training sample (image, text/kernel maps, boxes, words).

    Steps: load the image and annotations, encode up to ``max_word_num``
    words as character ids, rasterise instance / training-mask / kernel
    maps, optionally augment, derive per-instance boxes, and convert
    everything to tensors.

    Args:
        index: Position in ``self.img_paths`` / ``self.gt_paths``.

    Returns:
        dict with ``imgs``, ``gt_texts``, ``gt_kernels``,
        ``training_masks``, ``gt_instances`` and ``gt_bboxes``; when
        ``self.with_rec`` is set, also ``gt_words`` and ``word_masks``.
    """
    img_path = self.img_paths[index]
    gt_path = self.gt_paths[index]

    img = get_img(img_path, self.read_type)
    bboxes, words = get_ann(img, gt_path)

    if bboxes.shape[0] > self.max_word_num:
        bboxes = bboxes[:self.max_word_num]
        words = words[:self.max_word_num]

    # Row 0 stays as padding; word i is stored at row i + 1.
    gt_words = np.full((self.max_word_num + 1, self.max_word_len),
                       self.char2id['PAD'], dtype=np.int32)
    word_mask = np.zeros((self.max_word_num + 1, ), dtype=np.int32)
    for i, word in enumerate(words):
        if word == '###':
            continue
        word = word.lower()
        # np.int was removed in NumPy 1.24; int32 matches gt_words' dtype.
        gt_word = np.full((self.max_word_len, ), self.char2id['PAD'],
                          dtype=np.int32)
        for j, char in enumerate(word):
            if j > self.max_word_len - 1:
                break
            if char in self.char2id:
                gt_word[j] = self.char2id[char]
            else:
                gt_word[j] = self.char2id['UNK']
        # EOS goes right after the word, or in the last slot if truncated.
        if len(word) > self.max_word_len - 1:
            gt_word[-1] = self.char2id['EOS']
        else:
            gt_word[len(word)] = self.char2id['EOS']
        gt_words[i + 1] = gt_word
        word_mask[i + 1] = 1

    if self.is_transform:
        img = random_scale(img, self.short_size)

    gt_instance = np.zeros(img.shape[0:2], dtype='uint8')
    training_mask = np.ones(img.shape[0:2], dtype='uint8')
    if bboxes.shape[0] > 0:
        # Scale the boxes by the image width/height and reshape to points.
        bboxes = np.reshape(
            bboxes * ([img.shape[1], img.shape[0]] * 4),
            (bboxes.shape[0], -1, 2)).astype('int32')
        for i in range(bboxes.shape[0]):
            cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
            # '###' regions are masked out of the training mask.
            if words[i] == '###':
                cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)

    gt_kernels = []
    for rate in [self.kernel_scale]:
        gt_kernel = np.zeros(img.shape[0:2], dtype='uint8')
        kernel_bboxes = shrink(bboxes, rate)
        for i in range(bboxes.shape[0]):
            cv2.drawContours(gt_kernel, [kernel_bboxes[i]], -1, 1, -1)
        gt_kernels.append(gt_kernel)

    if self.is_transform:
        imgs = [img, gt_instance, training_mask]
        imgs.extend(gt_kernels)

        if not self.with_rec:
            imgs = random_horizontal_flip(imgs)
        imgs = random_rotate(imgs)
        gt_instance_before_crop = imgs[1].copy()
        imgs = random_crop_padding(imgs, self.img_size)
        img, gt_instance, training_mask, gt_kernels = \
            imgs[0], imgs[1], imgs[2], imgs[3:]
        word_mask = update_word_mask(gt_instance, gt_instance_before_crop,
                                     word_mask)

    gt_text = gt_instance.copy()
    gt_text[gt_text > 0] = 1
    gt_kernels = np.array(gt_kernels)

    # Bounding box of every instance id present in the map, stored as
    # (min, min, max + 1, max + 1) over the two np.where axes.
    max_instance = np.max(gt_instance)
    gt_bboxes = np.zeros((self.max_word_num + 1, 4), dtype=np.int32)
    for i in range(1, max_instance + 1):
        ind = gt_instance == i
        if np.sum(ind) == 0:
            continue
        points = np.array(np.where(ind)).transpose((1, 0))
        tl = np.min(points, axis=0)
        br = np.max(points, axis=0) + 1
        gt_bboxes[i] = (tl[0], tl[1], br[0], br[1])

    img = Image.fromarray(img)
    img = img.convert('RGB')
    if self.is_transform:
        img = transforms.ColorJitter(brightness=32.0 / 255,
                                     saturation=0.5)(img)

    img = transforms.ToTensor()(img)
    img = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])(img)

    gt_text = torch.from_numpy(gt_text).long()
    gt_kernels = torch.from_numpy(gt_kernels).long()
    training_mask = torch.from_numpy(training_mask).long()
    gt_instance = torch.from_numpy(gt_instance).long()
    gt_bboxes = torch.from_numpy(gt_bboxes).long()
    gt_words = torch.from_numpy(gt_words).long()
    word_mask = torch.from_numpy(word_mask).long()

    data = dict(
        imgs=img,
        gt_texts=gt_text,
        gt_kernels=gt_kernels,
        training_masks=training_mask,
        gt_instances=gt_instance,
        gt_bboxes=gt_bboxes,
    )
    if self.with_rec:
        data.update(dict(gt_words=gt_words, word_masks=word_mask))

    return data
def __getitem__(self, index):
    """Return the parent sample extended with per-angle line targets.

    Samples whose ``angle_range_label`` is 255 are returned unchanged.
    Otherwise line masks for the ``v`` and ``h`` angle ranges are rendered
    and turned into:

    * ``bin_label``: stacked (v, h) arrays with the mask in the labelled bin;
    * ``weights``: stacked (v, h) copies of the parent weights per bin, with
      the two bins adjacent to the labelled one zeroed on the lines;
    * ``softmax_label``: stacked (v, h) int64 maps holding
      ``angle_range_label`` on the lines and 255 elsewhere.

    Args:
        index: Position of the sample in the dataset.

    Returns:
        The parent's sample dict, updated in place (``weights`` is replaced).
    """
    data = super(HistDataset, self).__getitem__(index)
    label_test = data['label_test']
    label = data['label']
    weights = data['weights']
    angle_range_label = data['angle_range_label']
    if angle_range_label == 255:
        return data

    # Line extraction and rasterisation for both directions.
    lines_v, _rot_angle = lines.extract_lines((label_test == 1),
                                              self.angle_range_v)
    lines_h, _ = lines.extract_lines((label_test == 1), self.angle_range_h)
    lines_v_mask = lines.create_grid(label.shape, lines_v, width=16)
    lines_h_mask = lines.create_grid(label.shape, lines_h, width=16)

    # Binary targets: the mask goes into the labelled bin only.
    n_angles = len(self.rot_angles)
    sz = (n_angles, ) + label.shape
    bin_label_v = np.zeros(sz, dtype=np.float32)
    bin_label_v[angle_range_label] = lines_v_mask.astype(np.float32)
    bin_label_h = np.zeros(sz, dtype=np.float32)
    bin_label_h[angle_range_label] = lines_h_mask.astype(np.float32)
    bin_label = np.stack((bin_label_v, bin_label_h), 0)

    weights_v = np.repeat(weights[np.newaxis, ...], n_angles, 0)
    weights_h = np.repeat(weights[np.newaxis, ...], n_angles, 0)

    # Zero the weights on the lines in the neighbouring bins. The lower
    # bound is inclusive so that bin 0 counts as a neighbour of bin 1,
    # mirroring the inclusive upper bound; -1 is still rejected so the
    # index never wraps around to the last bin.
    close_idx_0 = angle_range_label - 1
    close_idx_1 = angle_range_label + 1
    if close_idx_0 >= 0:
        weights_v[close_idx_0] *= (lines_v_mask == 0)
        weights_h[close_idx_0] *= (lines_h_mask == 0)
    if close_idx_1 < n_angles:
        weights_v[close_idx_1] *= (lines_v_mask == 0)
        weights_h[close_idx_1] *= (lines_h_mask == 0)
    # A leftover pdb.set_trace() breakpoint was removed from this spot.
    weights = np.stack((weights_v, weights_h), 0)

    # Softmax targets: bin index on the lines, 255 elsewhere.
    softmax_label_v = 255 * np.ones(label.shape, dtype=np.int64)
    softmax_label_v[lines_v_mask.astype(bool)] = angle_range_label
    softmax_label_h = 255 * np.ones(label.shape, dtype=np.int64)
    softmax_label_h[lines_h_mask.astype(bool)] = angle_range_label
    softmax_label = np.stack((softmax_label_v, softmax_label_h), 0)

    data.update(bin_label=bin_label,
                softmax_label=softmax_label,
                weights=weights)
    return data
def __getitem__(self, idx):
    ''' Returns an item of the dataset.

    Loads one frame, applies the rotation augmentation to its bone
    transforms, poses the A-pose mesh with them, samples query points
    (uniformly in a padded box and near the mesh surface), computes their
    occupancies and skinning indices, and maps them back to A-pose space by
    inverting the blended bone transforms.

    Args:
        idx (int): ID of data point

    Returns:
        dict: entries keyed '<field>' and '<field>.<name>' where <field> is
            'points' for modes 'train'/'test' and 'points_iou' otherwise,
            plus 'inputs' and 'idx'.

    Raises:
        ValueError: if self.input_type is neither 'pointcloud' nor 'voxel'.
    '''
    data_path = self.data[idx]['data_path']
    subject = self.data[idx]['subject']
    gender = self.data[idx]['gender']
    # NOTE(review): this empty dict is replaced wholesale further down.
    data = {}

    aug_rot = self.augm_params().astype(np.float32)

    points_dict = np.load(data_path)

    # 3D models and points
    # NOTE(review): loc and scale read here are overwritten below by values
    # derived from the posed mesh's bounding box.
    loc = points_dict['loc'].astype(np.float32)
    trans = points_dict['trans'].astype(np.float32)
    root_loc = points_dict['Jtr'][0].astype(np.float32)
    scale = points_dict['scale'].astype(np.float32)

    # Also get GT SMPL poses
    pose_body = points_dict['pose_body']
    pose_hand = points_dict['pose_hand']
    pose = np.concatenate([pose_body, pose_hand], axis=-1)
    pose = R.from_rotvec(pose.reshape([-1, 3]))

    body_mesh_a_pose = points_dict['a_pose_mesh_points']
    # Break symmetry if given in float16:
    if body_mesh_a_pose.dtype == np.float16:
        body_mesh_a_pose = body_mesh_a_pose.astype(np.float32)
        body_mesh_a_pose += 1e-4 * np.random.randn(*body_mesh_a_pose.shape)
    else:
        body_mesh_a_pose = body_mesh_a_pose.astype(np.float32)

    n_smpl_points = body_mesh_a_pose.shape[0]

    bone_transforms = points_dict['bone_transforms'].astype(np.float32)
    # Apply rotation augmentation to bone transformations
    bone_transforms_aug = np.matmul(np.expand_dims(aug_rot, axis=0), bone_transforms)
    # Correct the translation part so that the rotation is applied about
    # (root_loc - trans) rather than the origin.
    bone_transforms_aug[:, :3, -1] += root_loc - trans - np.dot(aug_rot[:3, :3], root_loc - trans)
    bone_transforms = bone_transforms_aug
    # Get augmented posed-mesh
    skinning_weights = self.skinning_weights[gender]
    if self.use_abs_bone_transforms:
        J_regressor = self.J_regressors[gender]

    # Per-vertex transform: skinning-weighted blend of the bone transforms.
    T = np.dot(skinning_weights, bone_transforms.reshape([-1, 16])).reshape([-1, 4, 4])

    homogen_coord = np.ones([n_smpl_points, 1], dtype=np.float32)
    a_pose_homo = np.concatenate([body_mesh_a_pose - trans, homogen_coord], axis=-1).reshape([n_smpl_points, 4, 1])
    body_mesh = np.matmul(T, a_pose_homo)[:, :3, 0].astype(np.float32) + trans

    # Get extents of model.
    bb_min = np.min(body_mesh, axis=0)
    bb_max = np.max(body_mesh, axis=0)
    # total_size = np.sqrt(np.square(bb_max - bb_min).sum())
    total_size = (bb_max - bb_min).max()
    # Scales all dimensions equally.
    scale = max(1.6, total_size)    # 1.6 is the magic number from IPNet
    # Bounding-box centre of the posed mesh.
    loc = np.array(
        [(bb_min[0] + bb_max[0]) / 2,
         (bb_min[1] + bb_max[1]) / 2,
         (bb_min[2] + bb_max[2]) / 2],
        dtype=np.float32
    )

    posed_trimesh = trimesh.Trimesh(vertices=body_mesh, faces=self.faces)
    # a_pose_trimesh = trimesh.Trimesh(vertices=(body_mesh_a_pose - trans) * 1.0 / scale * 1.5, faces=self.faces)

    # Split the point budget between uniform and near-surface samples.
    n_points_uniform = int(self.points_size * self.points_uniform_ratio)
    n_points_surface = self.points_size - n_points_uniform

    boxsize = 1 + self.points_padding
    points_uniform = np.random.rand(n_points_uniform, 3)
    points_uniform = boxsize * (points_uniform - 0.5)
    # Scale points in (padded) unit box back to the original space
    points_uniform *= scale
    points_uniform += loc
    # Sample points around posed-mesh surface
    n_points_surface_cloth = n_points_surface // 2 if self.double_layer else n_points_surface
    # The extra input_pointcloud_n samples are split off as the input cloud.
    points_surface = posed_trimesh.sample(n_points_surface_cloth + self.input_pointcloud_n)
    if self.input_type == 'pointcloud':
        input_pointcloud = points_surface[n_points_surface_cloth:]
        noise = self.input_pointcloud_noise * np.random.randn(*input_pointcloud.shape)
        input_pointcloud = (input_pointcloud + noise).astype(np.float32)

    points_surface = points_surface[:n_points_surface_cloth]
    points_surface += np.random.normal(scale=self.points_sigma, size=points_surface.shape)

    if self.double_layer:
        n_points_surface_minimal = n_points_surface // 2

        posedir = self.posedirs[gender]
        minimal_shape_path = os.path.join(self.cape_path, 'cape_release', 'minimal_body_shape', subject, subject + '_minimal.npy')
        minimal_shape = np.load(minimal_shape_path)
        pose_mat = pose.as_matrix()
        ident = np.eye(3)
        # Add pose-dependent offsets (posedirs applied to R - I) to the
        # minimal body shape.
        pose_feature = (pose_mat - ident).reshape([207, 1])
        pose_offsets = np.dot(posedir.reshape([-1, 207]), pose_feature).reshape([6890, 3])
        minimal_shape += pose_offsets

        if self.use_abs_bone_transforms:
            Jtr_cano = np.dot(J_regressor, minimal_shape)
            Jtr_cano = Jtr_cano[IPNET2SMPL_IDX, :]

        a_pose_homo = np.concatenate([minimal_shape, homogen_coord], axis=-1).reshape([n_smpl_points, 4, 1])
        minimal_body_mesh = np.matmul(T, a_pose_homo)[:, :3, 0].astype(np.float32) + trans
        minimal_posed_trimesh = trimesh.Trimesh(vertices=minimal_body_mesh, faces=self.faces)

        # Sample points around minimally clothed posed-mesh surface
        points_surface_minimal = minimal_posed_trimesh.sample(n_points_surface_minimal)
        points_surface_minimal += np.random.normal(scale=self.points_sigma, size=points_surface_minimal.shape)

        points_surface = np.vstack([points_surface, points_surface_minimal])

    # Check occupancy values for sampled points
    query_points = np.vstack([points_uniform, points_surface]).astype(np.float32)
    if self.double_layer:
        # Double-layer occupancies, as was done in IPNet
        # 0: outside, 1: between body and cloth, 2: inside body mesh
        occupancies_cloth = check_mesh_contains(posed_trimesh, query_points)
        occupancies_minimal = check_mesh_contains(minimal_posed_trimesh, query_points)
        occupancies = occupancies_cloth.astype(np.int64)
        occupancies[occupancies_minimal] = 2
    else:
        occupancies = check_mesh_contains(posed_trimesh, query_points).astype(np.float32)

    # Skinning inds by querying nearest SMPL vertex on the clothed mesh
    # NOTE(review): minimal_body_mesh is only bound when double_layer is set,
    # so query_on_clothed=False appears to require double_layer — confirm.
    kdtree = KDTree(body_mesh if self.query_on_clothed else minimal_body_mesh)
    _, p_idx = kdtree.query(query_points)
    pts_W = skinning_weights[p_idx, :]
    skinning_inds_ipnet = self.part_labels[p_idx] # skinning inds (14 parts)
    skinning_inds_smpl = pts_W.argmax(1)   # full skinning inds (24 parts)
    if self.num_joints == 14:
        skinning_inds = skinning_inds_ipnet
    else:
        skinning_inds = skinning_inds_smpl

    # Invert LBS to get query points in A-pose space
    T = np.dot(pts_W, bone_transforms.reshape([-1, 16])).reshape([-1, 4, 4])
    T = np.linalg.inv(T)

    homogen_coord = np.ones([self.points_size, 1], dtype=np.float32)
    posed_homo = np.concatenate([query_points - trans, homogen_coord], axis=-1).reshape([self.points_size, 4, 1])
    query_points_a_pose = np.matmul(T, posed_homo)[:, :3, 0].astype(np.float32) + trans

    if self.use_abs_bone_transforms:
        assert (not self.use_v_template and self.num_joints == 24)
        # NOTE(review): Jtr_cano is only bound inside the double_layer branch
        # above — confirm use_abs_bone_transforms implies double_layer.
        query_points_a_pose -= Jtr_cano[SMPL2IPNET_IDX[skinning_inds], :]

    if self.use_v_template:
        v_template = self.v_templates[gender]
        # NOTE(review): minimal_shape is only bound inside the double_layer
        # branch above — confirm use_v_template implies double_layer.
        pose_shape_offsets = v_template - minimal_shape
        query_points_template = query_points_a_pose + pose_shape_offsets[p_idx, :]

    sc_factor = 1.0 / scale * 1.5 if self.normalized_scale else 1.0 # 1.5 is the magic number from IPNet
    offset = loc

    # Inverse bone transforms, with the translation re-expressed relative to
    # loc before inversion and multiplied by sc_factor afterwards.
    bone_transforms_inv = bone_transforms.copy()
    bone_transforms_inv[:, :3, -1] += trans - loc
    bone_transforms_inv = np.linalg.inv(bone_transforms_inv)
    bone_transforms_inv[:, :3, -1] *= sc_factor

    # The None key holds the main query points; it is renamed to the field
    # name when the output dict is assembled below.
    data = {
        None: (query_points - offset) * sc_factor,
        'occ': occupancies,
        'trans': trans,
        'root_loc': root_loc,
        'pts_a_pose': (query_points_a_pose - (trans if self.use_global_trans else offset)) * sc_factor,
        'skinning_inds': skinning_inds,
        'skinning_inds_ipnet': skinning_inds_ipnet,
        'skinning_inds_smpl': skinning_inds_smpl,
        'loc': loc,
        'scale': scale,
        'bone_transforms': bone_transforms,
        'bone_transforms_inv': bone_transforms_inv,
    }

    if self.use_v_template:
        data.update({'pts_template': (query_points_template - (trans if self.use_global_trans else offset)) * sc_factor})

    if self.mode in ['test']:
        data.update({'smpl_vertices': body_mesh, 'smpl_a_pose_vertices': body_mesh_a_pose})
        if self.double_layer:
            data.update({'minimal_smpl_vertices': minimal_body_mesh})

    # Prefix every entry with the field name ('points' or 'points_iou').
    data_out = {}
    field_name = 'points' if self.mode in ['train', 'test'] else 'points_iou'
    for k, v in data.items():
        if k is None:
            data_out[field_name] = v
        else:
            data_out['%s.%s' % (field_name, k)] = v

    if self.input_type == 'pointcloud':
        data_out.update(
            {'inputs': (input_pointcloud - offset) * sc_factor,
             'idx': idx,
            }
        )
    elif self.input_type == 'voxel':
        # Voxel occupancies are stored bit-packed; unpack them into a cube
        # of side self.voxel_res.
        voxels = np.unpackbits(points_dict['voxels_occ']).astype(np.float32)
        voxels = np.reshape(voxels, [self.voxel_res] * 3)
        data_out.update(
            {'inputs': voxels,
             'idx': idx,
            }
        )
    else:
        raise ValueError('Unsupported input type: {}'.format(self.input_type))

    return data_out
def __getitem__(self, index):
    """Build the network inputs and targets for one roidb entry.

    Args:
        index: Position of the sample in ``self.roidb``.

    Returns:
        A ``(data, label, index)`` tuple.

        ``data`` holds the image blob under ``'data'`` and ``'im_info'``,
        plus whatever ``self.build_crf_graph`` returns when
        ``config.network.has_crf`` is set.

        ``label`` always holds ``'roidb'``. Outside the test phase it also
        holds the per-stride RPN targets. With ``config.network.has_fcn_head``
        it holds ``'seg_gt'`` and, outside the test phase, ``'gt_classes'``,
        ``'mask_gt'`` and (with ``config.train.fcn_with_roi_loss``)
        ``'seg_roi_gt'``.

        ``index`` is returned unchanged.

    Raises:
        NotImplementedError: If ``config.network.has_rpn`` is false.
    """
    entry = self.roidb[index]
    needs_targets = self.phase != 'test'

    blob = defaultdict(list)
    im_blob, im_scales = self.get_image_blob([entry])

    if not config.network.has_rpn:
        raise NotImplementedError

    if needs_targets:
        add_rpn_blobs(blob, im_scales, [entry])
        data = {'data': im_blob, 'im_info': blob['im_info']}
        label = {'roidb': blob['roidb'][0]}
        for stride in config.network.rpn_feat_stride:
            label.update({
                'rpn_labels_fpn{}'.format(stride):
                    blob['rpn_labels_int32_wide_fpn{}'.format(stride)].astype(np.int64),
                'rpn_bbox_targets_fpn{}'.format(stride):
                    blob['rpn_bbox_targets_wide_fpn{}'.format(stride)],
                'rpn_bbox_inside_weights_fpn{}'.format(stride):
                    blob['rpn_bbox_inside_weights_wide_fpn{}'.format(stride)],
                'rpn_bbox_outside_weights_fpn{}'.format(stride):
                    blob['rpn_bbox_outside_weights_wide_fpn{}'.format(stride)],
            })
    else:
        im_info = np.array(
            [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]], np.float32)
        data = {'data': im_blob, 'im_info': im_info}
        label = {'roidb': entry}

    if config.network.has_fcn_head:
        scale = im_scales[0]

        def resize_nearest(array):
            # Nearest-neighbour keeps label ids / binary masks intact.
            return cv2.resize(array, None, None, fx=scale, fy=scale,
                              interpolation=cv2.INTER_NEAREST)

        # The semantic label map is needed in every phase, so it is loaded
        # once here; only the instance-mask / ROI targets are train-only.
        # NOTE(review): this reads the trailing ``else:`` of the original as
        # the test-phase branch of ``if self.phase != 'test'`` -- confirm.
        seg_path = entry['image'].replace('images', 'annotations')
        seg_gt = np.array(Image.open(seg_path))
        if entry['flipped']:
            seg_gt = np.fliplr(seg_gt)
        seg_gt = resize_nearest(seg_gt)
        label.update({'seg_gt': seg_gt})

        if needs_targets:
            roidb = label['roidb']
            gt_classes = roidb['gt_classes']
            blob_h, blob_w = im_blob.shape[-2], im_blob.shape[-1]
            mask_gt = np.zeros((len(gt_classes), blob_h, blob_w))
            label.update({'gt_classes': gt_classes})
            label.update({'mask_gt': mask_gt})

            for i in range(len(gt_classes)):
                segm = roidb['segms'][i]
                if isinstance(segm, list) and isinstance(segm[0], list):
                    # Polygon annotation: rasterise at the un-scaled image
                    # size, then resize together with everything else.
                    canvas = Image.new(
                        'L',
                        (int(np.round(blob_w / scale)),
                         int(np.round(blob_h / scale))),
                        0)
                    draw = ImageDraw.Draw(canvas)
                    for polygon in segm:
                        draw.polygon(tuple(polygon), outline=1, fill=1)
                    mask = np.array(canvas)
                else:
                    assert isinstance(segm, dict) or isinstance(segm[0], dict)
                    if isinstance(segm, dict):
                        rle = mask_util.frPyObjects(
                            [segm], segm['size'][0], segm['size'][1])
                        mask = mask_util.decode(rle)[:, :, 0]
                    else:
                        assert len(segm) == 1
                        mask = mask_util.decode(segm)[:, :, 0]
                mask_gt[i] = resize_nearest(mask)

            if config.train.fcn_with_roi_loss:
                fg_rows = np.where(roidb['gt_classes'] > 0)[0]
                gt_boxes = np.around(
                    roidb['boxes'][fg_rows] * scale).astype(np.int32)
                mask_size = config.network.mask_size
                seg_roi_gt = np.zeros(
                    (len(gt_boxes), mask_size, mask_size), dtype=np.int64)
                label.update({'seg_roi_gt': seg_roi_gt})

                seg_h, seg_w = seg_gt.shape[:2]
                for i in range(len(gt_boxes)):
                    x1, y1, x2, y2 = gt_boxes[i][:4]
                    # Guarantee a crop of at least one pixel inside seg_gt:
                    # cv2.resize raises on an empty source, which a
                    # degenerate box rounded onto the image border produces.
                    # In-range, non-degenerate boxes are left untouched.
                    x1 = min(max(x1, 0), seg_w - 1)
                    y1 = min(max(y1, 0), seg_h - 1)
                    x2 = min(max(x2, x1 + 1), seg_w)
                    y2 = min(max(y2, y1 + 1), seg_h)
                    seg_roi_gt[i] = cv2.resize(
                        seg_gt[y1:y2, x1:x2],
                        (mask_size, mask_size),
                        interpolation=cv2.INTER_NEAREST)

    # NOTE(review): placed at function level (independent of the FCN head)
    # because it only needs im_blob -- confirm against the intended nesting.
    if config.network.has_crf:
        data.update(self.build_crf_graph(im_blob))

    return data, label, index