class simulator(object):
    """Keep a batch of scenes and compose them step by step from predictions.

    Each scene is a dict holding the history of predicted indices, vectors and
    retrieved patches, plus the most recent frame, label map and mask.
    """

    def __init__(self, db, batch_size=None, nn_table=None):
        """Store the database/config and obtain a patch retrieval table.

        Args:
            db: dataset object; its ``cfg`` attribute is used as configuration.
            batch_size: number of scenes; ``cfg.batch_size`` when ``None``.
            nn_table: prebuilt retrieval table; when ``None`` an
                ``AllCategoriesTables`` is created from ``db`` and built.
        """
        self.db = db
        self.cfg = db.cfg
        self.batch_size = self.cfg.batch_size if batch_size is None else batch_size
        if nn_table is not None:
            self.nn_table = nn_table
        else:
            self.nn_table = AllCategoriesTables(db)
            self.nn_table.build_nntables_for_all_categories()

    def _channel_dim(self):
        """Return the number of channels of a frame for the current config."""
        vocab_size = self.cfg.output_vocab_size
        if self.cfg.use_color_volume:
            # One 3-channel colour slot per vocabulary entry.
            return 3 * vocab_size
        # One channel per vocabulary entry plus 4 trailing channels
        # (index -4 and the last three, as written by the render methods).
        return 4 + vocab_size

    def _color_slice(self, cls_ind):
        """Return the channel slice that receives the colour of ``cls_ind``."""
        if self.cfg.use_color_volume:
            return slice(3 * cls_ind, 3 * (cls_ind + 1))
        return slice(-3, None)

    def reset(self):
        """Re-create ``self.scenes`` with empty histories and zero canvases.

        Returns:
            A tensor built from the stacked zero frames, one per scene.
        """
        self.scenes = []
        frames = []
        channel_dim = self._channel_dim()
        for _ in range(self.batch_size):
            # input_image_size is indexed as [0] -> columns, [1] -> rows.
            plane = (self.cfg.input_image_size[1], self.cfg.input_image_size[0])
            frame = np.zeros(plane + (channel_dim,))
            scene = {
                'out_inds': [],
                'out_vecs': [],
                'out_patches': [],
                'last_frame': frame,
                'last_label': np.zeros(plane, dtype=np.int32),
                'last_mask': np.zeros(plane, dtype=np.float32),
            }
            self.scenes.append(scene)
            frames.append(frame)
        return torch.from_numpy(np.stack(frames, axis=0))

    def batch_render_to_pytorch(self, out_inds, out_vecs):
        """Apply one prediction step to every scene and return the new frames.

        Args:
            out_inds: per-scene predicted indices; length must equal
                ``self.batch_size``.
            out_vecs: per-scene predicted vectors, indexed like ``out_inds``.

        Returns:
            A tensor built from the stacked, updated frames.
        """
        assert(len(out_inds) == self.batch_size)
        updated = [
            self.update_scene(
                self.scenes[k],
                {'out_inds': out_inds[k], 'out_vec': out_vecs[k]})
            for k in range(self.batch_size)
        ]
        return torch.from_numpy(np.stack(updated, 0))

    def batch_redraw(self, return_sequence=False):
        """Re-render every stored scene from its recorded predictions.

        Args:
            return_sequence: when true, each scene contributes the stack of
                all intermediate renderings; otherwise only the final one,
                with a leading axis of length 1 added.

        Returns:
            Five lists (frames, noises, masks, labels, scenes), one entry per
            stored scene.
        """
        out_frames, out_noises, out_masks, out_labels, out_scenes = [], [], [], [], []
        for scene in self.scenes:
            predicted_scene = self.db.prediction_outputs_to_scene(scene, self.nn_table)
            predicted_scene['patches'] = scene['out_patches']
            rendered = self.render_predictions_as_output(predicted_scene, return_sequence)
            if not return_sequence:
                # Add a leading axis so both modes share the same rank.
                rendered = tuple(arr[None, ...] for arr in rendered)
            frames, noises, masks, labels = rendered
            out_frames.append(frames)
            out_noises.append(noises)
            out_masks.append(masks)
            out_labels.append(labels)
            out_scenes.append(predicted_scene)
        return out_frames, out_noises, out_masks, out_labels, out_scenes

    def render_predictions_as_output(self, scene, return_sequence):
        """Compose the patches of ``scene`` one by one onto empty canvases.

        Args:
            scene: dict with keys ``width``, ``height``, ``clses``, ``boxes``
                and ``patches``.
            return_sequence: when true, return the stacked snapshots taken
                after each patch; otherwise only the last snapshot.

        Returns:
            A 4-tuple ``(frames, noises, masks, labels)``. A scene without
            classes yields a single all-zero snapshot.
        """
        width = scene['width']
        height = scene['height']
        clses = scene['clses']
        boxes = scene['boxes']
        patches = scene['patches']

        channel_dim = self._channel_dim()
        frame = np.zeros((height, width, channel_dim))
        noise = np.zeros((height, width, channel_dim))
        label = np.zeros((height, width), dtype=np.int32)
        mask = np.zeros((height, width), dtype=np.float32)

        # Each entry is (frame, noise, mask, label), i.e. the return order.
        history = []
        for k, cls_ind in enumerate(clses):
            patch = patches[k]
            xyxy = xywh_to_xyxy(boxes[k], width, height)
            rgb = self._color_slice(cls_ind)
            frame[:, :, rgb], mask, _, label, noise[:, :, rgb] = patch_compose_and_erose(
                frame[:, :, rgb], mask, label, xyxy, patch, self.db, noise[:, :, rgb])
            if not self.cfg.use_color_volume:
                # Channel -4 and the per-class channel keep the running
                # maximum of the scaled mask.
                frame[:, :, -4] = np.maximum(mask * 255, frame[:, :, -4])
                frame[:, :, cls_ind] = np.maximum(mask * 255, frame[:, :, cls_ind])
            history.append((frame.copy(), noise.copy(), mask.copy(), label.copy()))

        if not history:
            history.append((frame.copy(), noise.copy(), mask.copy(), label.copy()))

        if return_sequence:
            return tuple(np.stack(series, 0) for series in zip(*history))
        return history[-1]

    def update_scene(self, scene, step_prediction):
        """Update the scene and the last instance of the scene.

        Args:
            scene: scene dict as created by ``reset``; modified in place.
            step_prediction: dict with array-like ``out_inds`` and ``out_vec``
                entries (both are flattened before use).

        Returns:
            The scene's ``last_frame`` after the update.
        """
        out_inds = step_prediction['out_inds'].flatten()
        out_vec = step_prediction['out_vec'].flatten()
        scene['out_inds'].append(out_inds)
        scene['out_vecs'].append(out_vec)

        new_frame, new_mask, new_label, current_patch = self.update_frame(
            scene['last_frame'], scene['last_mask'], scene['last_label'],
            out_inds, out_vec)
        scene['last_frame'] = new_frame
        scene['last_mask'] = new_mask
        scene['last_label'] = new_label
        scene['out_patches'].append(current_patch)
        return scene['last_frame']

    def update_frame(self, input_frame, input_mask, input_label, input_inds, input_vec):
        """Paste the patch selected by one prediction onto ``input_frame``.

        Args:
            input_frame: current frame; written to in place.
            input_mask: current mask.
            input_label: current label map.
            input_inds: flat indices; element 0 is the class index, the rest
                are passed to ``db.index2box``.
            input_vec: query vector handed to ``nn_table.retrieve``.

        Returns:
            ``(frame, mask, label, patch)``. When the class index is not
            greater than ``cfg.EOS_idx`` the inputs are returned untouched
            and ``patch`` is ``None``.
        """
        cls_ind = input_inds[0]
        if cls_ind <= self.cfg.EOS_idx:
            return input_frame, input_mask, input_label, None

        h = input_frame.shape[-3]
        w = input_frame.shape[-2]
        # Scale the box returned by index2box by the frame width/height.
        xywh = self.db.index2box(input_inds[1:])
        xywh = xywh * np.array([w, h, w, h])
        xyxy = xywh_to_xyxy(xywh, w, h)
        patch = self.nn_table.retrieve(cls_ind, input_vec)[0]

        # update the frame
        rgb = self._color_slice(cls_ind)
        input_frame[:, :, rgb], input_mask, _, input_label, _ = patch_compose_and_erose(
            input_frame[:, :, rgb], input_mask, input_label, xyxy, patch, self.db)
        if not self.cfg.use_color_volume:
            input_frame[:, :, -4] = np.maximum(255 * input_mask, input_frame[:, :, -4])
            input_frame[:, :, cls_ind] = np.maximum(255 * input_mask, input_frame[:, :, cls_ind])
        return input_frame, input_mask, input_label, patch
def generate_simulated_scenes(config, split, year):
    """Build stenciled training images for a shard of scenes and save them.

    For every scene in the shard, each object instance is cut out of the
    original photo using its own mask intersected with the (resized) mask of
    a retrieved look-alike patch. Four outputs are written per scene under
    ``<config.data_dir>/coco``: the composed image (``crn_images``), a copy
    in which random pixels near mask edges were whitened before composing
    (``crn_noices``), the union mask (``crn_masks``) and the class label map
    (``crn_labels``).

    Args:
        config: configuration object; ``data_dir``, ``seed`` and
            ``use_patch_background`` are read here.
        split: dataset split name; ``'test'`` and ``'aux'`` write into the
            ``train`` output folders.
        year: dataset year string, appended to the folder name.
    """
    db = coco(config, split, year)
    data_dir = osp.join(config.data_dir, 'coco')

    folder = ('train' if split in ('test', 'aux') else split) + year
    images_dir = osp.join(data_dir, 'crn_images', folder)
    noices_dir = osp.join(data_dir, 'crn_noices', folder)
    labels_dir = osp.join(data_dir, 'crn_labels', folder)
    masks_dir = osp.join(data_dir, 'crn_masks', folder)
    for out_dir in (images_dir, noices_dir, labels_dir, masks_dir):
        maybe_create(out_dir)

    # Candidate patches always come from the 2017 training database.
    traindb = coco(config, 'train', '2017')
    nn_tables = AllCategoriesTables(traindb)
    nn_tables.build_nntables_for_all_categories(True)

    # Shard window selected by config.seed.
    # NOTE(review): the window is hard-coded and is not checked against
    # len(db.scenedb) -- confirm it fits the chosen split.
    start_ind = 25000 + 14000 * config.seed
    end_ind = 25000 + 14000 * (config.seed + 1)
    patches_per_class = traindb.patches_per_class
    color_transfer_threshold = 0.8

    for i in range(start_ind, end_ind):
        entry = db.scenedb[i]
        width = entry['width']
        height = entry['height']
        xywhs = entry['boxes']
        masks = entry['masks']
        clses = entry['clses']
        image_index = entry['image_index']
        instance_inds = entry['instance_inds']

        canvas = (height, width)
        full_mask = np.zeros(canvas, dtype=np.float32)
        full_label = np.zeros(canvas, dtype=np.float32)
        full_image = np.zeros(canvas + (3,), dtype=np.float32)
        full_noice = np.zeros(canvas + (3,), dtype=np.float32)

        original_image = cv2.imread(db.color_path_from_index(image_index), cv2.IMREAD_COLOR)

        for j, mask in enumerate(masks):
            # Fresh float copy per instance: it is edited in place below.
            src_img = original_image.astype(np.float32).copy()
            xywh = xywhs[j]
            cls_idx = clses[j]
            instance_ind = instance_inds[j]

            embed_path = db.patch_path_from_indices(
                image_index, instance_ind, 'patch_feature', 'pkl',
                config.use_patch_background)
            with open(embed_path, 'rb') as fid:
                query_vector = pickle.load(fid)

            # Retrieve up to 100 neighbours and drop the instance itself.
            n_samples = min(100, len(patches_per_class[cls_idx]))
            neighbours = nn_tables.retrieve(cls_idx, query_vector, n_samples)
            candidate_patches = [
                cand for cand in neighbours
                if cand['instance_ind'] != instance_ind
            ]
            assert (len(candidate_patches) > 1)
            candidate_patch = find_closest_patch(
                db, traindb, image_index, instance_ind, candidate_patches)

            # stenciling
            src_mask = COCOmask.decode(mask)
            dst_mask = COCOmask.decode(candidate_patch['mask'])
            src_xyxy = xywh_to_xyxy(xywh, width, height)
            dst_xyxy = xywh_to_xyxy(
                candidate_patch['box'],
                candidate_patch['width'],
                candidate_patch['height'])

            # Inclusive box corners turned into row/column slices.
            src_rows = slice(src_xyxy[1], src_xyxy[3] + 1)
            src_cols = slice(src_xyxy[0], src_xyxy[2] + 1)
            dst_rows = slice(dst_xyxy[1], dst_xyxy[3] + 1)
            dst_cols = slice(dst_xyxy[0], dst_xyxy[2] + 1)
            # cv2.resize takes (width, height).
            src_box_size = (src_xyxy[2] - src_xyxy[0] + 1,
                            src_xyxy[3] - src_xyxy[1] + 1)

            dst_mask = dst_mask[dst_rows, dst_cols]
            dst_mask = cv2.resize(
                dst_mask, src_box_size, interpolation=cv2.INTER_NEAREST)
            src_mask[src_rows, src_cols] = np.minimum(
                dst_mask, src_mask[src_rows, src_cols])

            # color transfer
            if random.random() > color_transfer_threshold:
                candidate_index = candidate_patch['image_index']
                candidate_image = cv2.imread(
                    traindb.color_path_from_index(candidate_index),
                    cv2.IMREAD_COLOR).astype(np.float32)
                candidate_cropped = cv2.resize(
                    candidate_image[dst_rows, dst_cols], src_box_size,
                    interpolation=cv2.INTER_CUBIC)
                original_cropped = src_img[src_rows, src_cols]
                transfer_cropped = Monge_Kantorovitch_color_transfer(
                    original_cropped, candidate_cropped)
                src_img[src_rows, src_cols] = transfer_cropped

            full_image = compose(full_image, src_img, src_mask)

            # boundary elision
            radius = int(0.05 * min(width, height))
            if np.amin(src_mask) > 0:
                # A mask that is non-zero everywhere gets its border cleared.
                src_mask[0, :] = 0
                src_mask[-1, :] = 0
                src_mask[:, 0] = 0
                src_mask[:, -1] = 0
            grad_x = cv2.Sobel(src_mask, cv2.CV_64F, 1, 0, ksize=3)
            grad_y = cv2.Sobel(src_mask, cv2.CV_64F, 0, 1, ksize=3)
            sobel = np.abs(grad_x) + np.abs(grad_y)
            edge = (sobel > 0.9).astype(sobel.dtype)
            morp_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (radius, radius))
            edge = cv2.dilate(edge, morp_kernel, iterations=1)

            # Whiten a random half of the pixels in the dilated edge band.
            row, col = np.where(edge > 0)
            n_edge_pixels = len(row)
            pixel_indices = np.random.permutation(range(n_edge_pixels))
            pixel_indices = pixel_indices[:(n_edge_pixels // 2)]
            src_img[row[pixel_indices], col[pixel_indices], :] = 255

            full_mask = np.maximum(full_mask, src_mask)
            full_label[src_mask > 0] = cls_idx
            full_noice = compose(full_noice, src_img, src_mask)

        output_name = str(image_index).zfill(12)
        cv2.imwrite(
            osp.join(images_dir, output_name + '.jpg'),
            clamp_array(full_image, 0, 255).astype(np.uint8))
        cv2.imwrite(
            osp.join(noices_dir, output_name + '.jpg'),
            clamp_array(full_noice, 0, 255).astype(np.uint8))
        cv2.imwrite(
            osp.join(masks_dir, output_name + '.png'),
            clamp_array(255 * full_mask, 0, 255).astype(np.uint8))
        cv2.imwrite(
            osp.join(labels_dir, output_name + '.png'),
            full_label.astype(np.uint8))
        print(i, image_index)