def animateSimulatedTask(images):
    # requires: math, time, numpy as np, matplotlib.pyplot as plt, phyre
    print(len(images))
    num_across = 5
    task_img0 = images
    # taskid = tasks[40]
    height = int(math.ceil(len(task_img0) / num_across))
    fig, axs = plt.subplots(height, num_across, figsize=(20, 15))
    fig.tight_layout()
    plt.subplots_adjust(hspace=0.2, wspace=0.2)
    # We can visualize the simulation at each timestep.
    for i, (ax, image) in enumerate(zip(axs.flatten(), task_img0)):
        # Convert the simulation observation to an image.
        if image is None:
            continue
        img = phyre.observations_to_float_rgb(image)
        ax.imshow(img)
        ax.title.set_text(f'Timestep {i}')
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
    plt.show()
    frames = []
    for taskimgi in task_img0:
        if taskimgi is None:
            continue
        frames.append(phyre.observations_to_uint8_rgb(taskimgi))
    from array2gif import write_gif
    timestr = time.strftime("%Y%m%d-%H%M%S")
    # keep the .gif extension at the end of the filename
    write_gif(np.asarray(frames), f'rgbbgr_{timestr}.gif', fps=8)
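A minimal usage sketch (not from the original source) showing one way to produce the `images` argument: run a PHYRE simulation with `need_images=True` and pass the observation sequence to `animateSimulatedTask`. The task id and the random choice of action below are illustrative placeholders.

import random
import phyre

# Placeholder task id; any ball-tier task works here.
simulator = phyre.initialize_simulator(['00000:000'], 'ball')
actions = simulator.build_discrete_action_space(max_actions=100)
simulation = simulator.simulate_action(0, random.choice(actions),
                                       need_images=True, stride=60)
if simulation.status != phyre.SimulationStatus.INVALID_INPUT:
    animateSimulatedTask(simulation.images)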
def prepareSamplesWithScore(is_update=False):
    print("here")
    path = "/media/kyra/Elements/phyre/agents/ConcSamples/Concat"
    samples = np.load(path, allow_pickle=True)
    images = []
    action_list = []
    label = []
    for i in range(len(samples)):
        score = samples[i][1]
        image = samples[i][0].images[0]
        rgb_image = phyre.observations_to_float_rgb(image)
        rgb_image = resize(rgb_image, (128, 128))
        images.append(rgb_image)
        # image2 = resize(samples[i].images[-1], (64, 64, 1))
        # images.append(rgb_image)
        # images.append(image2)
        action = samples[i][0].featurized_objects.features[0][2]
        actions = np.array([action[0], action[1], action[3]])
        action_list.append(actions)
        # if flag == "score":
        #     score = sf.ScoreFunctionValue(samples[i])
        #     label.append(score)
        # else:
        label.append(score)
    label = np.asarray(label)
    images = np.asarray(images)
    action = np.asarray(action_list)
    return images, action, label
def prepareTestSamplesWithScore():
    samples = concatenateSamples()
    images = []
    action_list = []
    label = []
    for i in range(len(samples)):
        score = sf.ScoreFunctionValue(samples[i])
        image = samples[i].images[0]
        rgb_image = phyre.observations_to_float_rgb(image)
        rgb_image = resize(rgb_image, (128, 128))
        images.append(rgb_image)
        # image2 = resize(samples[i].images[-1], (64, 64, 1))
        # images.append(rgb_image)
        # images.append(image2)
        action = samples[i].featurized_objects.features[0][2]
        actions = np.array([action[0], action[1], action[3]])
        action_list.append(actions)
        # if flag == "score":
        #     score = sf.ScoreFunctionValue(samples[i])
        #     label.append(score)
        # else:
        label.append(score)
    label = np.asarray(label)
    images = np.asarray(images)
    action = np.asarray(action_list)
    return images, action, label
def parse_image(video_name, vid_idx, img_idx, seq_size):
    images = hickle.load(
        video_name.replace('images', 'full').replace('.npy', '_image.hkl'))
    data = np.array(
        [phyre.observations_to_float_rgb(img.astype(int)) for img in images],
        dtype=np.float).transpose((0, 3, 1, 2))
    data = data[img_idx:img_idx + seq_size]
    return data
def _get_phyre_im(im_name, high_res):
    # For PHYRE:
    # high resolution: read the raw observation saved by PHYRE and convert it to RGB
    # low resolution: directly read the RGB input of the network
    if high_res:
        import phyre
        im = cv2.imread(im_name.replace('rgb', 'raw'), cv2.IMREAD_UNCHANGED)
        im = phyre.observations_to_float_rgb(im)[::-1]
    else:
        im = cv2.imread(im_name)
    return im
def prepareTestSamples():
    samples = concatenateTestSamples()
    action_list = []
    image_list = []
    for i in range(len(samples)):
        image = samples[i][0].images[0]
        action = samples[i][0].featurized_objects.features[0][2]
        rgb_image = phyre.observations_to_float_rgb(image)
        rgb_image = resize(rgb_image, (128, 128))
        image_list.append(np.array([rgb_image]))
        actions = np.array([action[0], action[1], action[3]])
        action_list.append(np.array([actions]))
    action = np.asarray(action_list)
    image = np.asarray(image_list)
    return action, image
def _parse_image(self, video_name, vid_idx, img_idx):
    if C.INPUT.PHYRE_USE_EMBEDDING:
        data = np.load(video_name)[::-1]
        data = np.ascontiguousarray(data)
        data = torch.from_numpy(data).long()
        data = self._image_colors_to_onehot(data)
        data = data.numpy()[None]
    else:
        env_name = video_name.split('/')[-3]
        images = hickle.load(
            video_name.replace('images', 'full').replace('.npy', '_image.hkl'))
        data = np.array([
            phyre.observations_to_float_rgb(img.astype(int)) for img in images
        ], dtype=np.float).transpose((0, 3, 1, 2))
        data = data[img_idx:img_idx + self.seq_size]
    # NOTE: `images` and `env_name` are only assigned on the non-embedding branch;
    # in this excerpt the return below assumes PHYRE_USE_EMBEDDING is off.
    return data, images[img_idx:img_idx + self.seq_size], env_name
def __init__(self, task_id, initial_scene, phyre_initial_featurized_objects,
             featurized_objects_wrapper=None):
    task = task_id.split(':')
    self.task_id = task_id
    self.template_id = task[0]
    self.modification_id = task[1]
    self.initial_scene = initial_scene
    self.rgb = phyre.observations_to_float_rgb(initial_scene)
    self.featurized_objects = []
    self.initial_featurized_objects = phyre_initial_featurized_objects
    self.featurized_objects_wrapper = featurized_objects_wrapper
    if self.initial_featurized_objects is not None:
        self._get_featurized_objects_from_phyre()
    if self.featurized_objects_wrapper is not None:
        self._get_featurized_objects_from_wrapper()
    self.goal_objects = []
    self._get_goal_objects()
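A hedged construction sketch for the wrapper above (the class name `TaskWrapper` is assumed; it is not visible in this excerpt). Both the initial scene and the initial featurized objects can come straight from a PHYRE ActionSimulator.

import phyre

simulator = phyre.initialize_simulator(['00000:000'], 'ball')  # placeholder task id
task = TaskWrapper(  # hypothetical name for the class that defines __init__ above
    task_id=simulator.task_ids[0],
    initial_scene=simulator.initial_scenes[0],
    phyre_initial_featurized_objects=simulator.initial_featurized_objects[0])
print(task.template_id, task.modification_id, task.rgb.shape)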
def __getitem__(self, idx):
    task_id, acts = self.video_info[idx, 0], self.video_info[idx, 1:]
    sim = self.simulator.simulate_action(int(task_id), acts, stride=60,
                                         need_images=True, need_featurized_objects=True)
    images = sim.images
    objs_color = sim.featurized_objects.colors
    objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                  for obj_color in objs_color]
    objs = sim.featurized_objects.features[:, objs_valid, :]
    num_objs = objs.shape[1]
    boxes = np.zeros((len(images), num_objs, 5))
    masks = np.zeros((len(images), num_objs, C.RPIN.MASK_SIZE, C.RPIN.MASK_SIZE))
    init_image = cv2.resize(images[0], (C.RPIN.INPUT_WIDTH, C.RPIN.INPUT_HEIGHT),
                            interpolation=cv2.INTER_NEAREST)
    if objs[0, :, 8:].sum(0).max() == 1:
        simple_parse = True
    else:
        simple_parse = False
    for im_id, (raw_image, obj) in enumerate(zip(images[:self.seq_size], objs[:self.seq_size])):
        im_height = raw_image.shape[0]
        im_width = raw_image.shape[1]
        obj_ids = np.intersect1d(np.unique(raw_image), [1, 2, 3, 5])
        if simple_parse:
            for o_id, raw_obj_id in enumerate(obj_ids):
                mask = (raw_image == raw_obj_id)
                mask = mask[::-1]
                [h, w] = np.where(mask > 0)
                x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                masks[im_id, o_id] = cv2.resize(
                    mask[y1:y2 + 1, x1:x2 + 1].astype(np.float32),
                    (C.RPIN.MASK_SIZE, C.RPIN.MASK_SIZE)) >= 0.5
                x1 *= (C.RPIN.INPUT_WIDTH - 1) / (im_width - 1)
                x2 *= (C.RPIN.INPUT_WIDTH - 1) / (im_width - 1)
                y1 *= (C.RPIN.INPUT_HEIGHT - 1) / (im_height - 1)
                y2 *= (C.RPIN.INPUT_HEIGHT - 1) / (im_height - 1)
                boxes[im_id, o_id] = [o_id, x1, y1, x2, y2]
        else:
            for o_id in range(num_objs):
                mask = phyre.objects_util.featurized_objects_vector_to_raster(obj[[o_id]])
                mask = mask[::-1]
                mask = mask > 0
                [h, w] = np.where(mask > 0)
                assert len(h) > 0 and len(w) > 0
                x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                masks[im_id, o_id] = cv2.resize(
                    mask[y1:y2 + 1, x1:x2 + 1].astype(np.float32),
                    (C.RPIN.MASK_SIZE, C.RPIN.MASK_SIZE)) >= 0.5
                x1 *= (C.RPIN.INPUT_WIDTH - 1) / (im_width - 1)
                x2 *= (C.RPIN.INPUT_WIDTH - 1) / (im_width - 1)
                y1 *= (C.RPIN.INPUT_HEIGHT - 1) / (im_height - 1)
                y2 *= (C.RPIN.INPUT_HEIGHT - 1) / (im_height - 1)
                boxes[im_id, o_id] = [o_id, x1, y1, x2, y2]
    labels = torch.from_numpy(np.array(int(sim.status == 1), dtype=np.float32))
    boxes = boxes[:self.seq_size, :, 1:]
    data = np.array([phyre.observations_to_float_rgb(init_image)],
                    dtype=np.float).transpose((0, 3, 1, 2))
    data_t = data.copy()
    gt_masks = masks[self.input_size:self.seq_size]
    # pad sequence
    boxes = np.concatenate(
        [boxes] + [boxes[[-1]] for _ in range(self.seq_size - boxes.shape[0])], axis=0)
    gt_masks = np.concatenate(
        [gt_masks] + [gt_masks[[-1]] for _ in range(self.pred_size - gt_masks.shape[0])], axis=0)
    # image flip augmentation
    if random.random() > 0.5 and self.split == 'train' and C.RPIN.HORIZONTAL_FLIP:
        boxes[..., [0, 2]] = self.input_width - boxes[..., [2, 0]]
        data = np.ascontiguousarray(data[..., ::-1])
        gt_masks = np.ascontiguousarray(gt_masks[..., ::-1])
    if random.random() > 0.5 and self.split == 'train' and C.RPIN.VERTICAL_FLIP:
        boxes[..., [1, 3]] = self.input_height - boxes[..., [3, 1]]
        data = np.ascontiguousarray(data[..., ::-1, :])
        # flip mask rows (height axis) to match the vertical image flip
        gt_masks = np.ascontiguousarray(gt_masks[..., ::-1, :])
    # when the number of objects is fewer than the max number of objects
    num_objs = boxes.shape[1]
    g_idx = []
    for i in range(C.RPIN.MAX_NUM_OBJS):
        for j in range(C.RPIN.MAX_NUM_OBJS):
            if j == i:
                continue
            g_idx.append([i, j, (i < num_objs) * (j < num_objs)])
    g_idx = np.array(g_idx)
    valid = np.ones(C.RPIN.MAX_NUM_OBJS)
    valid[num_objs:] = 0
    boxes = np.concatenate(
        [boxes] + [boxes[:, :1] for _ in range(C.RPIN.MAX_NUM_OBJS - num_objs)], axis=1)
    gt_masks = np.concatenate(
        [gt_masks] + [gt_masks[:, :1] for _ in range(C.RPIN.MAX_NUM_OBJS - num_objs)], axis=1)
    # rois
    rois = boxes[:self.input_size].copy()
    # gt boxes
    gt_boxes = boxes[self.input_size:].copy()
    gt_boxes = xyxy2xywh(gt_boxes.reshape(-1, 4)).reshape((-1, C.RPIN.MAX_NUM_OBJS, 4))
    gt_boxes[..., 0::2] /= self.input_width
    gt_boxes[..., 1::2] /= self.input_height
    gt_boxes = gt_boxes.reshape(self.pred_size, -1, 4)
    data = torch.from_numpy(data.astype(np.float32))
    data_t = torch.from_numpy(data_t.astype(np.float32))
    rois = torch.from_numpy(rois.astype(np.float32))
    gt_boxes = torch.from_numpy(gt_boxes.astype(np.float32))
    gt_masks = torch.from_numpy(gt_masks.astype(np.float32))
    valid = torch.from_numpy(valid.astype(np.float32))
    return data, data_t, rois, gt_boxes, gt_masks, valid, g_idx, labels
def test(self, start_id=0, end_id=25, fold_id=0, protocal='within'):
    random.seed(0)
    print(f'testing {protocal} fold {fold_id}')
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)  # PHYRE setup
    candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]  # filter tasks
    test_list = [task for task in test_tasks if task.split(':')[0] in candidate_list]
    simulator = phyre.initialize_simulator(test_list, action_tier)
    # PHYRE evaluation
    num_all_actions = [1000, 2000, 5000, 8000, 10000]
    auccess = np.zeros((len(num_all_actions), len(test_list), 100))
    batched_pred = C.SOLVER.BATCH_SIZE
    # DATA for network:
    all_data, all_acts, all_rois, all_image = [], [], [], []
    cache = phyre.get_default_100k_cache('ball')
    acts = cache.action_array[:10000]
    # actions = cache.action_array[:100000]
    # training_data = cache.get_sample(test_list, None)
    pos_all, neg_all, pos_correct, neg_correct = 0, 0, 0, 0
    objs_color = None
    for task_id, task in enumerate(test_list):
        confs, successes, num_valid_act_idx = [], [], []
        boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
        use_cache = os.path.exists(boxes_cache_name)
        all_boxes = hickle.load(boxes_cache_name) if use_cache else []
        valid_act_cnt = 0
        # sim_statuses = training_data['simulation_statuses'][task_id]
        # pos_acts = actions[sim_statuses == 1]
        # neg_acts = actions[sim_statuses == -1]
        # np.random.shuffle(pos_acts)
        # np.random.shuffle(neg_acts)
        # pos_acts = pos_acts[:50]
        # neg_acts = neg_acts[:200]
        # acts = np.concatenate([pos_acts, neg_acts])
        for act_id, act in enumerate(acts):
            if act_id == 0:
                pprint(f'{task}: {task_id} / {len(test_list)}')
            sim = simulator.simulate_action(task_id, act, stride=60,
                                            need_images=True, need_featurized_objects=True)
            if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                num_valid_act_idx.append(0)
                if act_id == len(acts) - 1 and len(all_data) > 0:  # final action is invalid
                    conf_t = self.batch_score(all_data, all_acts, all_rois, all_image,
                                              objs_color, task)
                    confs = confs + conf_t
                    all_data, all_acts, all_rois, all_image = [], [], [], []
                continue
            num_valid_act_idx.append(1)
            successes.append(sim.status == phyre.SimulationStatus.SOLVED)
            if self.score_with_heuristic or self.score_with_mask:
                # parse object, prepare input for network:
                image = cv2.resize(sim.images[0], (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                # for heuristic method to detect goal location, need to flip
                all_image.append(image[::-1])
                image = phyre.observations_to_float_rgb(image)
                objs_color = sim.featurized_objects.colors
                objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                              for obj_color in objs_color]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]
                if use_cache:
                    boxes = all_boxes[valid_act_cnt]
                    valid_act_cnt += 1
                else:
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0
                        [h, w] = np.where(mask_im)
                        x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                        x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                    all_boxes.append(boxes)
                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                rois = torch.from_numpy(boxes[..., 1:].astype(np.float32))[None, :]
                all_data.append(data)
                all_rois.append(rois)
            elif self.score_with_act:
                init = np.ascontiguousarray(simulator.initial_scenes[task_id][::-1])
                init128 = cv2.resize(init, (self.input_width, self.input_height),
                                     interpolation=cv2.INTER_NEAREST)
                all_data.append(torch.from_numpy(init128))
                all_acts.append(torch.from_numpy(act[None, :]))
            elif self.score_with_vid_cls:
                rst_images = np.stack([
                    np.ascontiguousarray(
                        cv2.resize(rst_image, (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1])
                    for rst_image in sim.images
                ])
                all_data.append(torch.from_numpy(rst_images))
            else:
                raise NotImplementedError
            if len(all_data) % batched_pred == 0 or act_id == len(acts) - 1:
                conf_t = self.batch_score(all_data, all_acts, all_rois, all_image,
                                          objs_color, task)
                confs = confs + conf_t
                all_data, all_acts, all_rois, all_image = [], [], [], []
        if self.score_with_heuristic or self.score_with_mask:
            if not use_cache:
                all_boxes = np.stack(all_boxes)
                hickle.dump(all_boxes, boxes_cache_name, mode='w', compression='gzip')
            else:
                assert valid_act_cnt == len(all_boxes)
        pred = np.array(confs) >= 0.5
        labels = np.array(successes)
        pos_all += (labels == 1).sum()
        neg_all += (labels == 0).sum()
        pos_correct += (pred == labels)[labels == 1].sum()
        neg_correct += (pred == labels)[labels == 0].sum()
        pos_acc = (pred == labels)[labels == 1].sum() / (labels == 1).sum()
        neg_acc = (pred == labels)[labels == 0].sum() / (labels == 0).sum()
        info = f'{pos_acc * 100:.1f} / {neg_acc * 100:.1f} '
        # info = f'{task}: '
        for j, num_acts in enumerate(num_all_actions):
            num_valid = np.sum(num_valid_act_idx[:num_acts])
            top_acc = np.array(successes[:num_valid])[np.argsort(confs[:num_valid])[::-1]]
            for i in range(100):
                auccess[j, task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
            w = np.array([np.log(k + 1) - np.log(k) for k in range(1, 101)])
            s = auccess[j, :task_id + 1].sum(0) / auccess[j, :task_id + 1].shape[0]
            info += f'{np.sum(w * s) / np.sum(w) * 100:.2f} {np.sum(successes[:num_valid])}/{num_acts // 1000}k | '
        pprint(info)
    pprint(pos_correct, pos_all, pos_correct / pos_all)
    pprint(neg_correct, neg_all, neg_correct / neg_all)
    cache_output_dir = f'{self.output_dir.replace("figures/", "")}/' \
                       f'{self.proposal_setting}_{self.method}_{protocal}_fold_{fold_id}/'
    os.makedirs(cache_output_dir, exist_ok=True)
    print(cache_output_dir)
    stats = {
        'auccess': auccess,
        'p_c': pos_correct, 'p_a': pos_all,
        'n_c': neg_correct, 'n_a': neg_all,
    }
    with open(f'{cache_output_dir}/{start_id}_{end_id}.pkl', 'wb') as f:
        pickle.dump(stats, f, pickle.HIGHEST_PROTOCOL)
    distance_map = 255. - distance_map
    return distance_map


# TODO: improve/debug time-step selection for injection
# TODO: implement 5 random positions at the goal object
# TODO: take into account grey obstacles
# TODO: run the benchmark with stats on compute on GPU cluster
if __name__ == "__main__":
    x = 42
    y = 42
    sim = phyre.initialize_simulator(['00002:017'], "ball")
    # img = cv2.imread('maze.png')  # read image
    init_scene = sim.initial_scenes[0]
    img = phyre.observations_to_float_rgb(init_scene)  # read image
    img = cv2.resize(img, (64, 64))
    print(img)
    cv2.imwrite('00002_017_scene.png', img * 255)
    target = np.flip((init_scene == 4), axis=0).astype(float)
    target = cv2.resize(target, (64, 64))
    # cv2.imwrite('maze-initial.png', img)
    distance_map = find_distance_map_obj(img, target)
    # distance_map[y - 1, x] = 0.
    # distance_map[y, x] = 0.
    # distance_map[y + 1, x] = 0.
    # distance_map[y, x - 1] = 0.
    # distance_map[y, x + 1] = 0.
    cv2.imwrite('00002_017_solution.png', distance_map)
    print('DONE')

# %%
def solve(tasks, generator, save_images=False, force_collect=False, static=256, show=False):
    # Collect Interaction Data
    data_path = './data/cgan_solver'
    if not os.path.exists(data_path + '/interactions.pickle') or force_collect:
        os.makedirs(data_path, exist_ok=True)
        wid = generator.width
        print("Collecting Data")
        collect_interactions(data_path, tasks, 10, stride=1, size=(wid, wid), static=static)
    with open(data_path + '/interactions.pickle', 'rb') as fs:
        X = T.tensor(pickle.load(fs), dtype=T.float)
    with open(data_path + '/info.pickle', 'rb') as fs:
        info = pickle.load(fs)
    tasklist = info['tasks']
    positions = info['pos']
    orig_actions = info['action']
    print('loaded dataset with shape:', X.shape)
    # data_set = T.utils.data.TensorDataset(X)
    # data_loader = T.utils.data.DataLoader(data_set, batch_size=BATCH_SIZE, shuffle=False)

    # Sim SETUP
    print('Successful collection for tasks:\n', tasklist)
    eval_setup = 'ball_within_template'
    sim = phyre.initialize_simulator(tasklist, 'ball')
    eva = phyre.Evaluator(tasklist)

    # Solve Loop
    error = np.zeros((X.shape[0], 3))
    generator.eval()
    solved, tried = 0, 0
    for i, task in enumerate(tasklist):
        # generate 'fake'
        noise = T.randn(1, generator.noise_dim)
        with T.no_grad():
            fake = generator((X[i, :generator.s_chan])[None], noise)[0, 0]
        # action = np.array(pic_to_action_vector(fake, r_fac=1.8))
        action = np.array(pic_to_action_vector(fake.numpy(), r_fac=1))
        raw_action = action.copy()

        # PROCESS ACTION
        print(action, 'raw')
        # shift by half to get relative position
        action[:2] -= 0.5
        # multiply by half because extracted scope is already half of the scene
        action[:2] *= 0.5
        # multiply by 4 because action value is always 4*diameter -> 8*radius,
        # but scope is already halved -> 8*0.5*radius
        action[2] *= 4
        # finetuning
        action[2] *= 1.0
        print(action, 'relative')
        pos = positions[i]
        print(pos)
        action[:2] += pos
        print(action, 'added')
        res = sim.simulate_action(i, action, need_featurized_objects=True)

        # Noisy retries while the action is invalid
        t = 0
        temp = 1
        base_action = action
        while res.status.is_invalid() and t < 200:
            t += 1
            action = base_action + (np.random.rand(3) - 0.5) * 0.01 * temp
            res = sim.simulate_action(i, action, need_featurized_objects=False)
            temp *= 1.01
        print(action, 'final action')

        # Check for and log solves
        if not res.status.is_invalid():
            tried += 1
            if res.status.is_solved():
                solved += 1
        print(orig_actions[i], 'orig action')
        print(task, "solved", res.status.is_solved())
        error[i] = orig_actions[i] - base_action

        # Visualization
        if show:
            x, y, d = np.round(raw_action * fake.shape[0])
            y = fake.shape[0] - y
            print(x, y, d)

            def generate_crosses(points):
                xx = []
                yy = []
                for x, y in points:
                    xx.extend([x, x + 1, x - 1, x, x])
                    yy.extend([y, y, y, y + 1, y - 1])
                return xx, yy

            xx, yy = [
                x, (x + d) if (x + d) < fake.shape[0] - 1 else 62, x - d, x, x
            ], [
                y, y, y, (y + d) if (y + d) < fake.shape[0] - 1 else 62, y - d
            ]
            xx, yy = generate_crosses(zip(xx, yy))
            fake[yy, xx] = 0.5
            os.makedirs(f'result/cgan_solver/vector_extractions', exist_ok=True)
            plt.imsave(f'result/cgan_solver/vector_extractions/{i}.png', fake)
            if not res.status.is_invalid():
                os.makedirs(f'result/cgan_solver/scenes', exist_ok=True)
                plt.imsave(f'result/cgan_solver/scenes/{i}.png', res.images[0, ::-1])
            else:
                print("invalid")
                plt.imshow(phyre.observations_to_float_rgb(sim.initial_scenes[i]))
                plt.show()
    print("solving percentage:", solved / tried, 'overall:', tried)
    print("mean x error:", np.mean(error[:, 0]), 'mean x abs error:', np.mean(np.abs(error[:, 0])))
    print("mean y error:", np.mean(error[:, 1]), 'mean y abs error:', np.mean(np.abs(error[:, 1])))
    print("mean r error:", np.mean(error[:, 2]), 'mean r abs error:', np.mean(np.abs(error[:, 2])))
def test(self):
    self.model.eval()
    if C.RPIN.VAE:
        losses = dict.fromkeys(self.loss_name, 0.0)
        box_p_step_losses = [0.0 for _ in range(self.ptest_size)]
        masks_step_losses = [0.0 for _ in range(self.ptest_size)]
    for batch_idx, (data, _, rois, gt_boxes, gt_masks, valid, g_idx) in enumerate(self.val_loader):
        with torch.no_grad():
            data = data.to(self.device)
            rois = xyxy_to_rois(rois, batch=data.shape[0], time_step=data.shape[1],
                                num_devices=self.num_gpus)
            labels = {
                'boxes': gt_boxes.to(self.device),
                'masks': gt_masks.to(self.device),
                'valid': valid.to(self.device),
            }
            outputs = self.model(data, rois, num_rollouts=self.ptest_size,
                                 g_idx=g_idx, phase='test')
            self.loss(outputs, labels, 'test')
            # VAE multiple runs
            if C.RPIN.VAE:
                vae_best_mean = np.mean(
                    np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
                losses_t = self.losses.copy()
                box_p_step_losses_t = self.box_p_step_losses.copy()
                masks_step_losses_t = self.masks_step_losses.copy()
                for i in range(99):
                    outputs = self.model(data, rois, num_rollouts=self.ptest_size,
                                         g_idx=g_idx, phase='test')
                    self.loss(outputs, labels, 'test')
                    mean_loss = np.mean(
                        np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
                    if mean_loss < vae_best_mean:
                        losses_t = self.losses.copy()
                        box_p_step_losses_t = self.box_p_step_losses.copy()
                        masks_step_losses_t = self.masks_step_losses.copy()
                        vae_best_mean = mean_loss
                    self._init_loss()
                for k, v in losses.items():
                    losses[k] += losses_t[k]
                for i in range(len(box_p_step_losses)):
                    box_p_step_losses[i] += box_p_step_losses_t[i]
                    masks_step_losses[i] += masks_step_losses_t[i]
            tprint(f'eval: {batch_idx}/{len(self.val_loader)}:' + ' ' * 20)
        if self.plot_image > 0:
            outputs = {
                'boxes': outputs['boxes'].cpu().numpy(),
                'masks': outputs['masks'].cpu().numpy() if C.RPIN.MASK_LOSS_WEIGHT else None,
            }
            outputs['boxes'][..., 0::2] *= self.input_width
            outputs['boxes'][..., 1::2] *= self.input_height
            outputs['boxes'] = xywh2xyxy(outputs['boxes'].reshape(-1, 4)).reshape(
                (data.shape[0], -1, C.RPIN.MAX_NUM_OBJS, 4))
            labels = {
                'boxes': labels['boxes'].cpu().numpy(),
                'masks': labels['masks'].cpu().numpy(),
            }
            labels['boxes'][..., 0::2] *= self.input_width
            labels['boxes'][..., 1::2] *= self.input_height
            labels['boxes'] = xywh2xyxy(labels['boxes'].reshape(-1, 4)).reshape(
                (data.shape[0], -1, C.RPIN.MAX_NUM_OBJS, 4))
            for i in range(rois.shape[0]):
                batch_size = C.SOLVER.BATCH_SIZE if not C.RPIN.VAE else 1
                plot_image_idx = batch_size * batch_idx + i
                if plot_image_idx < self.plot_image:
                    tprint(f'plotting: {plot_image_idx}' + ' ' * 20)
                    video_idx, img_idx = self.val_loader.dataset.video_info[plot_image_idx]
                    video_name = self.val_loader.dataset.video_list[video_idx]
                    v = valid[i].numpy().astype(np.bool)
                    pred_boxes_i = outputs['boxes'][i][:, v]
                    gt_boxes_i = labels['boxes'][i][:, v]
                    if 'PHYRE' in C.DATA_ROOT:
                        im_data = phyre.observations_to_float_rgb(
                            np.load(video_name).astype(np.uint8))[..., ::-1]
                        a, b, c = video_name.split('/')[5:8]
                        output_name = f'{a}_{b}_{c.replace(".npy", "")}'
                        bg_image = np.load(video_name).astype(np.uint8)
                        for fg_id in [1, 2, 3, 5]:
                            bg_image[bg_image == fg_id] = 0
                        bg_image = phyre.observations_to_float_rgb(bg_image)
                    else:
                        bg_image = None
                        image_list = sorted(
                            glob(f'{video_name}/*{self.val_loader.dataset.image_ext}'))
                        im_name = image_list[img_idx]
                        output_name = '_'.join(im_name.split('.')[0].split('/')[-2:])
                        # deal with image data here
                        gt_boxes_i = labels['boxes'][i][:, v]
                        im_data = get_im_data(im_name, gt_boxes_i[None, 0:1], C.DATA_ROOT,
                                              self.high_resolution_plot)
                    if self.high_resolution_plot:
                        scale_w = im_data.shape[1] / self.input_width
                        scale_h = im_data.shape[0] / self.input_height
                        pred_boxes_i[..., [0, 2]] *= scale_w
                        pred_boxes_i[..., [1, 3]] *= scale_h
                        gt_boxes_i[..., [0, 2]] *= scale_w
                        gt_boxes_i[..., [1, 3]] *= scale_h
                    pred_masks_i = None
                    if C.RPIN.MASK_LOSS_WEIGHT:
                        pred_masks_i = outputs['masks'][i][:, v]
                    plot_rollouts(im_data, pred_boxes_i, gt_boxes_i,
                                  pred_masks_i, labels['masks'][i][:, v],
                                  output_dir=self.output_dir,
                                  output_name=output_name, bg_image=bg_image)
    if C.RPIN.VAE:
        self.losses = losses.copy()
        self.box_p_step_losses = box_p_step_losses.copy()
        self.loss_cnt = len(self.val_loader)
    print('\r', end='')
    print_msg = ""
    mean_loss = np.mean(
        np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
    print_msg += f"{mean_loss:.3f} | "
    print_msg += f" | ".join([
        "{:.3f}".format(self.losses[name] * 1e3 / self.loss_cnt) for name in self.loss_name
    ])
    pprint(print_msg)
def gen_proposal(self, start_id=0, end_id=25):
    random.seed(0)
    np.random.seed(0)
    protocal = C.PHYRE_PROTOCAL
    fold_id = C.PHYRE_FOLD
    print(f'generate proposal for {protocal} fold {fold_id}')
    max_p_acts, max_n_acts, max_acts = 200, 800, 100000
    self.proposal_dir = f'{self.output_dir.split("/")[-1]}_' \
                        f'p{max_p_acts}n{max_n_acts}a{max_acts // 1000}'
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id)
    # filter task
    train_tasks = train_tasks + dev_tasks
    candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]
    for split in ['train', 'test']:
        train_list = [task for task in train_tasks if task.split(':')[0] in candidate_list]
        test_list = [task for task in test_tasks if task.split(':')[0] in candidate_list]
        if len(eval(f'{split}_list')) == 0:
            return
        simulator = phyre.initialize_simulator(eval(f'{split}_list'), action_tier)
        cache = phyre.get_default_100k_cache('ball')
        training_data = cache.get_sample(eval(f'{split}_list'), None)
        actions = cache.action_array[:max_acts]
        final_list = eval(f'{split}_list')
        t_list = tqdm(final_list, 'Task')
        for task_id, task in enumerate(t_list):
            box_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_box.hkl'
            act_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_act.hkl'
            use_cache = os.path.exists(box_cache_name) and os.path.exists(act_cache_name)
            if use_cache:
                acts = hickle.load(act_cache_name)
                all_boxes = hickle.load(box_cache_name)
            else:
                sim_statuses = training_data['simulation_statuses'][task_id]
                pos_acts = actions[sim_statuses == 1]
                neg_acts = actions[sim_statuses == -1]
                np.random.shuffle(pos_acts)
                np.random.shuffle(neg_acts)
                pos_acts = pos_acts[:max_p_acts]
                neg_acts = neg_acts[:max_n_acts]
                acts = np.concatenate([pos_acts, neg_acts])
                hickle.dump(acts, act_cache_name, mode='w', compression='gzip')
                all_boxes = []
            valid_act_id = 0
            for act_id, act in enumerate(tqdm(acts, 'Candidate Action', leave=False)):
                sim = simulator.simulate_action(task_id, act, stride=60,
                                                need_images=True, need_featurized_objects=True)
                if not use_cache:
                    if act_id < len(pos_acts):
                        assert sim.status == phyre.SimulationStatus.SOLVED
                    else:
                        assert sim.status == phyre.SimulationStatus.NOT_SOLVED
                assert sim.status != phyre.SimulationStatus.INVALID_INPUT
                raw_images = sim.images
                rst_images = np.stack([
                    np.ascontiguousarray(
                        cv2.resize(rst_image, (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1])
                    for rst_image in raw_images
                ])
                # prepare input for network:
                image = cv2.resize(raw_images[0], (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                image = phyre.observations_to_float_rgb(image)
                # parse object
                objs_color = sim.featurized_objects.colors
                objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                              for obj_color in objs_color]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]
                if use_cache:
                    boxes = all_boxes[valid_act_id]
                    valid_act_id += 1
                else:
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0
                        [h, w] = np.where(mask_im)
                        x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                        x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                    all_boxes.append(boxes)
                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                rois = torch.from_numpy(boxes[..., 1:].astype(np.float32))[None, :]
                bg_image = rst_images[0].copy()
                for fg_id in [1, 2, 3, 5]:
                    bg_image[bg_image == fg_id] = 0
                boxes, masks = self.generate_trajs(data, rois)
                rst_masks = np.stack([
                    self.render_mask_to_image(boxes[0, i], masks[0, i],
                                              images=bg_image.copy(),
                                              color=objs_color).astype(np.uint8)
                    for i in range(self.pred_rollout)
                ])
                output_dir = f'data/PHYRE_proposal/{self.proposal_dir}/{split}/'
                output_dir = output_dir + 'pos/' if sim.status == phyre.SimulationStatus.SOLVED else output_dir + 'neg/'
                output_dir = output_dir + f'{task.replace(":", "_")}/'
                os.makedirs(output_dir, exist_ok=True)
                rst_dict = {'gt_im': rst_images, 'pred_im': rst_masks}
                hickle.dump(rst_dict, f'{output_dir}/{act_id}.hkl', mode='w', compression='gzip')
            if not use_cache:
                all_boxes = np.stack(all_boxes)
                hickle.dump(all_boxes, box_cache_name, mode='w', compression='gzip')
def gen_single_task(task_list_item, action_tier_name, cache_name):
    random.seed(0)
    cache = phyre.get_default_100k_cache('ball')
    training_data = cache.get_sample([task_list_item], None)
    actions = cache.action_array
    sim_statuses = training_data['simulation_statuses'][0]
    simulator = phyre.initialize_simulator([task_list_item], action_tier_name)
    pos_acts = actions[sim_statuses == 1]
    neg_acts = actions[sim_statuses == -1]
    print(f'{simulator.task_ids[0].replace(":", "_")}: success: {len(pos_acts)}, fail: {len(neg_acts)}')
    task_id = simulator.task_ids[0]
    im_save_root = f'{cache_name}/images/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    fim_save_root = f'{cache_name}/full/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    bm_save_root = f'{cache_name}/labels/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    os.makedirs(im_save_root, exist_ok=True)
    os.makedirs(fim_save_root, exist_ok=True)
    os.makedirs(bm_save_root, exist_ok=True)
    # max_p_acts, max_n_acts, mask_size, input_h, input_w are module-level constants
    np.random.shuffle(pos_acts)
    np.random.shuffle(neg_acts)
    pos_acts = pos_acts[:max_p_acts]
    neg_acts = neg_acts[:max_n_acts]
    acts = np.concatenate([pos_acts, neg_acts])
    for act_id, action in enumerate(tqdm(acts)):
        sim = simulator.simulate_action(0, action, stride=60,
                                        need_images=True, need_featurized_objects=True)
        images = sim.images
        assert sim.status != 0
        # filter out static objects
        objs_color = sim.featurized_objects.colors
        objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                      for obj_color in objs_color]
        objs = sim.featurized_objects.features[:, objs_valid, :]
        num_objs = objs.shape[1]
        boxes = np.zeros((len(images), num_objs, 5))
        masks = np.zeros((len(images), num_objs, mask_size, mask_size))
        full_images = np.zeros((len(images), input_h, input_w))
        for im_id, (raw_image, obj) in enumerate(zip(images, objs)):
            # image = phyre.observations_to_float_rgb(raw_image)
            im_height = raw_image.shape[0]
            im_width = raw_image.shape[1]
            image = cv2.resize(raw_image, (input_w, input_h), interpolation=cv2.INTER_NEAREST)
            if im_id == 0:
                np.save(f'{im_save_root}/{act_id:03d}.npy', image)
            full_images[im_id] = image
            for o_id in range(num_objs):
                mask = phyre.objects_util.featurized_objects_vector_to_raster(obj[[o_id]])
                mask_im = phyre.observations_to_float_rgb(mask)
                mask_im[mask_im == 1] = 0
                mask_im = mask_im.sum(-1) > 0
                [h, w] = np.where(mask_im)
                assert len(h) > 0 and len(w) > 0
                x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                masks[im_id, o_id] = cv2.resize(
                    mask_im[y1:y2 + 1, x1:x2 + 1].astype(np.float32),
                    (mask_size, mask_size)) >= 0.5
                x1 *= (input_w - 1) / (im_width - 1)
                x2 *= (input_w - 1) / (im_width - 1)
                y1 *= (input_h - 1) / (im_height - 1)
                y2 *= (input_h - 1) / (im_height - 1)
                boxes[im_id, o_id] = [o_id, x1, y1, x2, y2]
        # debugging data generation
        # ---- uncomment below for visualize output
        # # debug box output
        # import matplotlib.pyplot as plt
        # plt.imshow(image)
        # for o_id in range(num_objs):
        #     x1, y1, x2, y2 = boxes[t, o_id, 1:]
        #     rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, linewidth=2, color='r')
        #     plt.gca().add_patch(rect)
        # plt.savefig(os.path.join(save_dir, f'{t:03d}_debug.jpg')), plt.close()
        # # debug mask output
        # for o_id in range(num_objs):
        #     mask_im = np.zeros((128, 128))
        #     x1, y1, x2, y2 = boxes[t, o_id, 1:].astype(np.int)
        #     mask = cv2.resize(masks[t, o_id].astype(np.float32), (x2 - x1 + 1, y2 - y1 + 1))
        #     mask_im[y1:y2 + 1, x1:x2 + 1] = mask
        #     plt.imshow(mask_im)
        #     plt.savefig(os.path.join(save_dir, f'{t:03d}_{o_id}_debug.jpg')), plt.close()
        # save bounding boxes
        hickle.dump(full_images, f'{fim_save_root}/{act_id:03d}_image.hkl',
                    mode='w', compression='gzip')
        hickle.dump(int(sim.status == 1), f'{bm_save_root}/{act_id:03d}_label.hkl',
                    mode='w', compression='gzip')
        hickle.dump(boxes, f'{bm_save_root}/{act_id:03d}_boxes.hkl',
                    mode='w', compression='gzip')
        hickle.dump(masks, f'{bm_save_root}/{act_id:03d}_masks.hkl',
                    mode='w', compression='gzip')
def _parse_image(self, video_name, vid_idx, img_idx):
    data = np.array([phyre.observations_to_float_rgb(np.load(video_name))],
                    dtype=np.float).transpose((0, 3, 1, 2))
    return data
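A small usage sketch (illustrative, not from the original repo): the `.npy` file loaded above is assumed to hold a raw 256x256 uint8 PHYRE observation, for example an initial scene saved from the simulator; the method then returns a channels-first float RGB array of shape (1, 3, 256, 256).

import numpy as np
import phyre

simulator = phyre.initialize_simulator(['00000:000'], 'ball')  # placeholder task id
np.save('initial_scene.npy', simulator.initial_scenes[0])
# Assuming `dataset` is an instance of the class that defines _parse_image above:
# data = dataset._parse_image('initial_scene.npy', vid_idx=0, img_idx=0)
# data.shape -> (1, 3, 256, 256)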
def test(self):
    self.model.eval()
    if C.RIN.VAE:
        losses = dict.fromkeys(self.loss_name, 0.0)
        box_p_step_losses = [0.0 for _ in range(self.ptest_size)]
        masks_step_losses = [0.0 for _ in range(self.ptest_size)]
    for batch_idx, (data, data_pred, data_t, env_name, rois, gt_boxes, gt_masks,
                    valid, module_valid, g_idx, _) in enumerate(self.val_loader):
        with torch.no_grad():
            # decide module_valid here for evaluation
            mid = 0  # ball-only
            module_valid = module_valid[:, mid, :, :]
            if C.RIN.ROI_MASKING or C.RIN.ROI_CROPPING:
                # data should be (b x t x o x c x h x w)
                data = data.permute((0, 2, 1, 3, 4, 5))  # (b, o, t, c, h, w)
                data = data.reshape((data.shape[0] * data.shape[1],) + data.shape[2:])  # (b*o, t, c, h, w)
            data = data.to(self.device)
            pos_feat = xyxy_to_posf(rois, data.shape)
            rois = xyxy_to_rois(rois, batch=data.shape[0], time_step=data.shape[1],
                                num_devices=self.num_gpus)
            labels = {
                'boxes': gt_boxes.to(self.device),
                'masks': gt_masks.to(self.device),
                'valid': valid.to(self.device),
                'module_valid': module_valid.to(self.device),
            }
            outputs = self.model(data, rois, pos_feat, valid, num_rollouts=self.ptest_size,
                                 g_idx=g_idx, phase='test')
            # *********************************************************************************
            # VISUALIZATION - generate input image and GT outputs + model outputs
            # input_data = data.cpu().detach().numpy()  # (128, 1, 3, 128, 128)
            # gt_data = data_pred.cpu().detach().numpy()  # (128, 10, 3, 128, 128)
            # data_t = data_t.cpu().detach().numpy()  # (128, 1, 128, 128)
            # validity = valid.cpu().detach().numpy()  # (128, 6)
            # outputs_boxes = outputs['boxes'].cpu().detach().numpy()  # (128, 10, 6, 4)
            # outputs_masks = outputs['masks'].cpu().detach().numpy()  # (128, 10, 6, 21, 21)
            # np.save('save/' + str(batch_idx) + 'input_data.npy', input_data)
            # np.save('save/' + str(batch_idx) + 'gt_data.npy', gt_data)
            # np.save('save/' + str(batch_idx) + 'data_t.npy', data_t)
            # np.save('save/' + str(batch_idx) + 'validity.npy', validity)
            # np.save('save/' + str(batch_idx) + 'outputs_boxes.npy', outputs_boxes)
            # np.save('save/' + str(batch_idx) + 'outputs_masks.npy', outputs_masks)
            #
            # self.visualize_results(input_data, gt_data, outputs, data_t, validity, env_name)
            # *********************************************************************************
            self.loss(outputs, labels, 'test')
            # VAE multiple runs
            if C.RIN.VAE:
                vae_best_mean = np.mean(
                    np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
                losses_t = self.losses.copy()
                box_p_step_losses_t = self.box_p_step_losses.copy()
                masks_step_losses_t = self.masks_step_losses.copy()
                for i in range(99):
                    outputs = self.model(data, rois, None, num_rollouts=self.ptest_size,
                                         g_idx=g_idx, phase='test')
                    self.loss(outputs, labels, 'test')
                    mean_loss = np.mean(
                        np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
                    if mean_loss < vae_best_mean:
                        losses_t = self.losses.copy()
                        box_p_step_losses_t = self.box_p_step_losses.copy()
                        masks_step_losses_t = self.masks_step_losses.copy()
                        vae_best_mean = mean_loss
                    self._init_loss()
                for k, v in losses.items():
                    losses[k] += losses_t[k]
                for i in range(len(box_p_step_losses)):
                    box_p_step_losses[i] += box_p_step_losses_t[i]
                    masks_step_losses[i] += masks_step_losses_t[i]
            tprint(f'eval: {batch_idx}/{len(self.val_loader)}:' + ' ' * 20)
        if self.plot_image > 0:
            outputs = {
                'boxes': outputs['boxes'].cpu().numpy(),
                'masks': outputs['masks'].cpu().numpy() if C.RIN.MASK_LOSS_WEIGHT else None,
            }
            outputs['boxes'][..., 0::2] *= self.input_width
            outputs['boxes'][..., 1::2] *= self.input_height
            outputs['boxes'] = xywh2xyxy(outputs['boxes'].reshape(-1, 4)).reshape(
                (data.shape[0], -1, C.RIN.NUM_OBJS, 4))
            labels = {
                'boxes': labels['boxes'].cpu().numpy(),
                'masks': labels['masks'].cpu().numpy(),
            }
            labels['boxes'][..., 0::2] *= self.input_width
            labels['boxes'][..., 1::2] *= self.input_height
            labels['boxes'] = xywh2xyxy(labels['boxes'].reshape(-1, 4)).reshape(
                (data.shape[0], -1, C.RIN.NUM_OBJS, 4))
            for i in range(rois.shape[0]):
                batch_size = C.SOLVER.BATCH_SIZE if not C.RIN.VAE else 1
                plot_image_idx = batch_size * batch_idx + i
                if plot_image_idx < self.plot_image:
                    tprint(f'plotting: {plot_image_idx}' + ' ' * 20)
                    video_idx, img_idx = self.val_loader.dataset.video_info[plot_image_idx]
                    video_name = self.val_loader.dataset.video_list[video_idx]
                    v = valid[i].numpy().astype(np.bool)
                    pred_boxes_i = outputs['boxes'][i][:, v]
                    gt_boxes_i = labels['boxes'][i][:, v]
                    if 'PHYRE' in C.DATA_ROOT:
                        # [::-1] is to make it consistent with others where opencv is used
                        im_data = phyre.observations_to_float_rgb(
                            np.load(video_name).astype(np.uint8))[..., ::-1]
                        a, b, c = video_name.split('/')[5:8]
                        output_name = f'{a}_{b}_{c.replace(".npy", "")}'
                        bg_image = np.load(video_name).astype(np.uint8)
                        for fg_id in [1, 2, 3, 5]:
                            bg_image[bg_image == fg_id] = 0
                        bg_image = phyre.observations_to_float_rgb(bg_image)
                        # if f'{a}_{b}' not in [
                        #     '00014_123', '00014_528', '00015_257', '00015_337', '00019_273', '00019_296'
                        # ]:
                        #     continue
                        # if f'{a}_{b}' not in [
                        #     '00000_069', '00001_000', '00002_185', '00003_064', '00004_823',
                        #     '00005_111', '00006_033', '00007_090', '00008_177', '00009_930',
                        #     '00010_508', '00011_841', '00012_071', '00013_074', '00014_214',
                        #     '00015_016', '00016_844', '00017_129', '00018_192', '00019_244',
                        #     '00020_010', '00021_115', '00022_537', '00023_470', '00024_048'
                        # ]:
                        #     continue
                    else:
                        bg_image = None
                        image_list = sorted(
                            glob(f'{video_name}/*{self.val_loader.dataset.image_ext}'))
                        im_name = image_list[img_idx]
                        output_name = '_'.join(im_name.split('.')[0].split('/')[-2:])
                        # deal with image data here
                        # plot rollout function only takes care of the usage of plt
                        # if output_name not in ['009_015', '009_031', '009_063', '039_038', '049_011', '059_033']:
                        #     continue
                        # if output_name not in ['00002_00037', '00008_00047', '00011_00048', '00013_00036',
                        #                        '00014_00033', '00020_00054', '00021_00013', '00024_00011']:
                        #     continue
                        if output_name not in ['0016_000', '0045_000', '0120_000', '0163_000']:
                            continue
                        gt_boxes_i = labels['boxes'][i][:, v]
                        im_data = get_im_data(im_name, gt_boxes_i[None, 0:1], C.DATA_ROOT,
                                              self.high_resolution_plot)
                    if self.high_resolution_plot:
                        scale_w = im_data.shape[1] / self.input_width
                        scale_h = im_data.shape[0] / self.input_height
                        pred_boxes_i[..., [0, 2]] *= scale_w
                        pred_boxes_i[..., [1, 3]] *= scale_h
                        gt_boxes_i[..., [0, 2]] *= scale_w
                        gt_boxes_i[..., [1, 3]] *= scale_h
                    pred_masks_i = None
                    if C.RIN.MASK_LOSS_WEIGHT:
                        pred_masks_i = outputs['masks'][i][:, v]
                    plot_rollouts(im_data, pred_boxes_i, gt_boxes_i,
                                  pred_masks_i, labels['masks'][i][:, v],
                                  output_dir=self.output_dir,
                                  output_name=output_name, bg_image=bg_image)
    if C.RIN.VAE:
        self.losses = losses.copy()
        self.box_p_step_losses = box_p_step_losses.copy()
        self.loss_cnt = len(self.val_loader)
    print('\r', end='')
    print_msg = ""
    mean_loss = np.mean(
        np.array(self.box_p_step_losses[:self.ptest_size]) / self.loss_cnt) * 1e3
    print_msg += f"{mean_loss:.3f} | "
    print_msg += f" | ".join([
        "{:.3f}".format(self.losses[name] * 1e3 / self.loss_cnt) for name in self.loss_name
    ])
    pprint(print_msg)
def visualize_results(self, input_data, gt_data, outputs, data_t, valid, env_names):
    """Display input data, GT results and predicted results."""
    # generate background images for this batch based on first frame
    bg_imgs = data_t[:, 0, :, :]
    for fg_id in [1, 2, 3, 5]:
        bg_imgs[bg_imgs == fg_id] = 0
    bgrounds = np.array([
        phyre.observations_to_float_rgb(bg_img.astype(int)) for bg_img in bg_imgs
    ], dtype=np.float).transpose((0, 3, 1, 2))
    # adjust the output boxes
    outputs['boxes'], outputs['masks'] = outputs['boxes'].cpu().detach().numpy(), \
        outputs['masks'].cpu().detach().numpy()
    outputs['boxes'][..., 0::2] *= self.input_width
    outputs['boxes'][..., 1::2] *= self.input_height
    outputs['boxes'] = xywh2xyxy(outputs['boxes'].reshape(-1, 4)).reshape(
        (input_data.shape[0], -1, C.RIN.NUM_OBJS, 4))
    # get number of objects
    num_objs = np.sum(valid == 1, 1)
    for b in range(input_data.shape[0]):
        # # display input
        # plt.imshow(input_data[b][0].transpose(1, 2, 0))
        # plt.savefig('images/input' + str(b) + '.png')
        # delete older images in the folders
        print("Starting file deletions")
        for item in os.listdir('images/gt/'):
            os.remove(os.path.join('images/gt/', item))
        for item in os.listdir('images/pred/'):
            os.remove(os.path.join('images/pred/', item))
        print("File deletions complete")
        # display GT results
        for t in range(gt_data.shape[1]):
            plt.title('GT')
            plt.text(20, 5, 'time: ' + str(t), horizontalalignment='center',
                     verticalalignment='center')
            plt.imshow(gt_data[b][t].transpose(1, 2, 0))
            plt.savefig('images/gt/' + str(t) + '.png')
            plt.close()
        # display model predictions
        for t in range(outputs['boxes'].shape[1]):
            bbox = outputs['boxes'][b, t, :, :]
            mask = outputs['masks'][b, t, :, :, :]
            env_name = env_names[b]
            bground, num_obj = bgrounds[b], num_objs[b]
            pred_img = self.img_from_object_feature(bground, bbox, mask, num_obj, env_name)
            plt.title('Prediction')
            plt.text(20, 5, 'time: ' + str(t), horizontalalignment='center',
                     verticalalignment='center')
            plt.imshow(pred_img.transpose(1, 2, 0))
            plt.savefig('images/pred/' + str(t) + '.png')
            plt.close()
        # generate video of this batch
        pdb.set_trace()
        self.save_video('images/pred/', b)
        self.save_video('images/gt/', b)
        # generate videos side by side
        p = subprocess.Popen([
            'ffmpeg', '-i', 'gt/batch' + str(b) + '.mp4',
            '-i', 'pred/batch' + str(b) + '.mp4',
            '-filter_complex', '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]',
            '-map', '[vid]', '-c:v', 'libx264', '-crf', '23',
            '-preset', 'veryfast', 'batch' + str(b) + '.mp4'
        ], cwd='images/')
        p.wait()
def test(self, start_id=0, end_id=25):
    random.seed(0)
    np.random.seed(0)
    protocal, fold_id = C.PHYRE_PROTOCAL, C.PHYRE_FOLD
    self.score_model.eval()
    print(f'testing using protocal {protocal} and fold {fold_id}')
    # setup the PHYRE evaluation split
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)  # PHYRE setup
    candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]  # filter tasks
    test_list = [task for task in test_tasks if task.split(':')[0] in candidate_list]
    simulator = phyre.initialize_simulator(test_list, action_tier)
    # the action candidates are provided by the authors of the PHYRE benchmark
    num_actions = 10000
    cache = phyre.get_default_100k_cache('ball')
    acts = cache.action_array[:num_actions]
    training_data = cache.get_sample(test_list, None)
    # some statistics variables for the evaluation
    auccess = np.zeros((len(test_list), 100))
    batched_pred = C.SOLVER.BATCH_SIZE
    objs_color = None
    all_data, all_acts, all_rois, all_image = [], [], [], []
    # cache the initial bounding boxes from the simulator
    os.makedirs('cache', exist_ok=True)
    t_list = tqdm(test_list, 'Task')
    for task_id, task in enumerate(t_list):
        sim_statuses = training_data['simulation_statuses'][task_id]
        confs, successes = [], []
        boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
        use_cache = os.path.exists(boxes_cache_name)
        all_boxes = hickle.load(boxes_cache_name) if use_cache else []
        valid_act_id = 0
        for act_id, act in enumerate(tqdm(acts, 'Candidate Action', leave=False)):
            sim = simulator.simulate_action(task_id, act, stride=60,
                                            need_images=True, need_featurized_objects=True)
            assert sim.status == sim_statuses[act_id], 'sanity check not passed'
            if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                if act_id == len(acts) - 1 and len(all_data) > 0:  # final action is invalid
                    conf_t = self.batch_score(all_data, all_rois, all_image, objs_color)
                    confs = confs + conf_t
                    all_data, all_acts, all_rois, all_image = [], [], [], []
                continue
            successes.append(sim.status == phyre.SimulationStatus.SOLVED)
            # parse object, prepare input for network, the logic is the same as tools/gen_phyre.py
            image = cv2.resize(sim.images[0], (self.input_width, self.input_height),
                               interpolation=cv2.INTER_NEAREST)
            all_image.append(image[::-1])
            image = phyre.observations_to_float_rgb(image)
            objs_color = sim.featurized_objects.colors
            objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                          for obj_color in objs_color]
            objs = sim.featurized_objects.features[:, objs_valid, :]
            objs_color = np.array(objs_color)[objs_valid]
            num_objs = objs.shape[1]
            if use_cache:
                boxes = all_boxes[valid_act_id]
                valid_act_id += 1
            else:
                boxes = np.zeros((1, num_objs, 5))
                for o_id in range(num_objs):
                    mask = phyre.objects_util.featurized_objects_vector_to_raster(
                        objs[0][[o_id]])
                    mask_im = phyre.observations_to_float_rgb(mask)
                    mask_im[mask_im == 1] = 0
                    mask_im = mask_im.sum(-1) > 0
                    [h, w] = np.where(mask_im)
                    x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                    x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                    x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                    y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                    y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                    boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                all_boxes.append(boxes)
            data = image.transpose((2, 0, 1))[None, None, :]
            data = torch.from_numpy(data.astype(np.float32))
            rois = torch.from_numpy(boxes[..., 1:].astype(np.float32))[None, :]
            all_data.append(data)
            all_rois.append(rois)
            if len(all_data) % batched_pred == 0 or act_id == len(acts) - 1:
                conf_t = self.batch_score(all_data, all_rois, all_image, objs_color)
                confs = confs + conf_t
                all_data, all_rois, all_image = [], [], []
        if not use_cache:
            all_boxes = np.stack(all_boxes)
            hickle.dump(all_boxes, boxes_cache_name, mode='w', compression='gzip')
        info = f'current AUCCESS: '
        top_acc = np.array(successes)[np.argsort(confs)[::-1]]
        for i in range(100):
            auccess[task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
        w = np.array([np.log(k + 1) - np.log(k) for k in range(1, 101)])
        s = auccess[:task_id + 1].sum(0) / auccess[:task_id + 1].shape[0]
        info += f'{np.sum(w * s) / np.sum(w) * 100:.2f}'
        t_list.set_description(info)
    'Unbox': [530, 500, 50]
}
# 'Towers_B': [230, 550, 40], 'Towers_A': [325, 550, 40],

json_dir = str(phyre.settings.VIRTUAL_TOOLS_DIR / 'Original/')
if not os.path.exists(dst_dir):
    os.makedirs(dst_dir)
for tnm in actions.keys():
    print(tnm)
    with open(os.path.join(json_dir, tnm + '.json'), 'r') as f:
        btr = json.load(f)
    pgw = vt_converter.translate_to_phyre(
        creator_lib.creator.TaskCreator(), btr['world'])
    if tnm in actions.keys():
        pgw.add('dynamic ball',
                scale=actions[tnm][-1] * 2 / VT_SCALE,
                center_x=actions[tnm][0] * PHYRE_SCALE / VT_SCALE,
                center_y=actions[tnm][1] * PHYRE_SCALE / VT_SCALE)
    raw_sc = phyre.simulator.simulate_scene(pgw.scene)
    sc = raw_sc[::100]
    print('finished simulation, ', len(sc), ' timesteps')
    fig, axs = plt.subplots(2, len(sc) // 2, figsize=(20, 10))
    fig.tight_layout()
    plt.subplots_adjust(hspace=0.2, wspace=0.2)
    for i, (ax, s) in enumerate(zip(axs.flatten(), sc)):
        img = phyre.simulator.scene_to_raster(s)
        ax.title.set_text(f'Timestep {i}')
        good_img = phyre.observations_to_float_rgb(img)
        ax.imshow(good_img)
    plt.savefig(dst_dir + '/' + tnm + '_dynamic_action.png')
    plt.close()