def __init__(self, data_root, split, image_ext='.jpg'):
    """Collect the (task, action) samples used by this dataset split.

    Args:
        data_root: Dataset root; stored on the instance unchanged.
        split: ``'train'`` selects the fold's train+dev tasks and the larger
            per-task sample budget; any other value selects the test tasks.
        image_ext: Image file extension; stored on the instance unchanged.

    On return ``self.video_info`` is an ``(N, 4)`` float array whose rows are
    ``[task_index, *action]``; per task the solved actions come first,
    followed by the unsolved ones.
    """
    self.data_root = data_root
    self.split = split
    self.image_ext = image_ext
    is_train = split == 'train'

    self.input_size = C.RPIN.INPUT_SIZE  # number of input images
    # Plain attribute lookup: reads the same config entry as the former
    # eval() call without evaluating a string as code.
    self.pred_size = getattr(
        C.RPIN, f'PRED_SIZE_{"TRAIN" if is_train else "TEST"}')
    self.seq_size = self.input_size + self.pred_size
    self.input_height, self.input_width = C.RPIN.INPUT_HEIGHT, C.RPIN.INPUT_WIDTH

    protocal = C.PHYRE_PROTOCAL
    fold = C.PHYRE_FOLD

    # Per-task budget of solved / unsolved actions.
    num_pos = 400 if is_train else 100
    num_neg = 1600 if is_train else 400

    eval_setup = f'ball_{protocal}_template'
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold)
    tasks = train_tasks + dev_tasks if is_train else test_tasks
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)

    # all the actions
    cache = phyre.get_default_100k_cache('ball')
    training_data = cache.get_sample(tasks, None)
    # (100000 x 3)
    actions = training_data['actions']
    # (num_tasks x 100000)
    sim_statuses = training_data['simulation_statuses']

    self.simulator = phyre.initialize_simulator(tasks, action_tier)

    # Gather per-task chunks and concatenate once at the end; the leading
    # empty chunk keeps the (0, 4) result when there are no tasks.
    chunks = [np.zeros((0, 4))]
    for t_id, _ in enumerate(tqdm(tasks)):
        sim_status = sim_statuses[t_id]
        pos_acts = actions[sim_status == 1].copy()
        neg_acts = actions[sim_status == -1].copy()
        np.random.shuffle(pos_acts)
        np.random.shuffle(neg_acts)
        acts = np.concatenate([pos_acts[:num_pos], neg_acts[:num_neg]])

        video_info = np.zeros((acts.shape[0], 4))
        video_info[:, 0] = t_id
        video_info[:, 1:] = acts
        chunks.append(video_info)
    self.video_info = np.concatenate(chunks)
def train_kde(tasks, tier):
    """Fit one Gaussian KDE per task template from cached solving actions.

    Args:
        tasks: Iterable of task ids of the form ``"<template>:<suffix>"``.
        tier: Action tier name passed to ``phyre.get_default_100k_cache``.

    Returns:
        Dict mapping the template part of the task id to a ``gaussian_kde``
        (Silverman bandwidth) fitted on all cached actions whose status is
        ``SOLVED`` for tasks of that template.
    """
    cache = phyre.get_default_100k_cache(tier)

    # Group the solved-action arrays by template, in first-seen order.
    chunks_by_template = {}
    for task_id in tasks:
        template = task_id.split(":")[0]
        statuses = cache.load_simulation_states(task_id)
        solved_mask = statuses == phyre.simulation_cache.SOLVED
        chunks_by_template.setdefault(template, []).append(
            cache.action_array[solved_mask, :])

    # gaussian_kde expects (dims, samples), hence the transpose.
    return {
        template: gaussian_kde(np.transpose(np.concatenate(chunks, 0)),
                               bw_method="silverman")
        for template, chunks in chunks_by_template.items()
    }
def count_ball_sizes(task_ids, tier, ball_sizes, num_pos):
    """Count tasks that stay solvable when action column 2 is quantized.

    For every task, each cached solving action has its third component
    (column 2 — presumably the ball radius, going by the function name)
    replaced by the nearest value in ``ball_sizes``. The task counts as
    solved as soon as one such snapped action is solved in simulation.

    Args:
        task_ids: Sequence of task ids to evaluate.
        tier: Action tier name for the cache and the simulator.
        ball_sizes: Array-like of allowed values for column 2.
        num_pos: Unused; kept so existing callers keep working.

    Returns:
        Number of tasks for which at least one snapped action solves the task.
    """
    cache = phyre.get_default_100k_cache(tier)
    simulator = phyre.initialize_simulator(task_ids, tier)
    ball_sizes = np.asarray(ball_sizes)

    num_solved = 0
    for task_index, task_id in tqdm(enumerate(task_ids),
                                    desc='Evaluate Tasks',
                                    total=len(task_ids)):
        statuses = cache.load_simulation_states(task_id)
        # Boolean-mask indexing yields a copy, so the in-place edit below
        # does not touch the cache's own action array.
        solved_actions = cache.action_array[
            statuses == phyre.simulation_cache.SOLVED, :]
        nearest = np.abs(solved_actions[:, 2][None, :] -
                         ball_sizes[:, None]).argmin(axis=0)
        solved_actions[:, 2] = ball_sizes[nearest]

        # any() stops at the first solving action, like the former break.
        if any(
                simulator.simulate_action(
                    task_index, action,
                    need_images=False).status.is_solved()
                for action in solved_actions):
            num_solved += 1
    return num_solved
def gen_proposal(self, start_id=0, end_id=25):
    """Simulate sampled actions per task and dump ground-truth/predicted rollouts.

    For the train split and then the test split, every task whose template
    id lies in ``[start_id, end_id)`` gets up to 200 solved and 800 unsolved
    cached actions simulated. For each action the resized simulator frames
    (``gt_im``) and the frames rendered from ``self.generate_trajs``
    (``pred_im``) are written to
    ``data/PHYRE_proposal/<proposal_dir>/<split>/<pos|neg>/<task>/<act_id>.hkl``.
    Sampled actions and initial boxes are cached per task under
    ``data/PHYRE_proposal/cache/`` and reused when both files exist.

    Args:
        start_id: First template id (inclusive) to process.
        end_id: Last template id (exclusive) to process.
    """
    random.seed(0)
    np.random.seed(0)
    protocal = C.PHYRE_PROTOCAL
    fold_id = C.PHYRE_FOLD
    print(f'generate proposal for {protocal} fold {fold_id}')
    max_p_acts, max_n_acts, max_acts = 200, 800, 100000
    self.proposal_dir = f'{self.output_dir.split("/")[-1]}_' \
                        f'p{max_p_acts}n{max_n_acts}a{max_acts // 1000}'
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id)

    # filter task
    train_tasks = train_tasks + dev_tasks
    candidate_ids = {f'{i:05d}' for i in range(start_id, end_id)}
    # The filtered lists do not depend on the split, so build them once and
    # select by key instead of eval()-ing a variable name.
    split_lists = {
        'train': [
            task for task in train_tasks
            if task.split(':')[0] in candidate_ids
        ],
        'test': [
            task for task in test_tasks
            if task.split(':')[0] in candidate_ids
        ],
    }

    for split in ['train', 'test']:
        final_list = split_lists[split]
        # NOTE(review): an empty train list ends the whole method, so the
        # test split is skipped as well — confirm `return` (rather than
        # `continue`) is intended.
        if len(final_list) == 0:
            return

        simulator = phyre.initialize_simulator(final_list, action_tier)
        cache = phyre.get_default_100k_cache('ball')
        training_data = cache.get_sample(final_list, None)
        actions = cache.action_array[:max_acts]

        t_list = tqdm(final_list, 'Task')
        for task_id, task in enumerate(t_list):
            box_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_box.hkl'
            act_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_act.hkl'
            use_cache = os.path.exists(box_cache_name) and os.path.exists(
                act_cache_name)
            if use_cache:
                acts = hickle.load(act_cache_name)
                all_boxes = hickle.load(box_cache_name)
            else:
                sim_statuses = training_data['simulation_statuses'][task_id]
                pos_acts = actions[sim_statuses == 1]
                neg_acts = actions[sim_statuses == -1]
                np.random.shuffle(pos_acts)
                np.random.shuffle(neg_acts)
                pos_acts = pos_acts[:max_p_acts]
                neg_acts = neg_acts[:max_n_acts]
                acts = np.concatenate([pos_acts, neg_acts])
                hickle.dump(acts, act_cache_name, mode='w', compression='gzip')
                all_boxes = []

            valid_act_id = 0
            for act_id, act in enumerate(
                    tqdm(acts, 'Candidate Action', leave=False)):
                sim = simulator.simulate_action(
                    task_id,
                    act,
                    stride=60,
                    need_images=True,
                    need_featurized_objects=True)
                if not use_cache:
                    # Fresh samples are ordered solved-first, so the
                    # simulator must agree with the cached status.
                    if act_id < len(pos_acts):
                        assert sim.status == phyre.SimulationStatus.SOLVED
                    else:
                        assert sim.status == phyre.SimulationStatus.NOT_SOLVED
                assert sim.status != phyre.SimulationStatus.INVALID_INPUT

                raw_images = sim.images
                # Resize every frame and flip it vertically.
                rst_images = np.stack([
                    np.ascontiguousarray(
                        cv2.resize(rst_image,
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1])
                    for rst_image in raw_images
                ])

                # prepare input for network:
                image = cv2.resize(raw_images[0],
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                image = phyre.observations_to_float_rgb(image)

                # parse object: drop objects whose color name contains
                # BLACK or PURPLE.
                objs_color = sim.featurized_objects.colors
                objs_valid = [('BLACK' not in obj_color) and
                              ('PURPLE' not in obj_color)
                              for obj_color in objs_color]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]

                if use_cache:
                    boxes = all_boxes[valid_act_id]
                    valid_act_id += 1
                else:
                    # Rasterize each object of the first frame and take the
                    # tight box of its mask, rescaled to network resolution.
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0

                        [h, w] = np.where(mask_im)
                        x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                        x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                    all_boxes.append(boxes)

                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                rois = torch.from_numpy(boxes[..., 1:].astype(
                    np.float32))[None, :]

                # Background for rendering: first frame with pixel ids
                # 1, 2, 3 and 5 zeroed out.
                bg_image = rst_images[0].copy()
                for fg_id in [1, 2, 3, 5]:
                    bg_image[bg_image == fg_id] = 0

                boxes, masks = self.generate_trajs(data, rois)
                rst_masks = np.stack([
                    self.render_mask_to_image(boxes[0, i],
                                              masks[0, i],
                                              images=bg_image.copy(),
                                              color=objs_color).astype(
                                                  np.uint8)
                    for i in range(self.pred_rollout)
                ])

                output_dir = f'data/PHYRE_proposal/{self.proposal_dir}/{split}/'
                output_dir = output_dir + 'pos/' if sim.status == phyre.SimulationStatus.SOLVED else output_dir + 'neg/'
                output_dir = output_dir + f'{task.replace(":", "_")}/'
                os.makedirs(output_dir, exist_ok=True)
                rst_dict = {'gt_im': rst_images, 'pred_im': rst_masks}
                hickle.dump(rst_dict,
                            f'{output_dir}/{act_id}.hkl',
                            mode='w',
                            compression='gzip')

            if not use_cache:
                all_boxes = np.stack(all_boxes)
                hickle.dump(all_boxes,
                            box_cache_name,
                            mode='w',
                            compression='gzip')
plt.show() if inspect == -2: for i in range(len(X)): plt.imsave(f"result/flownet/{i}_mask.png", local_masks[i, 0]) #x = tmp.reshape_as(X) return X * local_masks + points.reshape_as(X) if __name__ == "__main__": ## TESTING HANDCRAFTED ACTION EXTRACTOR WITH GROUNDTRUTH ACTION PATH # SETUP of phyre simulator SAVE_IMAGES = False eval_setup = 'ball_within_template' fold_id = 0 train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id) cache = phyre.get_default_100k_cache("ball") actions = cache.action_array print(cache.task_ids) tasks = train_tasks #+dev_tasks+test_tasks print(f"{len(tasks)} tasks") sim = phyre.initialize_simulator(tasks, 'ball') init_scenes = sim.initial_scenes X = T.tensor(scenes_to_channels(init_scenes)).float() print("Init Scenes Shape:\n", X.shape) # COLLECT action path action_paths = [] for i, t in enumerate(tasks): while True: action = actions[cache.load_simulation_states(t) == 1] if len(action) == 0:
task = task_dict[task_str] t1 = time.time() print(t1-t0,"Load Single Task Time") t0 = time.time() _, _, images,_ = phyre.simulator.magic_ponies(task, phyre.simulator.scene_if.UserInput(),need_images=True,stride = stride) t1 = time.time() print(t1-t0,"Sim Time") t0 = time.time() seq_data = ImgToObj.getObjectAndGoalSequence(images) t1 = time.time() print(t1-t0,"Sequence Contour Finding Time") t0 = time.time() cache = phyre.get_default_100k_cache(tier) statuses = cache.load_simulation_states(task_str) t1 = time.time() print(t1-t0,"Cache Load Time") t0 = time.time() good_actions = [] discrete_actions = cache.action_array.tolist() good_action_count = 0 solved_action_count = 0 goal_type = ImgToObj.Layer.dynamic_goal.value if goal_type not in images[0]: goal_type = ImgToObj.Layer.static_goal.value
return parser.parse_args() if __name__ == '__main__': args = arg_parse() output_dir = f'outputs/phys/PHYRECls/within/{args.output}' os.makedirs(output_dir, exist_ok=True) action_tier_name = 'ball' eval_setup = 'ball_within_template' train_tasks, dev_tasks, test_ids = phyre.get_fold(eval_setup, 1) task_ids = train_tasks + dev_tasks dev_tasks_ids = test_ids cache = phyre.get_default_100k_cache(action_tier_name) train_batch_size = 64 learning_rate = 0.0003 max_train_actions = None updates = 100000 negative_sampling_prob = 1.0 save_checkpoints_every = 10000 fusion_place = 'last' network_type = 'resnet18' balance_classes = 1 num_auccess_actions = 10000 eval_every = 20000 action_layers = 1 action_hidden_size = 256 cosine_scheduler = 1
def train(cls, task_ids: TaskIds, tier: str, **kwargs) -> State:
    """Train on the default 100k simulation cache of ``tier``.

    Loads the cache for ``tier`` and forwards it, together with ``task_ids``,
    ``tier`` and any extra keyword arguments, to ``cls._train_with_cache``,
    whose result is returned unchanged.
    """
    return cls._train_with_cache(phyre.get_default_100k_cache(tier),
                                 task_ids,
                                 tier=tier,
                                 **kwargs)
def train(cls, train_task_ids, dev_task_ids, full_eval_fn, output_dir,
          summary_writer, cfg):
    """Load a trained forward model from disk, or train one from the cache.

    Args:
        train_task_ids: Task ids used to sample training data from the cache.
        dev_task_ids: Not used in this function body.
        full_eval_fn: Callable invoked with the same state dict this function
            returns; wrapped so the trainer can call it with just a model.
        output_dir: Training output folder; also the load location when
            ``cfg.agent.weights_folder == 'last'``.
        summary_writer: Passed through to ``trainer.train``.
        cfg: Configuration object; the fields read are visible below.

    Returns:
        Dict with keys ``model``, ``trainer``, ``num_actions`` and ``cache``.
    """
    cache = phyre.get_default_100k_cache(cfg.tier)
    trainer = hydra.utils.instantiate(cfg.agent.trainer)
    # Number of actions reported in the state: configured cap or full cache.
    if cfg.max_train_actions:
        num_actions = cfg.max_train_actions
    else:
        num_actions = len(cache)

    def full_eval_from_model(model):
        # Adapter: builds the state dict full_eval_fn expects from a model.
        return full_eval_fn(
            dict(model=model,
                 trainer=trainer,
                 num_actions=num_actions,
                 cache=cache))

    init_model = trainer.gen_model(cfg)
    if cfg.agent.weights_folder is not None:
        # Pretrained weights requested: load them and skip training.
        if cfg.agent.weights_folder == 'last':
            # Just use the last trained model
            cfg.agent.weights_folder = output_dir
        fwd_model = trainer.load_agent_from_folder(
            init_model, cfg.agent.weights_folder)
    else:
        # Optional warm start (non-strict load) before training.
        if cfg.agent.init_weights_folder is not None:
            init_model = trainer.load_agent_from_folder(
                init_model, cfg.agent.init_weights_folder, strict=False)
        training_data = cache.get_sample(
            train_task_ids, cfg.train.data_loader.max_train_actions)
        task_indices, is_solved, actions, _, _ = (
            neural_agent.compact_simulation_data_to_trainset(
                cfg.tier, **training_data))
        # Optional object-level forward model handed to the dataset.
        if cfg.train.data_loader.fwd_model.use_obj_fwd_model:
            obj_fwd_model = obj_fwd_agent.ObjTrainer.gen_model(cfg)
            if cfg.train.data_loader.fwd_model.weights is not None:
                obj_fwd_model = trainer.load_agent_from_folder(
                    obj_fwd_model, cfg.train.data_loader.fwd_model.weights)
            # Unwrap `.module` and move to CPU before giving it to the dataset.
            obj_fwd_model = obj_fwd_model.module.cpu()
        else:
            obj_fwd_model = None
        dataset = PhyreDataset(
            cfg.tier,
            train_task_ids,
            task_indices,
            is_solved,
            actions,
            cfg.simulator,
            mode='train',
            balance_classes=cfg.train.data_loader.balance_classes,
            hard_negatives=cfg.train.data_loader.hard_negatives,
            init_clip_ratio_to_sim=cfg.train.init_clip_ratio_to_sim,
            frames_per_clip=cfg.train.frames_per_clip,
            n_hist_frames=cfg.train.n_hist_frames,
            shuffle_indices=cfg.train.shuffle_indices,
            drop_objs=cfg.train.drop_objs,
            obj_fwd_model=obj_fwd_model)
        fwd_model = trainer.train(init_model, dataset, output_dir,
                                  summary_writer, full_eval_from_model, cfg)
    return dict(model=fwd_model,
                trainer=trainer,
                num_actions=num_actions,
                cache=cache)
def train(cls, task_ids, tier, simulation_cache_size=None, **kwargs):
    """Train on the default 100k simulation cache of ``tier``.

    ``simulation_cache_size`` exists only for signature compatibility: any
    value other than ``None`` fails the assertion below. Remaining keyword
    arguments are forwarded to ``cls._train_with_cache``, whose result is
    returned unchanged.
    """
    assert simulation_cache_size is None, 'Non-default cache size is not supported.'
    default_cache = phyre.get_default_100k_cache(tier)
    return cls._train_with_cache(default_cache,
                                 task_ids,
                                 tier=tier,
                                 **kwargs)
def evaluate_agent(task_ids, tier, solved_actions_pdf):
    """Evaluate a sampling + finite-difference search agent on ``task_ids``.

    Per task: (1) roll the scene out without any action, (2) sample candidate
    actions (from the per-template KDE or uniformly), simulate those that
    pass ``ImgToObj.check_seq_action_intersect`` and score them, (3) if still
    unsolved, refine the best-scoring action with antithetic random
    perturbations. Every simulated action is logged with the evaluator.

    Args:
        task_ids: Sequence of task ids of the form ``"<template>:<suffix>"``.
        tier: Action tier name for the cache and the simulator.
        solved_actions_pdf: Dict mapping template id to an object with a
            ``resample(size=...)`` method.

    Returns:
        Tuple ``(evaluator.get_aucess(), tasks_solved, len(task_ids))``.
    """
    cache = phyre.get_default_100k_cache(tier)
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    stride = 100  # frame stride of the no-action rollout
    eval_stride = 2  # frame stride when simulating candidate actions
    # Normalizer for the touch score below.
    goal = 3.0 * 60.0 / eval_stride
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0
    alpha = 1.0  # step size of the refinement update
    N = 5  # perturbation pairs per refinement step
    max_actions = 100  # not used in this function
    for task_index in tqdm(range(len(task_ids)), desc='Evaluate tasks'):
        task_id = task_ids[task_index]
        task_type = task_id.split(":")[0]
        task_data = task_data_dict[task_id]
        statuses = cache.load_simulation_states(task_id)  # not used below
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)

        # One NOT_SOLVED attempt is logged before any action is tried.
        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        # Fall back to the static goal value when the dynamic one is absent
        # from the first frame.
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        # Rows are [a0, a1, a2, exclusion_radius, score]; the first row is a
        # placeholder so the array is never empty.
        tested_actions = np.array([[-1, -1, -1, 1, 0]])
        solved_task = False
        max_score = 0
        while evaluator.get_attempts_for_task(
                task_index
        ) < phyre.MAX_TEST_ATTEMPTS and not solved_task and max_score < 1.0:
            random_action = np.random.random_sample((1, 5))
            # With a KDE for this template, replace the uniform draw by a KDE
            # sample whenever a fresh uniform draw is >= .25.
            if task_type in solved_actions_pdf and np.random.random_sample(
            ) >= .25:
                random_action[0, 0:3] = np.squeeze(
                    solved_actions_pdf[task_type].resample(size=1))

            # Skip candidates inside the exclusion radius of a tested action
            # when a fresh uniform draw is >= .75.
            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            if np.any(test_action_dist <= tested_actions[:, 3]
                     ) and np.random.random_sample() >= .75:
                continue
            # NOTE(review): random_action[0:3] slices rows of a (1, 5) array,
            # so all five values are passed here, unlike the [:, 0:3] slice
            # given to the simulator — confirm what the helper expects.
            if ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type,
                    np.squeeze(random_action[0:3])):
                sim_result = simulator.simulate_action(
                    task_index,
                    np.squeeze(random_action[:, 0:3]),
                    need_images=True,
                    stride=eval_stride)
                evaluator.maybe_log_attempt(task_index, sim_result.status)
                if not sim_result.status.is_invalid():
                    score = ImgToObj.objectTouchGoalSequence(sim_result.images)
                    eval_dist = .1
                    random_action[0, 3] = eval_dist
                    # NOTE(review): the distance term uses seq_data (the
                    # no-action rollout), not this action's rollout, so it is
                    # the same for every candidate — confirm intent.
                    random_action[0, 4] = 1.0 - np.linalg.norm(
                        seq_data['object'][-1]['centroid'] -
                        seq_data['goal'][-1]['centroid']) / 256.0
                    random_action[0, 4] += ImgToObj.objectTouchGoalSequence(
                        sim_result.images) / goal
                    if random_action[0, 4] > max_score:
                        max_score = random_action[0, 4]
                    tested_actions = np.concatenate(
                        (tested_actions, random_action), 0)
                    solved_task = sim_result.status.is_solved()
                    tasks_solved += solved_task

        # Refinement phase around the best-scoring tested action.
        if not solved_task and evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS:
            # Drop the placeholder row.
            # NOTE(review): if no valid action was recorded the array is now
            # empty and np.argmax below raises — confirm this cannot happen.
            tested_actions = np.delete(tested_actions, 0, 0)
            theta = tested_actions[np.argmax(tested_actions[:, 4]), 0:3]
            theta_score = tested_actions[np.argmax(tested_actions[:, 4]), 4]
            # Each step needs 2 * N probes plus one evaluation of theta.
            while evaluator.get_attempts_for_task(
                    task_index
            ) + 2 * N + 1 < phyre.MAX_TEST_ATTEMPTS and not solved_task:
                delta = np.random.normal(0, .2, (N, 3))
                test_actions_pos = theta + delta
                test_actions_neg = theta - delta
                old_theta = np.copy(theta)
                for i in range(N):
                    # Score of theta + delta[i]; stays 0 on invalid input.
                    pos_score = 0
                    sim_result_pos = simulator.simulate_action(
                        task_index,
                        np.squeeze(test_actions_pos[i, :]),
                        need_images=True,
                        stride=eval_stride)
                    evaluator.maybe_log_attempt(task_index,
                                                sim_result_pos.status)
                    if not sim_result_pos.status.is_invalid():
                        pos_result_seq_data = ImgToObj.getObjectAndGoalSequence(
                            sim_result_pos.images)
                        pos_score = 1.0 - np.linalg.norm(
                            pos_result_seq_data['object'][-1]['centroid'] -
                            pos_result_seq_data['goal'][-1]['centroid']) / 256.0
                        pos_score += ImgToObj.objectTouchGoalSequence(
                            sim_result_pos.images) / goal
                        # NOTE(review): solved_task is overwritten by later
                        # probes while tasks_solved is incremented each time,
                        # so a task may be counted more than once or a solve
                        # forgotten — confirm.
                        solved_task = sim_result_pos.status.is_solved()
                        tasks_solved += solved_task
                    # Score of theta - delta[i]; stays 0 on invalid input.
                    neg_score = 0
                    sim_result_neg = simulator.simulate_action(
                        task_index,
                        np.squeeze(test_actions_neg[i, :]),
                        need_images=True,
                        stride=eval_stride)
                    evaluator.maybe_log_attempt(task_index,
                                                sim_result_neg.status)
                    if not sim_result_neg.status.is_invalid():
                        neg_result_seq_data = ImgToObj.getObjectAndGoalSequence(
                            sim_result_neg.images)
                        neg_score = 1.0 - np.linalg.norm(
                            neg_result_seq_data['object'][-1]['centroid'] -
                            neg_result_seq_data['goal'][-1]['centroid']) / 256.0
                        neg_score += ImgToObj.objectTouchGoalSequence(
                            sim_result_neg.images) / goal
                        solved_task = sim_result_neg.status.is_solved()
                        tasks_solved += solved_task
                    # Move theta along delta[i], weighted by the score gap.
                    theta = theta + alpha / N * (pos_score -
                                                 neg_score) * delta[i, :]
                # Evaluate the updated theta itself.
                sim_result = simulator.simulate_action(task_index,
                                                       np.squeeze(theta),
                                                       need_images=True,
                                                       stride=eval_stride)
                evaluator.maybe_log_attempt(task_index, sim_result.status)
                if not sim_result.status.is_invalid():
                    result_seq_data = ImgToObj.getObjectAndGoalSequence(
                        sim_result.images)
                    score = 1.0 - np.linalg.norm(
                        result_seq_data['object'][-1]['centroid'] -
                        result_seq_data['goal'][-1]['centroid']) / 256.0
                    score += ImgToObj.objectTouchGoalSequence(
                        sim_result.images) / goal
                    solved_task = sim_result.status.is_solved()
                    tasks_solved += solved_task

    print(tasks_solved, "Tasks solved out of ", len(task_ids), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(task_ids))
def gen_single_task(task_list_item, action_tier_name, cache_name):
    """Simulate sampled cached actions for one task and dump training data.

    Up to ``max_p_acts`` solved and ``max_n_acts`` unsolved actions are drawn
    from the default ball cache, simulated, and for each action these files
    are written below ``cache_name``:

    * ``images/<template>/<variant>/<act>.npy`` — resized first frame
    * ``full/<template>/<variant>/<act>_image.hkl`` — all resized frames
    * ``labels/<template>/<variant>/<act>_{label,boxes,masks}.hkl``

    ``max_p_acts``, ``max_n_acts``, ``mask_size``, ``input_h`` and ``input_w``
    are read from module scope.

    Args:
        task_list_item: Id of the single task to process.
        action_tier_name: Action tier used to initialize the simulator.
            NOTE(review): the cache is always the 'ball' cache regardless of
            this value — confirm that is intended.
        cache_name: Root directory for the generated files.
    """
    # Seed NumPy as well: the subsets below are drawn with np.random.shuffle,
    # which random.seed() alone does not make reproducible.
    random.seed(0)
    np.random.seed(0)

    cache = phyre.get_default_100k_cache('ball')
    training_data = cache.get_sample([task_list_item], None)
    actions = cache.action_array
    sim_statuses = training_data['simulation_statuses'][0]

    simulator = phyre.initialize_simulator([task_list_item], action_tier_name)
    pos_acts = actions[sim_statuses == 1]
    neg_acts = actions[sim_statuses == -1]
    print(
        f'{simulator.task_ids[0].replace(":", "_")}: success: {len(pos_acts)}, fail: {len(neg_acts)}'
    )

    task_id = simulator.task_ids[0]
    id_parts = task_id.split(":")
    im_save_root = f'{cache_name}/images/{id_parts[0]}/{id_parts[1]}'
    fim_save_root = f'{cache_name}/full/{id_parts[0]}/{id_parts[1]}'
    bm_save_root = f'{cache_name}/labels/{id_parts[0]}/{id_parts[1]}'
    os.makedirs(im_save_root, exist_ok=True)
    os.makedirs(fim_save_root, exist_ok=True)
    os.makedirs(bm_save_root, exist_ok=True)

    np.random.shuffle(pos_acts)
    np.random.shuffle(neg_acts)
    pos_acts = pos_acts[:max_p_acts]
    neg_acts = neg_acts[:max_n_acts]
    acts = np.concatenate([pos_acts, neg_acts])

    for act_id, action in enumerate(tqdm(acts)):
        sim = simulator.simulate_action(0,
                                        action,
                                        stride=60,
                                        need_images=True,
                                        need_featurized_objects=True)
        images = sim.images
        assert sim.status != 0

        # filter out static objects: drop those whose color name contains
        # BLACK or PURPLE.
        objs_color = sim.featurized_objects.colors
        objs_valid = [('BLACK' not in obj_color) and
                      ('PURPLE' not in obj_color)
                      for obj_color in objs_color]
        objs = sim.featurized_objects.features[:, objs_valid, :]

        num_objs = objs.shape[1]
        boxes = np.zeros((len(images), num_objs, 5))
        masks = np.zeros((len(images), num_objs, mask_size, mask_size))
        full_images = np.zeros((len(images), input_h, input_w))

        for im_id, (raw_image, obj) in enumerate(zip(images, objs)):
            im_height = raw_image.shape[0]
            im_width = raw_image.shape[1]
            image = cv2.resize(raw_image, (input_w, input_h),
                               interpolation=cv2.INTER_NEAREST)
            # Only the first frame is stored as a standalone .npy file.
            if im_id == 0:
                np.save(f'{im_save_root}/{act_id:03d}.npy', image)
            full_images[im_id] = image

            for o_id in range(num_objs):
                # Rasterize the single object and binarize it.
                mask = phyre.objects_util.featurized_objects_vector_to_raster(
                    obj[[o_id]])
                mask_im = phyre.observations_to_float_rgb(mask)
                mask_im[mask_im == 1] = 0
                mask_im = mask_im.sum(-1) > 0

                [h, w] = np.where(mask_im)
                assert len(h) > 0 and len(w) > 0
                x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                # Crop to the tight box at full resolution, then resize to a
                # fixed-size binary mask.
                masks[im_id, o_id] = cv2.resize(
                    mask_im[y1:y2 + 1, x1:x2 + 1].astype(np.float32),
                    (mask_size, mask_size)) >= 0.5

                # Rescale the box from simulator to network resolution.
                x1 *= (input_w - 1) / (im_width - 1)
                x2 *= (input_w - 1) / (im_width - 1)
                y1 *= (input_h - 1) / (im_height - 1)
                y2 *= (input_h - 1) / (im_height - 1)
                boxes[im_id, o_id] = [o_id, x1, y1, x2, y2]

        # save frames, label, bounding boxes and masks
        hickle.dump(full_images,
                    f'{fim_save_root}/{act_id:03d}_image.hkl',
                    mode='w',
                    compression='gzip')
        hickle.dump(int(sim.status == 1),
                    f'{bm_save_root}/{act_id:03d}_label.hkl',
                    mode='w',
                    compression='gzip')
        hickle.dump(boxes,
                    f'{bm_save_root}/{act_id:03d}_boxes.hkl',
                    mode='w',
                    compression='gzip')
        hickle.dump(masks,
                    f'{bm_save_root}/{act_id:03d}_masks.hkl',
                    mode='w',
                    compression='gzip')
def evaluate_agent(task_ids, tier, solved_actions_pdf):
    """Evaluate a filtered random-search agent on ``task_ids``.

    Per task the scene is rolled out without any action, then candidate
    actions (KDE samples or uniform draws) that pass
    ``ImgToObj.check_seq_action_intersect`` are simulated and logged with the
    evaluator until the task is solved or the attempt budget is used up.

    Args:
        task_ids: Sequence of task ids of the form ``"<template>:<suffix>"``.
        tier: Action tier name for the cache and the simulator.
        solved_actions_pdf: Dict mapping template id to an object with a
            ``resample(size=...)`` method.

    Returns:
        Tuple ``(evaluator.get_aucess(), tasks_solved, len(task_ids))``.
    """
    cache = phyre.get_default_100k_cache(tier)
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    stride = 5  # frame stride of the no-action rollout
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0
    for task_index in tqdm(range(len(task_ids)), desc='Evaluate tasks'):
        task_id = task_ids[task_index]
        task_type = task_id.split(":")[0]
        task_data = task_data_dict[task_id]
        statuses = cache.load_simulation_states(task_id)  # not used below
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)

        # One NOT_SOLVED attempt is logged before any action is tried.
        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        # Fall back to the static goal value when the dynamic one is absent
        # from the first frame.
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        # Rows are [a0, a1, a2, exclusion_radius]; the first row is a
        # placeholder so the array is never empty.
        tested_actions = np.array([[-1, -1, -1, 1]])
        solved_task = False
        while evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS and not solved_task:
            random_action = np.random.random_sample((1, 4))
            # With a KDE for this template, replace the uniform draw by a KDE
            # sample whenever a fresh uniform draw is >= .25.
            if task_type in solved_actions_pdf and np.random.random_sample(
            ) >= .25:
                random_action[0, 0:3] = np.squeeze(
                    solved_actions_pdf[task_type].resample(size=1))

            # Skip candidates inside the exclusion radius of a tested action
            # when a fresh uniform draw is >= .75.
            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            if np.any(test_action_dist <= tested_actions[:, 3]
                     ) and np.random.random_sample() >= .75:
                continue
            # NOTE(review): random_action[0:3] slices rows of a (1, 4) array,
            # so all four values are passed here, unlike the [:, 0:3] slice
            # given to the simulator — confirm what the helper expects.
            if ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type,
                    np.squeeze(random_action[0:3])):
                eval_stride = 10
                goal = 3.0 * 60.0 / eval_stride  # not used below
                sim_result = simulator.simulate_action(
                    task_index,
                    np.squeeze(random_action[:, 0:3]),
                    need_images=True,
                    stride=eval_stride)
                evaluator.maybe_log_attempt(task_index, sim_result.status)
                if not sim_result.status.is_invalid():
                    score = ImgToObj.objectTouchGoalSequence(sim_result.images)
                    # Zero-score actions get the larger exclusion radius
                    # (.35); all others get .1.
                    eval_dist = .25 * (score == 0) + .1
                    random_action[0, 3] = eval_dist
                    tested_actions = np.concatenate(
                        (tested_actions, random_action), 0)
                    solved_task = sim_result.status.is_solved()
                    tasks_solved += solved_task

    print(tasks_solved, "Tasks solved out of ", len(task_ids), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(task_ids))
def count_good_actions(task_ids, tier):
    """Count valid and solving actions found by filtered random search.

    Per task, uniformly drawn actions that pass
    ``ImgToObj.check_seq_action_intersect`` are simulated until
    ``max_actions`` valid actions were tried or one of them solves the task.

    Args:
        task_ids: Sequence of task ids.
        tier: Action tier name for the cache and the simulator.

    Returns:
        List with one dict per task: ``num_good`` (valid simulated actions),
        ``num_solved`` (solving actions) and ``num_total`` (number of actions
        in the cache).
    """
    cache = phyre.get_default_100k_cache(tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    simulator = phyre.initialize_simulator(task_ids, tier)
    results = []
    stride = 50  # frame stride of the no-action rollout
    empty_action = phyre.simulator.scene_if.UserInput()
    max_actions = 100  # budget of valid simulated actions per task
    for task_index in tqdm(range(len(task_ids)), desc='Evaluate tasks'):
        task_id = task_ids[task_index]
        task_data = task_data_dict[task_id]
        statuses = cache.load_simulation_states(task_id)  # not used below
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        # Only the length of this list is used (as num_total).
        discrete_actions = cache.action_array.tolist()

        good_action_count = 0
        solved_action_count = 0

        # Fall back to the static goal value when the dynamic one is absent
        # from the first frame.
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        tested_actions_count = 0
        # Rows are [a0, a1, a2, exclusion_radius]; the first row is a
        # placeholder so the array is never empty.
        tested_actions = np.array([[-1, -1, -1, 1]])

        # NOTE(review): only valid simulations advance the counter, so this
        # loop does not end if no sampled action is ever valid — confirm.
        while tested_actions_count < max_actions and solved_action_count <= 0:
            random_action = np.random.random_sample((1, 4))
            # Skip candidates inside the exclusion radius of a tested action
            # when a fresh uniform draw is >= .25. Recorded radii are 0.0
            # here, so only an exact repeat can match.
            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            if np.any(test_action_dist <= tested_actions[:, 3]
                     ) and np.random.random_sample() >= .25:
                continue
            # NOTE(review): random_action[0:3] slices rows of a (1, 4) array,
            # so all four values are passed here, unlike the [:, 0:3] slice
            # given to the simulator — confirm what the helper expects.
            if ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type,
                    np.squeeze(random_action[0:3])):
                eval_stride = 5
                goal = 3.0 * 60.0 / eval_stride  # not used below
                sim_result = simulator.simulate_action(
                    task_index,
                    np.squeeze(random_action[:, 0:3]),
                    need_images=True,
                    stride=eval_stride)
                if not sim_result.status.is_invalid():
                    good_action_count += 1
                    tested_actions_count += 1
                    score = ImgToObj.objectTouchGoalSequence(sim_result.images)
                    eval_dist = 0.0
                    random_action[0, 3] = eval_dist
                    tested_actions = np.concatenate(
                        (tested_actions, random_action), 0)
                    solved_task = sim_result.status.is_solved()
                    solved_action_count += solved_task

        results.append({
            'num_good': good_action_count,
            'num_solved': solved_action_count,
            'num_total': len(discrete_actions)
        })
    return results
def count_good_actions(task_ids, tier):
    """Count valid and solving actions found by search plus refinement.

    Per task: (1) filtered random search as long as fewer than
    ``max_actions`` valid actions were tried, nothing solved and the best
    score is below 1.0; (2) if still unsolved, refinement of the best-scoring
    action from antithetic random perturbations.

    Args:
        task_ids: Sequence of task ids.
        tier: Action tier name for the cache and the simulator.

    Returns:
        List with one dict per task: ``num_good`` (valid simulated actions),
        ``num_solved`` (solving actions) and ``num_total`` (number of actions
        in the cache).
    """
    cache = phyre.get_default_100k_cache(tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    simulator = phyre.initialize_simulator(task_ids, tier)
    results = []
    stride = 5  # frame stride of the no-action rollout
    empty_action = phyre.simulator.scene_if.UserInput()
    max_actions = 100  # budget of valid simulated actions per task
    alpha = 1.0  # step size of the refinement update
    N = 5  # perturbation pairs collected per refinement step
    eval_stride = 1  # frame stride when simulating candidate actions
    # Normalizer for the touch score below.
    goal = 3.0 * 60.0 / eval_stride
    for task_index in tqdm(range(len(task_ids)), desc='Evaluate tasks'):
        task_id = task_ids[task_index]
        task_data = task_data_dict[task_id]
        statuses = cache.load_simulation_states(task_id)  # not used below
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        # Only the length of this list is used (as num_total).
        discrete_actions = cache.action_array.tolist()

        good_action_count = 0
        solved_action_count = 0

        # Fall back to the static goal value when the dynamic one is absent
        # from the first frame.
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        tested_actions_count = 0
        # Rows are [a0, a1, a2, exclusion_radius, score]; the first row is a
        # placeholder so the array is never empty.
        tested_actions = np.array([[-1, -1, -1, 1, 0]])
        max_score = 0

        while tested_actions_count < max_actions and solved_action_count <= 0 and max_score < 1.0:
            random_action = np.random.random_sample((1, 5))
            # Skip candidates inside the exclusion radius of a tested action
            # when a fresh uniform draw is >= .25.
            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            if np.any(test_action_dist <= tested_actions[:, 3]
                     ) and np.random.random_sample() >= .25:
                continue
            # NOTE(review): random_action[0:3] slices rows of a (1, 5) array,
            # so all five values are passed here, unlike the [:, 0:3] slice
            # given to the simulator — confirm what the helper expects.
            if ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type,
                    np.squeeze(random_action[0:3])):
                sim_result = simulator.simulate_action(
                    task_index,
                    np.squeeze(random_action[:, 0:3]),
                    need_images=True,
                    stride=eval_stride)
                if not sim_result.status.is_invalid():
                    result_seq_data = ImgToObj.getObjectAndGoalSequence(
                        sim_result.images)
                    good_action_count += 1
                    tested_actions_count += 1
                    eval_dist = .05
                    random_action[0, 3] = eval_dist
                    # NOTE(review): the distance term uses seq_data (the
                    # no-action rollout) although result_seq_data was just
                    # computed for this action — confirm which is intended.
                    random_action[0, 4] = 1.0 - np.linalg.norm(
                        seq_data['object'][-1]['centroid'] -
                        seq_data['goal'][-1]['centroid']) / 256.0
                    random_action[0, 4] += ImgToObj.objectTouchGoalSequence(
                        sim_result.images) / goal
                    if random_action[0, 4] > max_score:
                        max_score = random_action[0, 4]
                    tested_actions = np.concatenate(
                        (tested_actions, random_action), 0)
                    solved_task = sim_result.status.is_solved()
                    solved_action_count += solved_task

        # Refinement phase around the best-scoring tested action.
        if solved_action_count <= 0:
            # Drop the placeholder row before picking the best action.
            tested_actions = np.delete(tested_actions, 0, 0)
            theta = tested_actions[np.argmax(tested_actions[:, 4]), 0:3]
            theta_score = tested_actions[np.argmax(tested_actions[:, 4]), 4]
            b = 3  # number of perturbation directions used per update
            mu = np.zeros(3)  # not used below
            esp = np.eye(3)  # not used below
            while tested_actions_count + 2 * N + 1 < max_actions and solved_action_count <= 0:
                old_theta = np.copy(theta)
                scores = np.zeros((N, 2))  # columns: [pos_score, neg_score]
                deltas = np.zeros((N, 3))
                i = 0
                # Collect up to N perturbations where at least one side
                # produced a non-zero score.
                while i < N and tested_actions_count + 2 * N + 1 < max_actions:
                    delta = np.random.normal(0, .2, (1, 3))
                    test_action_pos = theta + delta
                    test_action_neg = theta - delta
                    # Score of theta + delta; stays 0 on invalid input.
                    pos_score = 0
                    sim_result_pos = simulator.simulate_action(
                        task_index,
                        np.squeeze(test_action_pos),
                        need_images=True,
                        stride=eval_stride)
                    if not sim_result_pos.status.is_invalid():
                        tested_actions_count += 1
                        good_action_count += 1
                        pos_result_seq_data = ImgToObj.getObjectAndGoalSequence(
                            sim_result_pos.images)
                        pos_score = 1.0 - np.linalg.norm(
                            pos_result_seq_data['object'][-1]['centroid'] -
                            pos_result_seq_data['goal'][-1]['centroid']) / 256.0
                        pos_score += ImgToObj.objectTouchGoalSequence(
                            sim_result_pos.images) / goal
                        solved_task = sim_result_pos.status.is_solved()
                        solved_action_count += solved_task
                    # Score of theta - delta; stays 0 on invalid input.
                    neg_score = 0
                    sim_result_neg = simulator.simulate_action(
                        task_index,
                        np.squeeze(test_action_neg),
                        need_images=True,
                        stride=eval_stride)
                    if not sim_result_neg.status.is_invalid():
                        tested_actions_count += 1
                        good_action_count += 1
                        neg_result_seq_data = ImgToObj.getObjectAndGoalSequence(
                            sim_result_neg.images)
                        neg_score = 1.0 - np.linalg.norm(
                            neg_result_seq_data['object'][-1]['centroid'] -
                            neg_result_seq_data['goal'][-1]['centroid']) / 256.0
                        neg_score += ImgToObj.objectTouchGoalSequence(
                            sim_result_neg.images) / goal
                        solved_task = sim_result_neg.status.is_solved()
                        solved_action_count += solved_task
                    if pos_score != 0 or neg_score != 0:
                        deltas[i, :] = delta
                        scores[i, 0] = pos_score
                        scores[i, 1] = neg_score
                        i += 1
                max_scores = np.amax(scores, axis=1)
                # NOTE(review): with kth=b the last b entries are not
                # guaranteed to be the b largest; a top-b selection would use
                # kth=-b — confirm intent.
                max_index = np.argpartition(max_scores, b)[-b:]
                for i in max_index:
                    # NOTE(review): the zero check covers all scores but the
                    # division uses the std of the selected rows, which can
                    # still be zero — confirm.
                    if np.std(scores) == 0:
                        print(task_id)
                        theta = theta + (alpha /
                                         (b)) * (scores[i, 0] -
                                                 scores[i, 1]) * deltas[i, :]
                    else:
                        theta = theta + (alpha /
                                         (b * np.std(scores[max_index]))) * (
                                             scores[i, 0] -
                                             scores[i, 1]) * deltas[i, :]
                # Evaluate the updated theta; revert it if the action is invalid.
                sim_result = simulator.simulate_action(task_index,
                                                       np.squeeze(theta),
                                                       need_images=True,
                                                       stride=eval_stride)
                if not sim_result.status.is_invalid():
                    result_seq_data = ImgToObj.getObjectAndGoalSequence(
                        sim_result.images)
                    score = 1.0 - np.linalg.norm(
                        result_seq_data['object'][-1]['centroid'] -
                        result_seq_data['goal'][-1]['centroid']) / 256.0
                    score += ImgToObj.objectTouchGoalSequence(
                        sim_result.images) / goal
                    #print(task_id,theta,score,old_theta,theta_score,sim_result.status.is_solved(),tested_actions_count)
                    good_action_count += 1
                    tested_actions_count += 1
                    solved_task = sim_result.status.is_solved()
                    solved_action_count += solved_task
                else:
                    theta = old_theta

        results.append({
            'num_good': good_action_count,
            'num_solved': solved_action_count,
            'num_total': len(discrete_actions)
        })
    return results
def test(self, start_id=0, end_id=25):
    """Score cached PHYRE actions on the test fold and display the running AUCCESS.

    For each selected test task, the first 10000 actions of the default
    'ball' action cache are simulated.  For every action that is not
    INVALID_INPUT, the resized first frame and one box per kept object are
    queued and scored in batches by ``self.batch_score``.  The returned
    confidences rank the valid actions of the task, and the AUCCESS averaged
    over the tasks processed so far is written to the task progress bar.

    Object boxes are computed once per task and stored under
    ``cache/<task>.hkl``; an existing file is loaded instead of recomputing.

    Args:
        start_id: first 5-digit task-name prefix (the part before ':') to
            evaluate, inclusive.
        end_id: last task-name prefix to evaluate, exclusive.

    Returns:
        None.  Results are only reported through the progress bar.
    """
    random.seed(0)
    np.random.seed(0)
    protocal = C.PHYRE_PROTOCAL
    fold_id = C.PHYRE_FOLD
    self.score_model.eval()
    print(f'testing using protocal {protocal} and fold {fold_id}')

    # Pick the evaluation fold and keep only the tasks in [start_id, end_id).
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)
    wanted_prefixes = {f'{idx:05d}' for idx in range(start_id, end_id)}
    test_list = [
        name for name in test_tasks if name.split(':')[0] in wanted_prefixes
    ]
    simulator = phyre.initialize_simulator(test_list, action_tier)

    # Candidate actions (and their recorded statuses) come from the PHYRE cache.
    num_actions = 10000
    cache = phyre.get_default_100k_cache('ball')
    acts = cache.action_array[:num_actions]
    training_data = cache.get_sample(test_list, None)
    last_act_id = len(acts) - 1

    # auccess[t, k] is 1 when task t is solved within its top-(k + 1) actions.
    auccess = np.zeros((len(test_list), 100))
    batched_pred = C.SOLVER.BATCH_SIZE
    # log(k + 1) - log(k) weights for k = 1..100; identical for every task.
    rank_weights = np.array(
        [np.log(k + 1) - np.log(k) for k in range(1, 101)])
    objs_color = None
    all_data, all_rois, all_image = [], [], []

    # Bounding boxes obtained from the simulator are cached on disk per task.
    os.makedirs('cache', exist_ok=True)
    t_list = tqdm(test_list, 'Task')
    for task_id, task in enumerate(t_list):
        sim_statuses = training_data['simulation_statuses'][task_id]
        confs, successes = [], []
        boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
        use_cache = os.path.exists(boxes_cache_name)
        all_boxes = hickle.load(boxes_cache_name) if use_cache else []
        valid_act_id = 0

        action_bar = tqdm(acts, 'Candidate Action', leave=False)
        for act_id, act in enumerate(action_bar):
            sim = simulator.simulate_action(task_id,
                                            act,
                                            stride=60,
                                            need_images=True,
                                            need_featurized_objects=True)
            assert sim.status == sim_statuses[act_id], 'sanity check not passed'
            is_last = act_id == last_act_id

            if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                # An invalid final action must still flush the pending batch.
                if is_last and all_data:
                    confs = confs + self.batch_score(all_data, all_rois,
                                                     all_image, objs_color)
                    all_data, all_rois, all_image = [], [], []
                continue

            successes.append(sim.status == phyre.SimulationStatus.SOLVED)

            # Network input: same preparation logic as tools/gen_phyre.py.
            frame = cv2.resize(sim.images[0],
                               (self.input_width, self.input_height),
                               interpolation=cv2.INTER_NEAREST)
            all_image.append(frame[::-1])
            frame_rgb = phyre.observations_to_float_rgb(frame)

            # Objects whose colour contains BLACK or PURPLE are dropped.
            raw_colors = sim.featurized_objects.colors
            objs_valid = [
                not ('BLACK' in color or 'PURPLE' in color)
                for color in raw_colors
            ]
            objs = sim.featurized_objects.features[:, objs_valid, :]
            objs_color = np.array(raw_colors)[objs_valid]
            num_objs = objs.shape[1]

            if use_cache:
                boxes = all_boxes[valid_act_id]
                valid_act_id += 1
            else:
                # Rescale pixel extents from scene size to network input size.
                x_scale = (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                y_scale = (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                boxes = np.zeros((1, num_objs, 5))
                for o_id in range(num_objs):
                    raster = phyre.objects_util.featurized_objects_vector_to_raster(
                        objs[0][[o_id]])
                    raster_rgb = phyre.observations_to_float_rgb(raster)
                    raster_rgb[raster_rgb == 1] = 0
                    occupied = raster_rgb.sum(-1) > 0
                    rows, cols = np.where(occupied)
                    boxes[0, o_id] = [
                        o_id,
                        cols.min() * x_scale,
                        rows.min() * y_scale,
                        cols.max() * x_scale,
                        rows.max() * y_scale,
                    ]
                all_boxes.append(boxes)

            data = frame_rgb.transpose((2, 0, 1))[None, None, :]
            all_data.append(torch.from_numpy(data.astype(np.float32)))
            rois = boxes[..., 1:].astype(np.float32)
            all_rois.append(torch.from_numpy(rois)[None, :])

            # Score whenever a batch is full or the action list is exhausted.
            if len(all_data) % batched_pred == 0 or is_last:
                confs = confs + self.batch_score(all_data, all_rois,
                                                 all_image, objs_color)
                all_data, all_rois, all_image = [], [], []

        if not use_cache:
            all_boxes = np.stack(all_boxes)
            hickle.dump(all_boxes,
                        boxes_cache_name,
                        mode='w',
                        compression='gzip')

        # Rank valid actions by descending confidence and mark, for every k,
        # whether a solving action appears among the top k.
        ranking = np.argsort(confs)[::-1]
        top_acc = np.array(successes)[ranking]
        for k in range(1, 101):
            auccess[task_id, k - 1] = int(np.sum(top_acc[:k]) > 0)

        seen = auccess[:task_id + 1]
        solve_rate = seen.sum(0) / seen.shape[0]
        running = np.sum(rank_weights * solve_rate) / np.sum(rank_weights) * 100
        t_list.set_description(f'current AUCESS: {running:.2f}')
def enumerate_actions():
    """Return the first 10000 actions of PHYRE's default 100k 'ball' action cache."""
    return phyre.get_default_100k_cache('ball').action_array[:10000]
def test(self, start_id=0, end_id=25, fold_id=0, protocal='within'):
    """Score cached PHYRE actions per task and pickle AUCCESS / accuracy statistics.

    For each selected test task, the first 10000 actions of the default
    'ball' action cache are simulated.  Every action that is not
    INVALID_INPUT is turned into a network input according to the active
    scoring mode and scored in batches by ``self.batch_score``:

    * ``score_with_heuristic`` / ``score_with_mask``: resized first frame plus
      one box per kept object (boxes are cached in ``cache/<task>.hkl``);
    * ``score_with_act``: the flipped, resized initial scene plus the action;
    * ``score_with_vid_cls``: all returned frames, resized and flipped.

    After each task the positive / negative accuracy at a 0.5 confidence
    threshold and the running AUCCESS for action budgets of 1k, 2k, 5k, 8k
    and 10k are printed.  At the end the statistics are pickled to
    ``<cache_output_dir>/<start_id>_<end_id>.pkl``.

    NOTE(review): ``pprint`` is called with several positional values, so it
    is presumably a project print/log helper rather than ``pprint.pprint``
    -- confirm against the file's imports.

    Args:
        start_id: first 5-digit task-name prefix (the part before ':') to
            evaluate, inclusive.
        end_id: last task-name prefix to evaluate, exclusive.
        fold_id: PHYRE fold index passed to ``phyre.get_fold``.
        protocal: protocol name used to build the eval setup
            ``ball_<protocal>_template``.

    Raises:
        NotImplementedError: if none of the scoring-mode flags is set.
    """

    def ratio(num, den):
        # nan instead of ZeroDivisionError / NumPy divide warnings when there
        # is nothing to count (no task evaluated, or a task without
        # positives / negatives).
        return num / den if den else float('nan')

    random.seed(0)
    print(f'testing {protocal} fold {fold_id}')
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)

    # Keep only the tasks whose name prefix lies in [start_id, end_id).
    candidate_ids = {f'{i:05d}' for i in range(start_id, end_id)}
    test_list = [
        task for task in test_tasks if task.split(':')[0] in candidate_ids
    ]
    simulator = phyre.initialize_simulator(test_list, action_tier)

    # AUCCESS is tracked for several action budgets.
    num_all_actions = [1000, 2000, 5000, 8000, 10000]
    auccess = np.zeros((len(num_all_actions), len(test_list), 100))
    batched_pred = C.SOLVER.BATCH_SIZE
    # log(k + 1) - log(k) weights for k = 1..100; identical for every task.
    rank_weights = np.array(
        [np.log(k + 1) - np.log(k) for k in range(1, 101)])
    use_boxes = self.score_with_heuristic or self.score_with_mask

    # Pending inputs for the next batch_score call.
    all_data, all_acts, all_rois, all_image = [], [], [], []

    cache = phyre.get_default_100k_cache('ball')
    acts = cache.action_array[:10000]
    last_act_id = len(acts) - 1

    pos_all, neg_all, pos_correct, neg_correct = 0, 0, 0, 0
    objs_color = None

    for task_id, task in enumerate(test_list):
        # num_valid_act_idx holds one 0/1 flag per simulated action so that
        # an action budget can be mapped to a count of valid actions.
        confs, successes, num_valid_act_idx = [], [], []
        boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
        use_cache = os.path.exists(boxes_cache_name)
        all_boxes = hickle.load(boxes_cache_name) if use_cache else []
        valid_act_cnt = 0

        pprint(f'{task}: {task_id} / {len(test_list)}')
        for act_id, act in enumerate(acts):
            is_last = act_id == last_act_id
            sim = simulator.simulate_action(task_id,
                                            act,
                                            stride=60,
                                            need_images=True,
                                            need_featurized_objects=True)

            if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                num_valid_act_idx.append(0)
                # An invalid final action must still flush the pending batch.
                if is_last and all_data:
                    conf_t = self.batch_score(all_data, all_acts, all_rois,
                                              all_image, objs_color, task)
                    confs = confs + conf_t
                    all_data, all_acts, all_rois, all_image = [], [], [], []
                continue

            num_valid_act_idx.append(1)
            successes.append(sim.status == phyre.SimulationStatus.SOLVED)

            if use_boxes:
                image = cv2.resize(sim.images[0],
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                # The heuristic goal detector needs the vertically flipped frame.
                all_image.append(image[::-1])
                image = phyre.observations_to_float_rgb(image)

                # Objects whose colour contains BLACK or PURPLE are dropped.
                objs_color = sim.featurized_objects.colors
                objs_valid = [
                    ('BLACK' not in obj_color) and ('PURPLE' not in obj_color)
                    for obj_color in objs_color
                ]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]

                if use_cache:
                    boxes = all_boxes[valid_act_cnt]
                    valid_act_cnt += 1
                else:
                    # Rescale pixel extents from scene size to input size.
                    x_scale = (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                    y_scale = (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0
                        rows, cols = np.where(mask_im)
                        boxes[0, o_id] = [
                            o_id,
                            cols.min() * x_scale,
                            rows.min() * y_scale,
                            cols.max() * x_scale,
                            rows.max() * y_scale,
                        ]
                    all_boxes.append(boxes)

                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                rois = torch.from_numpy(
                    boxes[..., 1:].astype(np.float32))[None, :]
                all_data.append(data)
                all_rois.append(rois)
            elif self.score_with_act:
                init = np.ascontiguousarray(
                    simulator.initial_scenes[task_id][::-1])
                init128 = cv2.resize(init,
                                     (self.input_width, self.input_height),
                                     interpolation=cv2.INTER_NEAREST)
                all_data.append(torch.from_numpy(init128))
                all_acts.append(torch.from_numpy(act[None, :]))
            elif self.score_with_vid_cls:
                rst_images = np.stack([
                    np.ascontiguousarray(
                        cv2.resize(rst_image,
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1])
                    for rst_image in sim.images
                ])
                all_data.append(torch.from_numpy(rst_images))
            else:
                raise NotImplementedError

            # Score whenever a batch is full or the action list is exhausted.
            if len(all_data) % batched_pred == 0 or is_last:
                conf_t = self.batch_score(all_data, all_acts, all_rois,
                                          all_image, objs_color, task)
                confs = confs + conf_t
                all_data, all_acts, all_rois, all_image = [], [], [], []

        if use_boxes:
            if not use_cache:
                all_boxes = np.stack(all_boxes)
                # The cache directory may not exist yet on a fresh checkout.
                os.makedirs(os.path.dirname(boxes_cache_name), exist_ok=True)
                hickle.dump(all_boxes,
                            boxes_cache_name,
                            mode='w',
                            compression='gzip')
            else:
                # A cached file must hold exactly one entry per valid action.
                assert valid_act_cnt == len(all_boxes)

        # Classification accuracy at a 0.5 confidence threshold.
        pred = np.array(confs) >= 0.5
        labels = np.array(successes)
        is_pos, is_neg = labels == 1, labels == 0
        hits = pred == labels
        task_pos, task_neg = is_pos.sum(), is_neg.sum()
        task_pos_hit, task_neg_hit = hits[is_pos].sum(), hits[is_neg].sum()
        pos_all += task_pos
        neg_all += task_neg
        pos_correct += task_pos_hit
        neg_correct += task_neg_hit
        pos_acc = ratio(task_pos_hit, task_pos)
        neg_acc = ratio(task_neg_hit, task_neg)
        info = f'{pos_acc * 100:.1f} / {neg_acc * 100:.1f} '

        for j, num_acts in enumerate(num_all_actions):
            # Valid actions among the first num_acts simulated ones; int()
            # keeps the value usable as a slice bound even for an empty sum.
            num_valid = int(np.sum(num_valid_act_idx[:num_acts]))
            ranking = np.argsort(confs[:num_valid])[::-1]
            top_acc = np.array(successes[:num_valid])[ranking]
            for i in range(100):
                auccess[j, task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
            seen = auccess[j, :task_id + 1]
            solve_rate = seen.sum(0) / seen.shape[0]
            info += f'{np.sum(rank_weights * solve_rate) / np.sum(rank_weights) * 100:.2f} {np.sum(successes[:num_valid])}/{num_acts // 1000}k | '
        pprint(info)

    pprint(pos_correct, pos_all, ratio(pos_correct, pos_all))
    pprint(neg_correct, neg_all, ratio(neg_correct, neg_all))

    cache_output_dir = f'{self.output_dir.replace("figures/", "")}/' \
                       f'{self.proposal_setting}_{self.method}_{protocal}_fold_{fold_id}/'
    os.makedirs(cache_output_dir, exist_ok=True)
    print(cache_output_dir)
    stats = {
        'auccess': auccess,
        'p_c': pos_correct,
        'p_a': pos_all,
        'n_c': neg_correct,
        'n_a': neg_all,
    }
    with open(f'{cache_output_dir}/{start_id}_{end_id}.pkl', 'wb') as f:
        pickle.dump(stats, f, pickle.HIGHEST_PROTOCOL)