def evaluate_simple_agent(tasks, tier):
    """Evaluates a Bayesian-optimization agent on the given tasks/tier.

    For every task the action (0.5, 0.5, 0.5) is tried first. While the task
    is unsolved and the evaluator reports fewer than phyre.MAX_TEST_ATTEMPTS
    attempts, GPyOpt is asked for the next action given all (action, score)
    pairs collected so far, and that action is evaluated through evalAction.

    Args:
        tasks: A list of task instances (strings) in the split to evaluate.
        tier: A string of the action tier.

    Returns:
        An Evaluator object updated with the results of all the simulations.
    """
    # Create a simulator for the task and tier.
    simulator = phyre.initialize_simulator(tasks, tier)
    evaluator = phyre.Evaluator(tasks)
    assert tuple(tasks) == simulator.task_ids
    tasks_solved = 0
    for task_index in tqdm(range(len(tasks)), desc='Evaluate tasks'):
        # Three continuous variables, each searched over [0, 1].
        domain = [{
            'name': 'var1',
            'type': 'continuous',
            'domain': (0, 1)
        }, {
            'name': 'var2',
            'type': 'continuous',
            'domain': (0, 1)
        }, {
            'name': 'var3',
            'type': 'continuous',
            'domain': (0, 1)
        }]
        X_init = np.array([[0.5, .5, .5]])
        eval_result = evalAction(X_init, simulator, task_index, evaluator)
        Y_init = np.array([[eval_result['score']]])
        X_step = X_init
        Y_step = Y_init
        solved_task = eval_result['solved']
        while evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS and not solved_task:
            # f=None: the optimizer is only used to suggest points; the
            # objective is evaluated externally and fed back through X/Y.
            bo_step = GPyOpt.methods.BayesianOptimization(
                f=None,
                domain=domain,
                X=X_step,
                Y=Y_step,
                de_duplication=True,
                acquisition_type='MPI',
                model_type='sparseGP')
            x_next = bo_step.suggest_next_locations()
            eval_result = evalAction(x_next, simulator, task_index, evaluator)
            X_step = np.vstack((X_step, x_next))
            Y_step = np.vstack((Y_step, eval_result['score']))
            if eval_result['solved']:
                solved_task = True
        # Fix: the counter was never updated, so the summary below always
        # reported 0 solved tasks.
        if solved_task:
            tasks_solved += 1
    print(tasks_solved, "Tasks solved out of ", len(tasks), "Total Tasks")
    return evaluator
def compact_simulation_data_to_trainset(action_tier_name: str,
                                        actions: np.ndarray,
                                        simulation_statuses: Sequence[int],
                                        task_ids: TaskIds) -> TrainData:
    """Flattens cached simulation results into per-(task, action) tensors.

    Every (task, action) pair whose status is not INVALID_INPUT becomes one
    entry of the returned tensors. The returned tuple has the same layout as
    the output of create_balanced_eval_set:
    (task_indices, is_solved, actions, simulator, observations).
    """
    invalid_code = int(phyre.SimulationStatus.INVALID_INPUT)
    solved_code = int(phyre.SimulationStatus.SOLVED)
    num_tasks = len(task_ids)
    num_actions = actions.shape[0]

    # Task-major enumeration of all pairs: the task index changes every
    # num_actions entries, the action index cycles within each task.
    pair_task = np.repeat(np.arange(num_tasks), num_actions)
    pair_action = np.arange(num_tasks * num_actions) % num_actions

    flat_statuses = simulation_statuses.reshape(-1)
    keep = flat_statuses != invalid_code

    kept_statuses = torch.LongTensor(flat_statuses[keep].astype('uint8'))
    is_solved = kept_statuses == solved_code
    kept_actions = torch.FloatTensor(actions[pair_action[keep]])
    kept_tasks = torch.LongTensor(pair_task[keep])

    simulator = phyre.initialize_simulator(task_ids, action_tier_name)
    observations = torch.LongTensor(simulator.initial_scenes)
    return kept_tasks, is_solved, kept_actions, simulator, observations
def eval(self, state, task_ids, tier):
    """Ranks cached actions with the model and logs the best ones per task.

    Args:
        state: Mapping holding the trained 'model' and the simulation 'cache'.
        task_ids: Task ids to evaluate.
        tier: Action tier used to initialize the simulator.

    Returns:
        A phyre.Evaluator with up to params['max_attempts_per_task'] logged
        attempts per task.
    """
    model = state['model']
    cache = state['cache']
    # NOTE: only the first params['rank_size'] cached actions are ranked,
    # although the model could score actions outside the cache as well.
    candidate_actions = cache.action_array[:self.params['rank_size']]
    model.cuda()

    simulator = phyre.initialize_simulator(task_ids, tier)
    scenes = simulator.initial_scenes
    evaluator = phyre.Evaluator(task_ids)

    for task_index, _ in enumerate(task_ids):
        task_id = simulator.task_ids[task_index]
        scores = self.neural_model.eval_actions(
            model, candidate_actions, self.params['eval_batch_size'],
            scenes[task_index])
        # Outcomes are read from the cache instead of being re-simulated.
        cached_statuses = cache.load_simulation_states(task_id)
        # Negating the scores makes argsort yield best-first order.
        for action_id in np.argsort(-scores):
            attempts = evaluator.get_attempts_for_task(task_index)
            if attempts >= self.params['max_attempts_per_task']:
                break
            evaluator.maybe_log_attempt(
                task_index, phyre.SimulationStatus(cached_statuses[action_id]))
    return evaluator
def _create_balanced_eval_set(self, cache, task_ids, size, tier):
    """Builds a class-balanced sample of (task, action) pairs as tensors.

    Draws size // 2 pairs with status SOLVED and size // 2 pairs with status
    NOT_SOLVED (with replacement, fixed seed 42) from the cache sample.

    Returns:
        (task_indices, is_solved, selected_actions, simulator, observations),
        the same layout as _compact_simulation_data_to_trainset.
    """
    task_ids = tuple(task_ids)
    sample = cache.get_sample(task_ids)
    actions = sample['actions']
    flat_statuses = sample['simulation_statuses'].reshape(-1)

    solved_code = int(phyre.SimulationStatus.SOLVED)
    unsolved_code = int(phyre.SimulationStatus.NOT_SOLVED)
    positive_pool = (flat_statuses == solved_code).nonzero()[0]
    negative_pool = (flat_statuses == unsolved_code).nonzero()[0]

    per_class = size // 2
    rng = np.random.RandomState(42)
    # rng.choice samples with replacement, so a pool smaller than per_class
    # simply yields repeated indices.
    picked = np.concatenate([
        rng.choice(positive_pool, per_class),
        rng.choice(negative_pool, per_class),
    ])

    # A flat index encodes task * num_actions + action.
    num_actions = len(actions)
    selected_actions = torch.FloatTensor(actions[picked % num_actions])
    is_solved = torch.LongTensor(flat_statuses[picked].astype('int')) > 0
    task_indices = torch.LongTensor(picked // num_actions)

    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = torch.LongTensor(simulator.initial_scenes)
    return task_indices, is_solved, selected_actions, simulator, observations
def create_balanced_eval_set(cache, task_ids, size, tier):
    """Samples size // 2 solved and size // 2 unsolved (task, action) pairs.

    Returns a tuple
    (task_indices, is_solved, selected_actions, simulator, observations).
    """
    task_ids = tuple(task_ids)
    data = cache.get_sample(task_ids)
    actions = data['actions']
    statuses = data['simulation_statuses'].reshape(-1)

    is_positive = statuses == int(phyre.SimulationStatus.SOLVED)
    is_negative = statuses == int(phyre.SimulationStatus.NOT_SOLVED)
    [positives] = is_positive.nonzero()
    [negatives] = is_negative.nonzero()

    quota = size // 2
    rng = np.random.RandomState(42)
    # Positives are drawn before negatives; the draw order fixes the sample.
    drawn_positives = rng.choice(positives, quota)
    drawn_negatives = rng.choice(negatives, quota)
    flat_picks = np.concatenate([drawn_positives, drawn_negatives])

    action_count = len(actions)
    action_picks = flat_picks % action_count
    task_picks = flat_picks // action_count

    selected_actions = torch.FloatTensor(actions[action_picks])
    is_solved = torch.LongTensor(statuses[flat_picks].astype('int')) > 0
    task_indices = torch.LongTensor(task_picks)

    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = torch.LongTensor(simulator.initial_scenes)
    return task_indices, is_solved, selected_actions, simulator, observations
def __init__(self, config):
    """Builds the simulator and the Task list described by `config`.

    If config.task_id is set, only that task is loaded. Otherwise the first
    config.num_mods modifications of every template in
    [start_template_id, end_template_id] are loaded.
    """
    self.start_template_id = config.start_template_id
    self.end_template_id = config.end_template_id
    self.num_mods = config.num_mods
    self.action_tier = config.action_tier
    self.task_id = config.task_id
    self.action_mappers = action_mappers.ACTION_MAPPERS[self.action_tier]()

    tasks_map, _ = load_compiled_task_dict()
    if self.task_id is not None:
        task_ids = [self.task_id]
    else:
        # NOTE: template_num is only defined on this branch.
        self.template_num = self.end_template_id - self.start_template_id + 1
        task_ids = []
        for template in range(self.start_template_id,
                              self.end_template_id + 1):
            template_key = str(template).zfill(5)
            task_mods = tasks_map[template_key]
            for mod_index in range(self.num_mods):
                task_ids.append(template_key + ":" + task_mods[mod_index])

    self.simulator = phyre.initialize_simulator(task_ids, self.action_tier)
    self.tasks = []
    for task_index, _ in enumerate(task_ids):
        self.tasks.append(
            Task(self.simulator.task_ids[task_index],
                 self.simulator.initial_scenes[task_index],
                 self.simulator.initial_featurized_objects[task_index]))
def real_eval(cls, cache, model, actions, task_ids, tier,
              max_attempts_per_task, eval_batch_size, finetune_iterations,
              refine_iterations, refine_loss, refine_lr):
    """Ranks actions with the model and logs the top ones for every task.

    Without refinement (refine_iterations <= 0) the outcome of each ranked
    action is read from `cache`. With refinement the actions are first passed
    through neural_agent.refine_actions and then simulated. If
    finetune_iterations > 0 the model is fine-tuned after each task on the
    attempts made for that task.

    Returns:
        A phyre.Evaluator holding the logged attempts.
    """
    # TODO: move to a flag.
    finetune_lr = 1e-4
    refining = refine_iterations > 0

    model.cuda()
    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = simulator.initial_scenes
    assert tuple(task_ids) == simulator.task_ids
    logging.info('Ranking %d actions and simulating top %d', len(actions),
                 max_attempts_per_task)
    if refining:
        logging.info(
            'Will do refining for %d iterations with lr=%e and loss=%s',
            refine_iterations, refine_lr, refine_loss)

    evaluator = phyre.Evaluator(task_ids)
    for task_index in tqdm.trange(len(task_ids)):
        task_id = simulator.task_ids[task_index]
        observation = observations[task_index]

        candidates = actions
        if refining:
            candidates = neural_agent.refine_actions(
                model, actions, observation, refine_lr, refine_iterations,
                eval_batch_size, refine_loss)
        scores = neural_agent.eval_actions(model, candidates,
                                           eval_batch_size, observation)
        # Best-scored action first.
        ranking = np.argsort(-scores)
        if not refining:
            # Unrefined actions are the cached ones, so their outcomes can be
            # looked up instead of simulated.
            statuses = cache.load_simulation_states(task_id)

        finetune_data = []
        for action_id in ranking:
            if (evaluator.get_attempts_for_task(task_index) >=
                    max_attempts_per_task):
                break
            action = candidates[action_id]
            if refining:
                status = simulator.simulate_action(task_index,
                                                   action,
                                                   need_images=False,
                                                   need_scenes=False).status
            else:
                status = phyre.SimulationStatus(statuses[action_id])
            finetune_data.append((task_index, status, action))
            evaluator.maybe_log_attempt(task_index, status)

        if evaluator.get_attempts_for_task(task_index) == 0:
            logging.warning('Made 0 attempts for task %s', task_id)
        if finetune_iterations > 0:
            neural_agent.finetune(model, finetune_data, simulator,
                                  finetune_lr, finetune_iterations)
    return evaluator
def _worker(action_tier, task_id, num_jobs, num_actions, job_id):
    """Simulates one job's share of the stored actions on a single task.

    The action file is split into `num_jobs` chunks with np.array_split and
    chunk `job_id` is simulated on `task_id`.

    Returns:
        A list with the integer simulation status of each action in the chunk.
    """
    cache_folder = phyre.simulation_cache.get_partial_cache_folder(num_actions)
    action_path = (cache_folder / action_tier /
                   phyre.simulation_cache.ACTION_FILE_NAME)
    all_actions = joblib.load(action_path)
    sim = phyre.initialize_simulator([task_id], action_tier)

    job_actions = np.array_split(all_actions, num_jobs)[job_id]
    statuses = []
    for action in job_actions:
        # Index 0: the simulator was initialized with this single task.
        outcome = sim.simulate_action(0, action, need_images=False)
        statuses.append(int(outcome.status))
    return statuses
def evaluate_simple_agent(tasks, tier):
    """Evaluates a hyperopt (TPE) driven agent on the given tasks/tier.

    For every task, hyperopt's fmin searches actions (x, y, r) in [0, 1]^3
    through evalAction until a trial reports 'solved' or the evaluator has
    phyre.MAX_TEST_ATTEMPTS attempts for the task.

    Args:
        tasks: A list of task instances (strings) in the split to evaluate.
        tier: A string of the action tier.

    Returns:
        An Evaluator object updated with the results of all the simulations.
    """
    # Create a simulator for the task and tier.
    simulator = phyre.initialize_simulator(tasks, tier)
    evaluator = phyre.Evaluator(tasks)
    assert tuple(tasks) == simulator.task_ids

    tasks_solved = 0
    for task_index in tqdm(range(len(tasks)), desc='Evaluate tasks'):
        objective = partial(evalAction,
                            simulator=simulator,
                            task_index=task_index,
                            evaluator=evaluator)
        search_space = {
            'x': hp.uniform('x', 0, 1),
            'y': hp.uniform('y', 0, 1),
            'r': hp.uniform('r', 0, 1),
        }
        trials = Trials()
        eval_budget = 0
        solved_task = False
        while evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS and not solved_task:
            # The budget grows by the attempts still available; `trials` is
            # reused, so fmin continues from the previous round.
            remaining = (phyre.MAX_TEST_ATTEMPTS -
                         evaluator.get_attempts_for_task(task_index))
            eval_budget += remaining
            # NOTE(review): random.seed() returns None, so fmin receives
            # rstate=None and is not seeded by this call - confirm whether a
            # numpy random state was intended.
            rstate = random.seed(0)
            fmin(objective,
                 space=search_space,
                 algo=tpe.suggest,
                 max_evals=eval_budget,
                 trials=trials,
                 rstate=rstate,
                 show_progressbar=False)
            solved_counts = Counter(
                result['solved'] for result in trials.results)
            solved_task = solved_counts[True] > 0
        if solved_task:
            tasks_solved += 1
    print(tasks_solved, "Tasks solved out of ", len(tasks), "Total Tasks")
    return evaluator
def create_balanced_eval_set(cache: phyre.SimulationCache, task_ids: TaskIds,
                             size: int, tier: str) -> TrainData:
    """Prepares a balanced eval set to run through a network.

    Picks (size // 2) solved and (size // 2) unsolved (task, action) pairs,
    sampled with replacement under a fixed seed, and packs them as tensors.

    Returns a tuple
    (task_indices, is_solved, selected_actions, simulator, observations).
    For any i, is_solved[i] is true iff selected_actions[i] solves task
    task_ids[task_indices[i]].
    """
    task_ids = tuple(task_ids)
    sample = cache.get_sample(task_ids)
    actions = sample['actions']
    # The status array is flattened so that one index names one
    # (task, action) pair.
    flat_statuses = sample['simulation_statuses'].reshape(-1)

    def _pairs_with(status):
        [hits] = (flat_statuses == int(status)).nonzero()
        return hits

    solved_pool = _pairs_with(phyre.SimulationStatus.SOLVED)
    unsolved_pool = _pairs_with(phyre.SimulationStatus.NOT_SOLVED)

    half_size = size // 2
    rng = np.random.RandomState(42)
    solved_picks = rng.choice(solved_pool, half_size)
    unsolved_picks = rng.choice(unsolved_pool, half_size)
    all_picks = np.concatenate([solved_picks, unsolved_picks])

    selected_actions = torch.FloatTensor(actions[all_picks % len(actions)])
    is_solved = torch.LongTensor(flat_statuses[all_picks].astype('int')) > 0

    # Lookup table from flat pair index to the owning task index.
    owning_task = np.arange(len(task_ids)).repeat(actions.shape[0])
    task_indices = torch.LongTensor(owning_task[all_picks])

    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = torch.LongTensor(simulator.initial_scenes)
    return task_indices, is_solved, selected_actions, simulator, observations
def __init__(self, data_root, split, image_ext='.jpg'):
    """Selects the (task, action) pairs that make up this split.

    For every task of the split, up to num_pos actions with cached status 1
    and up to num_neg actions with cached status -1 are drawn (after an
    in-place np.random shuffle) from the 100k 'ball' action cache. The
    result is stored in self.video_info, one row per pair:
    column 0 is the task index, columns 1-3 are the action.

    Args:
        data_root: Stored on the instance; not otherwise used here.
        split: 'train' selects train + dev tasks and the larger quotas
            (400 / 1600); any other value selects the test tasks (100 / 400).
        image_ext: Stored on the instance; not otherwise used here.
    """
    self.data_root = data_root
    self.split = split
    self.image_ext = image_ext
    is_train = split == 'train'

    self.input_size = C.RPIN.INPUT_SIZE  # number of input images
    # Plain attribute access replaces the former eval() of a formatted
    # attribute path; the value read is the same.
    self.pred_size = (C.RPIN.PRED_SIZE_TRAIN
                      if is_train else C.RPIN.PRED_SIZE_TEST)
    self.seq_size = self.input_size + self.pred_size
    self.input_height, self.input_width = (C.RPIN.INPUT_HEIGHT,
                                           C.RPIN.INPUT_WIDTH)

    protocal = C.PHYRE_PROTOCAL
    fold = C.PHYRE_FOLD
    num_pos = 400 if is_train else 100
    num_neg = 1600 if is_train else 400

    eval_setup = f'ball_{protocal}_template'
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold)
    tasks = train_tasks + dev_tasks if is_train else test_tasks
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)

    # all the actions
    cache = phyre.get_default_100k_cache('ball')
    training_data = cache.get_sample(tasks, None)
    # (100000 x 3)
    actions = training_data['actions']
    # (num_tasks x 100000)
    sim_statuses = training_data['simulation_statuses']

    self.simulator = phyre.initialize_simulator(tasks, action_tier)

    # Per-task chunks are collected and concatenated once at the end rather
    # than re-copying the growing array on every iteration. The empty seed
    # chunk keeps the (0, 4) result when there are no tasks.
    chunks = [np.zeros((0, 4))]
    for t_id, t in enumerate(tqdm(tasks)):
        sim_status = sim_statuses[t_id]
        pos_acts = actions[sim_status == 1].copy()
        neg_acts = actions[sim_status == -1].copy()
        np.random.shuffle(pos_acts)
        np.random.shuffle(neg_acts)
        acts = np.concatenate([pos_acts[:num_pos], neg_acts[:num_neg]])
        video_info = np.zeros((acts.shape[0], 4))
        video_info[:, 0] = t_id
        video_info[:, 1:] = acts
        chunks.append(video_info)
    self.video_info = np.concatenate(chunks)
def _gen_simulator(self):
    """Builds the wrapped simulator for self.task_ids / self.tier.

    self.drop_objs is normalized to a tuple of integer object ids:
      * an int          -> a one-element tuple,
      * empty / None    -> an empty tuple,
      * a 'a;b;c' str   -> a tuple of the parsed integers,
      * anything else   -> a warning is logged and nothing is dropped.

    Returns:
        The object produced by hydra.utils.instantiate from
        self.simulator_cfg, the phyre simulator and self.obj_fwd_model.
    """
    drop_objs = self.drop_objs
    if isinstance(drop_objs, int):
        # Checked first so that the valid id 0 is not mistaken for "empty".
        drop_objs_lst = (drop_objs, )
    elif not drop_objs:
        drop_objs_lst = ()
    elif isinstance(drop_objs, str):
        # Materialized as a tuple, like the other branches: the previous
        # generator expression could be consumed only once.
        drop_objs_lst = tuple(int(el) for el in drop_objs.split(';'))
    else:
        logging.warning('Not sure what was passed as drop objs %s',
                        self.drop_objs)
        drop_objs_lst = ()
    simulator = phyre.initialize_simulator(self.task_ids,
                                           self.tier,
                                           drop_objs=drop_objs_lst)
    phyre_sim = hydra.utils.instantiate(self.simulator_cfg, simulator,
                                        self.obj_fwd_model)
    return phyre_sim
def evaluate_random_agent(tasks, tier):
    """Runs randomly sampled actions on every task and records the rollouts.

    For each task, actions are sampled and simulated until the evaluator
    reports phyre.MAX_TEST_ATTEMPTS attempts for it.

    Returns:
        (evaluator, images, actions), where images[k] holds the frames
        produced by simulating actions[k].
    """
    # Create a simulator for the task and tier.
    simulator = phyre.initialize_simulator(tasks, tier)
    evaluator = phyre.Evaluator(tasks)
    assert tuple(tasks) == simulator.task_ids

    images, actions = [], []
    for task_index in tqdm_notebook(range(len(tasks)), desc='Evaluate tasks'):
        while True:
            attempts = evaluator.get_attempts_for_task(task_index)
            if attempts >= phyre.MAX_TEST_ATTEMPTS:
                break
            # Sample an action from the simulator's action space.
            action = simulator.sample()
            # Simulate it, then hand the resulting status to the evaluator.
            simulation = simulator.simulate_action(task_index,
                                                   action,
                                                   need_images=True)
            actions.append(action)
            images.append(simulation.images)
            evaluator.maybe_log_attempt(task_index, simulation.status)
    return evaluator, images, actions
def count_ball_sizes(task_ids, tier, ball_sizes, num_pos):
    """Counts tasks still solvable when ball sizes are restricted.

    For every task, the cached solving actions have their third component
    snapped to the nearest value in `ball_sizes` and are re-simulated; the
    task counts as solved as soon as one snapped action solves it.

    Args:
        task_ids: Task ids to check.
        tier: Action tier; also selects the 100k simulation cache.
        ball_sizes: Allowed values for the third action component. Any
            sequence convertible to a 1-D numpy array is accepted.
        num_pos: Unused; kept so existing callers keep working.

    Returns:
        The number of tasks solved by at least one snapped action.
    """
    # Accept lists/tuples as well as arrays; the indexing below needs ndarray.
    ball_sizes = np.asarray(ball_sizes)
    cache = phyre.get_default_100k_cache(tier)
    simulator = phyre.initialize_simulator(task_ids, tier)
    num_solved = 0
    for task_index, task_id in tqdm(enumerate(task_ids),
                                    desc='Evaluate Tasks',
                                    total=len(task_ids)):
        statuses = cache.load_simulation_states(task_id)
        # Boolean indexing copies, so the cache's action array is untouched.
        solved_actions = cache.action_array[
            statuses == phyre.simulation_cache.SOLVED, :]
        # For each action, index of the closest allowed ball size.
        nearest = abs(solved_actions[:, 2][None, :] -
                      ball_sizes[:, None]).argmin(axis=0)
        solved_actions[:, 2] = ball_sizes[nearest]
        for solved_action in solved_actions:
            sim_result = simulator.simulate_action(task_index,
                                                   solved_action,
                                                   need_images=False)
            if sim_result.status.is_solved():
                num_solved += 1
                break
    return num_solved
def _compact_simulation_data_to_trainset(self, tier, data):
    """Turns a simulation-cache sample into per-(task, action) tensors.

    `data` must provide 'actions', 'simulation_statuses' and 'task_ids'.
    Pairs whose status is INVALID_INPUT are dropped.

    Returns a tuple
    (task_indices, is_solved, actions, simulator, observations) in which
    is_solved[i] is true iff actions[i] solves task_ids[task_indices[i]].
    """
    actions = data['actions']
    flat_statuses = data['simulation_statuses'].reshape(-1)
    task_ids = data['task_ids']

    invalid_code = int(phyre.SimulationStatus.INVALID_INPUT)
    solved_code = int(phyre.SimulationStatus.SOLVED)

    # One entry per (task, action) pair, tasks varying slowest, which
    # matches the flattened status array.
    num_tasks, num_actions = len(task_ids), actions.shape[0]
    task_of_pair = np.arange(num_tasks).repeat(num_actions)
    action_of_pair = np.repeat(np.arange(num_actions)[None, :],
                               num_tasks,
                               axis=0).reshape(-1)

    # Keep only pairs whose action was valid for the task.
    valid = flat_statuses != invalid_code
    valid_statuses = flat_statuses[valid].astype('uint8')

    is_solved = torch.LongTensor(valid_statuses) == solved_code
    action_tensor = torch.FloatTensor(actions[action_of_pair[valid]])
    task_tensor = torch.LongTensor(task_of_pair[valid])

    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = torch.LongTensor(simulator.initial_scenes)
    return task_tensor, is_solved, action_tensor, simulator, observations
def eval(cls, state: State, task_ids: TaskIds, max_attempts_per_task: int,
         tier: str, **kwargs):
    """Logs cached outcomes of the valid actions accepted by cls.in_prior.

    For each task the cached, non-invalid actions are visited in cache order.
    Those accepted by cls.in_prior are logged until max_attempts_per_task
    attempts are reached. If the actions run out first, a message is printed.

    Returns:
        A phyre.Evaluator holding the logged attempts.
    """
    cache = state['cache']
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    assert tuple(task_ids) == simulator.task_ids

    for i, task_id in enumerate(task_ids):
        cached_statuses = cache.load_simulation_states(task_id)
        valid_mask = cached_statuses != phyre.simulation_cache.INVALID
        valid_actions = cache.action_array[valid_mask]
        valid_statuses = cached_statuses[valid_mask]

        # Equivalent of for/else: the message is only printed when the loop
        # was not cut short by reaching the attempt limit.
        hit_limit = False
        for action, status in zip(valid_actions, valid_statuses):
            if evaluator.get_attempts_for_task(i) >= max_attempts_per_task:
                hit_limit = True
                break
            if cls.in_prior(action, simulator._tasks[i].scene.bodies):
                evaluator.maybe_log_attempt(i, status)
        if not hit_limit:
            print("Not enough actions in prior", task_id,
                  evaluator.get_attempts_for_task(i))
    return evaluator
def simulate_result(chosen_action,
                    chosen_score,
                    model_number,
                    generation_number,
                    score_dir="/home/kyra/Desktop/phyre/agents/Scores"):
    """Simulates one action on the first dev task and logs its score.

    The task is the first dev task of fold 0 of 'ball_cross_template'. The
    simulation is scored with sf.ScoreFunctionValue, and
    [chosen_score, simulation_score, model_number, generation_number] is
    saved with np.save to '<score_dir>/ScoreLog<timestamp>.npy'.

    Args:
        chosen_action: Action passed to the simulator.
        chosen_score: Score recorded next to the simulated one.
        model_number: Identifier written to the score log.
        generation_number: Identifier written to the score log.
        score_dir: Directory that receives the score log. Defaults to the
            previously hard-coded location. It is not created here, so it
            must already exist.

    Returns:
        (pair, simulation_result), where pair is a 2-element object array
        [chosen_action, simulation_score].
    """
    eval_setup = 'ball_cross_template'
    # For simplicity, we will just use one fold for evaluation.
    fold_id = 0
    _, dev_tasks, _ = phyre.get_fold(eval_setup, fold_id)
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    tasks = dev_tasks[0:1]
    simulator = phyre.initialize_simulator(tasks, action_tier)

    # Index 0: the simulator holds exactly the one selected task.
    simulation_result = simulator.simulate_action(
        0, chosen_action, need_images=True, need_featurized_objects=True)
    simulation_score = sf.ScoreFunctionValue(simulation_result)

    # A sequence-valued action next to a scalar score is a ragged pair:
    # without dtype=object, NumPy >= 1.24 raises ValueError here.
    # NOTE(review): assumes chosen_action is sequence-like - confirm.
    pair = np.empty(2, dtype=object)
    pair[0] = chosen_action
    pair[1] = simulation_score

    timestr = time.strftime("%Y%m%d-%H%M%S")
    score_pair = [
        chosen_score, simulation_score, model_number, generation_number
    ]
    score_string = "ScoreLog" + timestr
    np.save(os.path.join(score_dir, score_string), score_pair)
    return pair, simulation_result
def compact_simulation_data_to_trainset(action_tier_name, actions,
                                        simulation_statuses, task_ids):
    """Converts cached simulation results into training tensors.

    Each (task, action) pair whose status is not INVALID_INPUT yields one
    entry.

    Returns a tuple
    (task_indices, is_solved, actions, simulator, observations).
    """
    invalid = int(phyre.SimulationStatus.INVALID_INPUT)
    solved = int(phyre.SimulationStatus.SOLVED)

    # Flat pair index p stands for task p // A and action p % A, with A the
    # number of actions - the layout of the flattened status array.
    action_count = actions.shape[0]
    pair_ids = np.arange(len(task_ids) * action_count)
    task_indices = pair_ids // action_count
    action_indices = pair_ids % action_count

    statuses = simulation_statuses.reshape(-1)
    good = statuses != invalid

    good_statuses = torch.LongTensor(statuses[good].astype('uint8'))
    is_solved = good_statuses == solved
    actions = torch.FloatTensor(actions[action_indices[good]])
    task_indices = torch.LongTensor(task_indices[good])

    simulator = phyre.initialize_simulator(task_ids, action_tier_name)
    observations = torch.LongTensor(simulator.initial_scenes)
    return task_indices, is_solved, actions, simulator, observations
def test(self, start_id=0, end_id=25):
    """Scores cached PHYRE actions with the score model and reports AUCCESS.

    For every test task whose id prefix (the part before ':') is one of the
    5-digit ids in range(start_id, end_id), the first 10000 actions of the
    100k 'ball' cache are simulated. Every action that is not INVALID_INPUT
    is scored through self.batch_score. After each task, the AUCCESS value
    over the tasks processed so far is written to the progress-bar
    description. Nothing is returned.

    Side effects visible here: `random` and `np.random` are reseeded with 0,
    self.score_model is put in eval mode, a 'cache' directory is created,
    and one bounding-box file per task is read from or written to it.

    Args:
        start_id: First id (inclusive) of the task-id prefixes to evaluate.
        end_id: Last id (exclusive) of the task-id prefixes to evaluate.
    """
    random.seed(0)
    np.random.seed(0)
    protocal, fold_id = C.PHYRE_PROTOCAL, C.PHYRE_FOLD
    self.score_model.eval()
    print(f'testing using protocal {protocal} and fold {fold_id}')

    # set up the PHYRE evaluation split
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)

    # keep only the tasks whose id prefix is in the requested range
    candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]
    test_list = [
        task for task in test_tasks if task.split(':')[0] in candidate_list
    ]
    simulator = phyre.initialize_simulator(test_list, action_tier)

    # the action candidates are provided by the author of PHYRE benchmark
    num_actions = 10000
    cache = phyre.get_default_100k_cache('ball')
    acts = cache.action_array[:num_actions]
    training_data = cache.get_sample(test_list, None)

    # auccess[t, k] is 1 when one of the k + 1 best-scored actions solved
    # task t (filled in at the bottom of the task loop)
    auccess = np.zeros((len(test_list), 100))
    batched_pred = C.SOLVER.BATCH_SIZE
    objs_color = None
    # pending network inputs, flushed through self.batch_score in batches
    # NOTE(review): all_acts is never appended to or read in this function
    all_data, all_acts, all_rois, all_image = [], [], [], []

    # cache the initial bounding boxes from the simulator
    os.makedirs('cache', exist_ok=True)

    t_list = tqdm(test_list, 'Task')
    for task_id, task in enumerate(t_list):
        sim_statuses = training_data['simulation_statuses'][task_id]
        # per-task confidences and success flags, one per non-invalid action
        confs, successes = [], []

        boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
        use_cache = os.path.exists(boxes_cache_name)
        all_boxes = hickle.load(boxes_cache_name) if use_cache else []

        # index into the cached boxes; advances only for non-invalid actions
        valid_act_id = 0
        for act_id, act in enumerate(
                tqdm(acts, 'Candidate Action', leave=False)):
            sim = simulator.simulate_action(task_id,
                                            act,
                                            stride=60,
                                            need_images=True,
                                            need_featurized_objects=True)
            # the fresh simulation must agree with the cached status
            assert sim.status == sim_statuses[
                act_id], 'sanity check not passed'
            if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                if act_id == len(acts) - 1 and len(
                        all_data) > 0:  # final action is invalid
                    # flush the partial batch here, because the regular flush
                    # below is skipped by the `continue`
                    conf_t = self.batch_score(all_data, all_rois, all_image,
                                              objs_color)
                    confs = confs + conf_t
                    all_data, all_acts, all_rois, all_image = [], [], [], []
                continue
            successes.append(sim.status == phyre.SimulationStatus.SOLVED)

            # parse object, prepare input for network, the logic is the same
            # as tools/gen_phyre.py
            image = cv2.resize(sim.images[0],
                               (self.input_width, self.input_height),
                               interpolation=cv2.INTER_NEAREST)
            # stored with its first axis reversed
            all_image.append(image[::-1])
            image = phyre.observations_to_float_rgb(image)
            objs_color = sim.featurized_objects.colors
            # objects whose colour name contains BLACK or PURPLE are ignored
            objs_valid = [('BLACK' not in obj_color) and
                          ('PURPLE' not in obj_color)
                          for obj_color in objs_color]
            objs = sim.featurized_objects.features[:, objs_valid, :]
            objs_color = np.array(objs_color)[objs_valid]
            num_objs = objs.shape[1]

            if use_cache:
                boxes = all_boxes[valid_act_id]
                valid_act_id += 1
            else:
                # one row per object: [object index, x1, y1, x2, y2]
                boxes = np.zeros((1, num_objs, 5))
                for o_id in range(num_objs):
                    mask = phyre.objects_util.featurized_objects_vector_to_raster(
                        objs[0][[o_id]])
                    mask_im = phyre.observations_to_float_rgb(mask)
                    # entries equal to 1 are zeroed so that only the object
                    # pixels remain non-zero
                    # (presumably 1 is the white background - confirm)
                    mask_im[mask_im == 1] = 0
                    mask_im = mask_im.sum(-1) > 0

                    [h, w] = np.where(mask_im)
                    x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                    # rescale from scene resolution to network input size
                    x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                    x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                    y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                    y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                    boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                all_boxes.append(boxes)

            # channels-first image with two leading singleton axes
            data = image.transpose((2, 0, 1))[None, None, :]
            data = torch.from_numpy(data.astype(np.float32))
            # the object-index column is dropped; only coordinates are kept
            rois = torch.from_numpy(boxes[..., 1:].astype(np.float32))[None, :]

            all_data.append(data)
            all_rois.append(rois)

            # score when a full batch has accumulated or at the last action
            if len(all_data) % batched_pred == 0 or act_id == len(
                    acts) - 1:
                conf_t = self.batch_score(all_data, all_rois, all_image,
                                          objs_color)
                confs = confs + conf_t
                all_data, all_rois, all_image = [], [], []

        # persist the freshly computed boxes for the next run
        if not use_cache:
            all_boxes = np.stack(all_boxes)
            hickle.dump(all_boxes,
                        boxes_cache_name,
                        mode='w',
                        compression='gzip')

        info = f'current AUCESS: '
        # success flags reordered by descending confidence
        top_acc = np.array(successes)[np.argsort(confs)[::-1]]
        for i in range(100):
            auccess[task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
        # weights w_k = log(k + 1) - log(k) for k = 1..100
        w = np.array([np.log(k + 1) - np.log(k) for k in range(1, 101)])
        # success rate at each k, averaged over the tasks processed so far
        s = auccess[:task_id + 1].sum(0) / auccess[:task_id + 1].shape[0]
        info += f'{np.sum(w * s) / np.sum(w) * 100:.2f}'
        t_list.set_description(info)
def gen_proposal(self, start_id=0, end_id=25):
    """Dumps ground-truth and predicted rollouts for sampled PHYRE actions.

    For the 'train' split (train + dev tasks) and the 'test' split, each task
    whose id prefix is in range(start_id, end_id) gets up to 200 actions with
    cached status 1 and up to 800 with cached status -1, drawn from the
    first 100000 cached 'ball' actions. A selection previously saved under
    data/PHYRE_proposal/cache/ is reused if present. Each action is
    simulated, self.generate_trajs is run on the first frame and the object
    boxes, and the resized simulator frames plus the rendered predicted masks
    are written to
    data/PHYRE_proposal/<proposal_dir>/<split>/{pos,neg}/<task>/<act_id>.hkl.

    Side effects visible here: `random` and `np.random` are reseeded with 0,
    and self.proposal_dir is set. Nothing is returned.

    Args:
        start_id: First id (inclusive) of the task-id prefixes to process.
        end_id: Last id (exclusive) of the task-id prefixes to process.
    """
    random.seed(0)
    np.random.seed(0)
    protocal = C.PHYRE_PROTOCAL
    fold_id = C.PHYRE_FOLD
    print(f'generate proposal for {protocal} fold {fold_id}')
    # per-task quota of positive / negative actions, and the size of the
    # cached action pool they are drawn from
    max_p_acts, max_n_acts, max_acts = 200, 800, 100000
    self.proposal_dir = f'{self.output_dir.split("/")[-1]}_' \
                        f'p{max_p_acts}n{max_n_acts}a{max_acts // 1000}'
    eval_setup = f'ball_{protocal}_template'
    action_tier = phyre.eval_setup_to_action_tier(eval_setup)
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(
        eval_setup, fold_id)
    # filter task
    train_tasks = train_tasks + dev_tasks
    candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]

    for split in ['train', 'test']:
        train_list = [
            task for task in train_tasks
            if task.split(':')[0] in candidate_list
        ]
        test_list = [
            task for task in test_tasks
            if task.split(':')[0] in candidate_list
        ]
        # eval(f'{split}_list') picks train_list or test_list by name
        # NOTE(review): this `return` ends the whole method, so an empty
        # train list also skips the test split - confirm this is intended
        if len(eval(f'{split}_list')) == 0:
            return

        simulator = phyre.initialize_simulator(eval(f'{split}_list'),
                                               action_tier)
        cache = phyre.get_default_100k_cache('ball')
        training_data = cache.get_sample(eval(f'{split}_list'), None)
        actions = cache.action_array[:max_acts]

        final_list = eval(f'{split}_list')
        t_list = tqdm(final_list, 'Task')
        for task_id, task in enumerate(t_list):
            # NOTE(review): data/PHYRE_proposal/cache/ is not created in this
            # method; presumably it must already exist - confirm
            box_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_box.hkl'
            act_cache_name = f'data/PHYRE_proposal/cache/{task.replace(":", "_")}_act.hkl'
            # the cache is used only when both files are present
            use_cache = os.path.exists(box_cache_name) and os.path.exists(
                act_cache_name)
            if use_cache:
                acts = hickle.load(act_cache_name)
                all_boxes = hickle.load(box_cache_name)
            else:
                sim_statuses = training_data['simulation_statuses'][
                    task_id]
                # status 1 / -1 are expected to simulate as SOLVED /
                # NOT_SOLVED (asserted in the action loop below)
                pos_acts = actions[sim_statuses == 1]
                neg_acts = actions[sim_statuses == -1]
                np.random.shuffle(pos_acts)
                np.random.shuffle(neg_acts)
                pos_acts = pos_acts[:max_p_acts]
                neg_acts = neg_acts[:max_n_acts]
                # positives first, so act_id < len(pos_acts) marks a positive
                acts = np.concatenate([pos_acts, neg_acts])
                hickle.dump(acts,
                            act_cache_name,
                            mode='w',
                            compression='gzip')
                all_boxes = []

            # index into the cached boxes
            valid_act_id = 0
            for act_id, act in enumerate(
                    tqdm(acts, 'Candidate Action', leave=False)):
                sim = simulator.simulate_action(
                    task_id,
                    act,
                    stride=60,
                    need_images=True,
                    need_featurized_objects=True)
                if not use_cache:
                    # pos_acts only exists when the selection was just drawn
                    if act_id < len(pos_acts):
                        assert sim.status == phyre.SimulationStatus.SOLVED
                    else:
                        assert sim.status == phyre.SimulationStatus.NOT_SOLVED
                assert sim.status != phyre.SimulationStatus.INVALID_INPUT
                raw_images = sim.images
                # every frame resized to the network input size, with its
                # first axis reversed
                rst_images = np.stack([
                    np.ascontiguousarray(
                        cv2.resize(rst_image,
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1])
                    for rst_image in raw_images
                ])
                # prepare input for network:
                image = cv2.resize(raw_images[0],
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                image = phyre.observations_to_float_rgb(image)
                # parse object
                objs_color = sim.featurized_objects.colors
                # objects whose colour name contains BLACK or PURPLE are
                # ignored
                objs_valid = [('BLACK' not in obj_color) and
                              ('PURPLE' not in obj_color)
                              for obj_color in objs_color]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]

                if use_cache:
                    boxes = all_boxes[valid_act_id]
                    valid_act_id += 1
                else:
                    # one row per object: [object index, x1, y1, x2, y2]
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        # entries equal to 1 are zeroed so that only the
                        # object pixels remain non-zero
                        # (presumably 1 is the white background - confirm)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0

                        [h, w] = np.where(mask_im)
                        x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                        # rescale from scene resolution to input size
                        x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                        boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                    all_boxes.append(boxes)

                # channels-first image with two leading singleton axes
                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                # the object-index column is dropped
                rois = torch.from_numpy(boxes[..., 1:].astype(
                    np.float32))[None, :]

                # the first resized frame, with pixel values 1, 2, 3 and 5
                # cleared, is the canvas for the rendered masks
                # (presumably those values are the moving-object colours -
                # confirm)
                bg_image = rst_images[0].copy()
                for fg_id in [1, 2, 3, 5]:
                    bg_image[bg_image == fg_id] = 0
                # NOTE: `boxes` is rebound here to the output of
                # generate_trajs
                boxes, masks = self.generate_trajs(data, rois)
                rst_masks = np.stack([
                    self.render_mask_to_image(boxes[0, i],
                                              masks[0, i],
                                              images=bg_image.copy(),
                                              color=objs_color).astype(
                                                  np.uint8)
                    for i in range(self.pred_rollout)
                ])

                # the output goes under pos/ or neg/ by simulated status
                output_dir = f'data/PHYRE_proposal/{self.proposal_dir}/{split}/'
                output_dir = output_dir + 'pos/' if sim.status == phyre.SimulationStatus.SOLVED else output_dir + 'neg/'
                output_dir = output_dir + f'{task.replace(":", "_")}/'
                os.makedirs(output_dir, exist_ok=True)
                rst_dict = {'gt_im': rst_images, 'pred_im': rst_masks}
                hickle.dump(rst_dict,
                            f'{output_dir}/{act_id}.hkl',
                            mode='w',
                            compression='gzip')

            # persist the freshly computed boxes for the next run
            if not use_cache:
                all_boxes = np.stack(all_boxes)
                hickle.dump(all_boxes,
                            box_cache_name,
                            mode='w',
                            compression='gzip')
def _simulate_and_score(simulator, evaluator, task_index, action, stride,
                        goal):
    """Simulate ``action`` on one task, log the attempt and score the rollout.

    Args:
        simulator: PHYRE action simulator the task belongs to.
        evaluator: ``phyre.Evaluator`` that receives the attempt.
        task_index: Index of the task inside ``simulator``.
        action: Array-like action; it is squeezed before simulation.
        stride: Frame stride passed to ``simulate_action``.
        goal: Normaliser applied to the object/goal contact count.

    Returns:
        ``(score, solved)``. ``score`` is ``0`` for an invalid action;
        otherwise it is ``1 - centroid_distance / 256`` measured on the last
        frame plus ``objectTouchGoalSequence(images) / goal``.
    """
    sim_result = simulator.simulate_action(task_index,
                                           np.squeeze(action),
                                           need_images=True,
                                           stride=stride)
    evaluator.maybe_log_attempt(task_index, sim_result.status)
    if sim_result.status.is_invalid():
        return 0, False
    rollout = ImgToObj.getObjectAndGoalSequence(sim_result.images)
    score = 1.0 - np.linalg.norm(rollout['object'][-1]['centroid'] -
                                 rollout['goal'][-1]['centroid']) / 256.0
    score += ImgToObj.objectTouchGoalSequence(sim_result.images) / goal
    return score, sim_result.status.is_solved()


def evaluate_agent(task_ids, tier, solved_actions_pdf):
    """Evaluate a sample-then-refine agent on ``task_ids``.

    For every task the agent
      1. rolls out the scene without user input to obtain reference object
         and goal trajectories,
      2. samples random actions (75% of the time from the per-template
         density in ``solved_actions_pdf`` when one exists), keeps those that
         pass ``ImgToObj.check_seq_action_intersect`` and simulates them,
      3. if still unsolved, refines the best sampled action with antithetic
         (``theta +/- delta``) finite-difference updates.

    Args:
        tasks_ids: see ``task_ids``.
        task_ids: Sequence of task id strings of the form ``template:instance``.
        tier: Action tier name passed to ``phyre.initialize_simulator``.
        solved_actions_pdf: Mapping from template id to an object exposing
            ``resample(size=1)`` (presumably a ``scipy.stats.gaussian_kde``
            over solving actions -- TODO confirm with caller).

    Returns:
        Tuple ``(evaluator.get_aucess(), tasks_solved, len(task_ids))``.
    """
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    stride = 100
    eval_stride = 2
    goal = 3.0 * 60.0 / eval_stride
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0
    alpha = 1.0
    N = 5
    for task_index in tqdm(range(len(task_ids)), desc='Evaluate tasks'):
        task_id = task_ids[task_index]
        task_type = task_id.split(":")[0]
        task_data = task_data_dict[task_id]
        # Reference rollout without any user action.
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)
        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)
        seq_data = ImgToObj.getObjectAndGoalSequence(images)
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        # Rows are [x, y, r, exclusion_radius, score]; the first row is a
        # sentinel that is dropped before the refinement stage.
        tested_actions = np.array([[-1, -1, -1, 1, 0]])
        solved_task = False
        max_score = 0

        # ---- Stage 1: guided random sampling -------------------------------
        while (evaluator.get_attempts_for_task(task_index) <
               phyre.MAX_TEST_ATTEMPTS and not solved_task and
               max_score < 1.0):
            random_action = np.random.random_sample((1, 5))
            if (task_type in solved_actions_pdf and
                    np.random.random_sample() >= .25):
                random_action[0, 0:3] = np.squeeze(
                    solved_actions_pdf[task_type].resample(size=1))
            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            # Mostly skip candidates that fall close to an already tested one.
            if (np.any(test_action_dist <= tested_actions[:, 3]) and
                    np.random.random_sample() >= .75):
                continue
            # Only the three action components are passed on; slicing rows
            # (``random_action[0:3]``) would hand over all five columns.
            if not ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type,
                    random_action[0, 0:3]):
                continue
            sim_result = simulator.simulate_action(
                task_index,
                np.squeeze(random_action[:, 0:3]),
                need_images=True,
                stride=eval_stride)
            evaluator.maybe_log_attempt(task_index, sim_result.status)
            if sim_result.status.is_invalid():
                continue
            touch = ImgToObj.objectTouchGoalSequence(sim_result.images)
            random_action[0, 3] = .1
            # NOTE(review): the distance term uses the no-action rollout
            # (``seq_data``), so it is identical for every candidate --
            # confirm this is intended.
            random_action[0, 4] = 1.0 - np.linalg.norm(
                seq_data['object'][-1]['centroid'] -
                seq_data['goal'][-1]['centroid']) / 256.0
            random_action[0, 4] += touch / goal
            if random_action[0, 4] > max_score:
                max_score = random_action[0, 4]
            tested_actions = np.concatenate((tested_actions, random_action),
                                            0)
            solved_task = sim_result.status.is_solved()

        # ---- Stage 2: finite-difference refinement of the best action ------
        if (not solved_task and evaluator.get_attempts_for_task(task_index) <
                phyre.MAX_TEST_ATTEMPTS):
            tested_actions = np.delete(tested_actions, 0, 0)
            theta = tested_actions[np.argmax(tested_actions[:, 4]), 0:3]
            # Each round needs up to 2 * N + 1 attempts.
            while (evaluator.get_attempts_for_task(task_index) + 2 * N + 1 <
                   phyre.MAX_TEST_ATTEMPTS and not solved_task):
                delta = np.random.normal(0, .2, (N, 3))
                test_actions_pos = theta + delta
                test_actions_neg = theta - delta
                for i in range(N):
                    pos_score, solved_task = _simulate_and_score(
                        simulator, evaluator, task_index,
                        test_actions_pos[i, :], eval_stride, goal)
                    if solved_task:
                        break
                    neg_score, solved_task = _simulate_and_score(
                        simulator, evaluator, task_index,
                        test_actions_neg[i, :], eval_stride, goal)
                    if solved_task:
                        break
                    theta = theta + alpha / N * (pos_score -
                                                 neg_score) * delta[i, :]
                if solved_task:
                    break
                _, solved_task = _simulate_and_score(simulator, evaluator,
                                                     task_index, theta,
                                                     eval_stride, goal)

        # Count every task at most once, however many attempts solved it.
        tasks_solved += int(solved_task)

    print(tasks_solved, "Tasks solved out of ", len(task_ids), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(task_ids))
# Load the cached 100k actions for this tier together with their simulation
# statuses on ``task_str``.
cache = phyre.get_default_100k_cache(tier)
statuses = cache.load_simulation_states(task_str)
actions = cache.action_array.tolist()
print(len(actions))

# Keep only the actions whose cached status is not INVALID for this task.
valid_actions = [
    action for action_id, action in enumerate(actions)
    if statuses[action_id] != phyre.simulation_cache.INVALID
]
actions = valid_actions
print(len(actions))

# Extract goal/object geometry from the initial frame of the task.
simulator = phyre.initialize_simulator([task_str], tier)
initial_scene = simulator.initial_scenes[0]
frame_data = ImgToObj.getObjectAndGoalSequence([initial_scene])

# Prefer the dynamic goal layer; fall back to the static one when the scene
# contains no dynamic-goal pixels.
dynamic_goal = ImgToObj.Layer.dynamic_goal.value
goal_type = (dynamic_goal if dynamic_goal in initial_scene else
             ImgToObj.Layer.static_goal.value)

goal_data, object_data = frame_data['goal'][0], frame_data['object'][0]
goal_bb, goal_center = goal_data['bb'], goal_data['centroid']
object_bb, object_center = object_data['bb'], object_data['centroid']
def get_auccess(solver, tasks, solve_noise=False, save_tries=False, brute=False):
    """Run ``solver``'s actions through the PHYRE 'ball' simulator and return AUCCESS.

    Args:
        solver: Object exposing ``get_actions(tasks, init_scenes[, brute=True])``.
        tasks: Task ids handed to ``phyre.initialize_simulator`` / ``phyre.Evaluator``.
        solve_noise: If True, one action per task is expected from the solver and
            further attempts are generated by adding deltas from
            ``action_delta_generator()`` to it. If False, the solver's per-task
            action list is iterated directly.
        save_tries: If True, rollout frames are copied into ``vis_stack``.
        brute: Forwarded to ``solver.get_actions`` as ``brute=True`` when set.

    Returns:
        ``eva.get_auccess()`` of the evaluator filled during the run.

    NOTE(review): ``L`` is presumably a logger; several ``L.info`` calls pass
    extra positional arguments to a message without ``%`` placeholders --
    confirm that ``L`` accepts this.
    """
    if save_tries:
        # NOTE(review): ``font`` is not used anywhere else in this function.
        font = ImageFont.truetype("/usr/share/fonts/truetype/ubuntu/Ubuntu-R.ttf", 10)
    eval_setup = 'ball_within_template'
    sim = phyre.initialize_simulator(tasks, 'ball')
    # One binary mask per colour channel 2..6, resized to 32x32, then flipped
    # along the second-to-last axis.
    init_scenes = T.tensor([[cv2.resize((scene==channel).astype(float), (32,32)) for channel in range(2,7)] for scene in sim.initial_scenes]).float().flip(-2)
    eva = phyre.Evaluator(tasks)

    # Get Actions from solver:
    if brute:
        all_actions = solver.get_actions(tasks, init_scenes, brute =True)
    else:
        all_actions = solver.get_actions(tasks, init_scenes)
    #L.info(list(zip(tasks, all_actions)))
    #return 0

    # Loop through tasks
    for t_idx, task in enumerate(tasks):
        # Get 100 actions from solver
        if solve_noise:
            # expects one action for task
            task_actions = [all_actions[t_idx]]
        else:
            # expects 100 actions for task
            task_actions = all_actions[t_idx]

        # Loop through actions
        for j, action in enumerate(task_actions):
            # Setting up visualization array
            vis_wid = 64
            vis_stack = T.zeros(6,10,vis_wid,vis_wid,3)
            vis_count = 1

            # Simulate action
            res = sim.simulate_action(t_idx, action, need_featurized_objects=False)

            # Refining if invalid Action
            t = 0
            temp = 1
            base_action = action.copy()
            L.info(base_action, 'base action')
            # Checking for valid action: jitter around ``base_action`` with a
            # slowly growing scale (capped once ``temp`` reaches 5).
            # NOTE(review): this loop has no iteration limit.
            while res.status.is_invalid():
                t += 1
                action = base_action + (np.random.rand(3)-0.5)*0.05*temp
                L.info(action, f"potential action for task {task}")
                res = sim.simulate_action(t_idx, action, need_featurized_objects=False)
                temp *= 1.01 if temp <5 else 1
                #assert(t>500, "too many invalid tries")
            L.info(action, 'valid action')

            # Log first Attempt
            eva.maybe_log_attempt(t_idx, res.status)

            # Visualizing first attempt (at most the first 10 frames)
            if save_tries:
                for i in range(min(len(res.images), 10)):
                    vis_stack[0,i] = T.tensor(cv2.resize(phyre.observations_to_uint8_rgb(res.images[i]), (vis_wid,vis_wid)))

            # Collecting 100 Actions if solve noise
            warning_flag = False
            if solve_noise:
                base_action = action
                temp = 1
                error = False
                t = 0
                delta_generator = action_delta_generator()
                # Looping while less than 100 attempts
                while eva.attempts_per_task_index[t_idx]<100:
                    # Searching for new action while not solved
                    if not res.status.is_solved():
                        """ OLD APPROACH action = base_action + (np.random.rand(3)-0.5)*np.array([0.3,0.05,0.05])*temp temp *= 1.01 if temp <5 else 1 """
                        if t<1000:
                            action = base_action + delta_generator.__next__()
                            res = sim.simulate_action(t_idx, action, need_featurized_objects=False)
                            eva.maybe_log_attempt(t_idx, res.status)
                            t += 1
                        else:
                            # After 1000 deltas give up and pad the log with
                            # NOT_SOLVED attempts.
                            if not warning_flag:
                                L.info(f"WARNING can't find valid action for {task}")
                            warning_flag = True
                            error = True
                            eva.maybe_log_attempt(t_idx, phyre.SimulationStatus.NOT_SOLVED)
                    # if solved -> repeating action
                    else:
                        if not warning_flag:
                            L.info(f"{task} solved after", eva.attempts_per_task_index[t_idx])
                            # Visualization of the solving rollout in slot 5
                            if save_tries and not error:
                                for i in range(min(len(res.images), 10)):
                                    vis_stack[5,i] = T.tensor(cv2.resize(phyre.observations_to_uint8_rgb(res.images[i]), (vis_wid,vis_wid)))
                        warning_flag = True
                        eva.maybe_log_attempt(t_idx, res.status)
                    # Visualization of up to four further valid tries (slots 1-4)
                    if save_tries and not error and not res.status.is_invalid() and t and vis_count<5:
                        for i in range(min(len(res.images), 10)):
                            vis_stack[vis_count,i] = T.tensor(cv2.resize(phyre.observations_to_uint8_rgb(res.images[i]), (vis_wid,vis_wid)))
                        vis_count +=1
                if not warning_flag and not res.status.is_solved() and eva.attempts_per_task_index[t_idx]==100:
                    L.info(f"{task} not solved")
                vis_batch(vis_stack, f'result/solver/pyramid', f"{task}_attempts")
            # Not Solve Noise Case
            else:
                # Visualization
                if save_tries and not res.status.is_invalid() and vis_count<5:
                    for i in range(min(len(res.images), 10)):
                        vis_stack[vis_count,i] = T.tensor(cv2.resize(phyre.observations_to_uint8_rgb(res.images[i]), (vis_wid,vis_wid)))
                    vis_count +=1
                if res.status.is_solved():
                    L.info(f"{task} solved after", eva.attempts_per_task_index[t_idx])
                    vis_batch(vis_stack, f'result/solver/pyramid', f"{task}_attempts")
                    # Repeat the solving status until 100 attempts are logged,
                    # then stop trying further actions for this task.
                    while eva.attempts_per_task_index[t_idx]<100:
                        eva.maybe_log_attempt(t_idx, res.status)
                    break
    return eva.get_auccess()
#import pymunk
#from pymunk import Vec2d
import phyre
import ImgToObj

# Single-task setup used to time one simulation and the contour extraction.
eval_setup = 'ball_cross_template'
task_str = '00004:243'
action_tier = phyre.eval_setup_to_action_tier(eval_setup)
task_data_dict = phyre.loader.load_compiled_task_dict()
simulator = phyre.initialize_simulator([task_str], action_tier)

action = [.84, .82, .41]
#action = [0.8720595836408028,0.1325951705610915,0.40200105882798676]
#action = [0,0,0]

# Time the physics simulation of the chosen action.
t0 = time.time()
sim_result = simulator.simulate_action(0,
                                       action,
                                       need_images=True,
                                       stride=2)
t1 = time.time()
sim_elapsed = t1 - t0
print(sim_elapsed, "Sim Time")
print(sim_result.status.is_solved())

# Time the object/goal sequence extraction on the simulated frames.
t0 = time.time()
seq_data = ImgToObj.getObjectAndGoalSequence(sim_result.images)
t1 = time.time()
contour_elapsed = t1 - t0
print(contour_elapsed, "Sequence Contour Finding Time")
import numpy as np import phyre from tqdm import tqdm_notebook import animations random.seed(0) # Evaluation Setup eval_setup = 'ball_cross_template' fold_id = 0 # For simplicity, we will just use one fold for evaluation. train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, 0) action_tier = phyre.eval_setup_to_action_tier(eval_setup) tasks = dev_tasks[0:1] print((tasks)) simulator = phyre.initialize_simulator(tasks, action_tier) actions = simulator.build_discrete_action_space(max_actions=1000) def evaluate_random_agent(tasks, tier): # Create a simulator for the task and tier. simulator = phyre.initialize_simulator(tasks, tier) evaluator = phyre.Evaluator(tasks) assert tuple(tasks) == simulator.task_ids images = [] actions = [] for task_index in tqdm_notebook(range(len(tasks)), desc='Evaluate tasks'): while evaluator.get_attempts_for_task( task_index) < phyre.MAX_TEST_ATTEMPTS: # Sample a random valid action from the simulator for the given action space. action = simulator.sample()
return X * local_masks + points.reshape_as(X) if __name__ == "__main__": ## TESTING HANDCRAFTED ACTION EXTRACTOR WITH GROUNDTRUTH ACTION PATH # SETUP of phyre simulator SAVE_IMAGES = False eval_setup = 'ball_within_template' fold_id = 0 train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id) cache = phyre.get_default_100k_cache("ball") actions = cache.action_array print(cache.task_ids) tasks = train_tasks #+dev_tasks+test_tasks print(f"{len(tasks)} tasks") sim = phyre.initialize_simulator(tasks, 'ball') init_scenes = sim.initial_scenes X = T.tensor(scenes_to_channels(init_scenes)).float() print("Init Scenes Shape:\n", X.shape) # COLLECT action path action_paths = [] for i, t in enumerate(tasks): while True: action = actions[cache.load_simulation_states(t) == 1] if len(action) == 0: action = [sim.sample()] action = random.choice(action) res = sim.simulate_action(i, action, stride=20) print(i, res.status.is_solved(), len(res.images), end='\r') if type(res.images) != type(None):
def _cross_pixels(points):
    """Expand every ``(x, y)`` point into a 5-pixel plus-shaped marker.

    Returns:
        ``(xs, ys)``: two flat lists with the x and y pixel coordinates.
    """
    xs, ys = [], []
    for px, py in points:
        xs.extend([px, px + 1, px - 1, px, px])
        ys.extend([py, py, py, py + 1, py - 1])
    return xs, ys


def solve(tasks,
          generator,
          save_images=False,
          force_collect=False,
          static=256,
          show=False):
    """Generate one action per task with ``generator`` and simulate it.

    Interaction data is collected into ``./data/cgan_solver`` when it is
    missing or ``force_collect`` is set. For every stored task the generator
    output is turned into an action via ``pic_to_action_vector``, shifted to
    the stored crop position and simulated. Invalid actions are jittered up
    to 200 times. Summary statistics are printed at the end.

    Args:
        tasks: Task ids used when interaction data has to be collected.
        generator: Model exposing ``width``, ``noise_dim``, ``s_chan``,
            ``eval()`` and ``__call__(scene, noise)``.
        save_images: Unused; kept for interface compatibility.
        force_collect: Re-collect interaction data even if it exists.
        static: Forwarded to ``collect_interactions``.
        show: If True, save the annotated generator output, save and display
            the scene for every task.

    Returns:
        None.
    """
    # Collect Interaction Data
    data_path = './data/cgan_solver'
    interactions_file = data_path + '/interactions.pickle'
    if not os.path.exists(interactions_file) or force_collect:
        os.makedirs(data_path, exist_ok=True)
        wid = generator.width
        print("Collecting Data")
        collect_interactions(data_path,
                             tasks,
                             10,
                             stride=1,
                             size=(wid, wid),
                             static=static)
    # NOTE: pickle must only be used on the locally generated files above;
    # never point ``data_path`` at untrusted data.
    with open(interactions_file, 'rb') as fs:
        X = T.tensor(pickle.load(fs), dtype=T.float)
    with open(data_path + '/info.pickle', 'rb') as fs:
        info = pickle.load(fs)
    tasklist = info['tasks']
    positions = info['pos']
    orig_actions = info['action']
    print('loaded dataset with shape:', X.shape)

    # Sim SETUP
    print('Succesfull collection for tasks:\n', tasklist)
    sim = phyre.initialize_simulator(tasklist, 'ball')

    # Solve Loop
    error = np.zeros((X.shape[0], 3))
    generator.eval()
    solved, tried = 0, 0
    for i, task in enumerate(tasklist):
        # generate 'fake'
        noise = T.randn(1, generator.noise_dim)
        with T.no_grad():
            fake = generator((X[i, :generator.s_chan])[None], noise)[0, 0]
        action = np.array(pic_to_action_vector(fake.numpy(), r_fac=1))
        raw_action = action.copy()

        # PROCESS ACTION
        print(action, 'raw')
        # shift by half to get relative position
        action[:2] -= 0.5
        # multiply by half because extracted scope is already half of the scene
        action[:2] *= 0.5
        # action value is always 4*diameter -> 8*radius, but the scope is
        # already halved -> 8*0.5*radius
        action[2] *= 4
        print(action, 'relativ')
        pos = positions[i]
        print(pos)
        action[:2] += pos
        print(action, 'added')
        res = sim.simulate_action(i, action, need_featurized_objects=True)

        # Noisy tries while invalid actions
        t = 0
        temp = 1
        base_action = action
        while res.status.is_invalid() and t < 200:
            t += 1
            action = base_action + (np.random.rand(3) - 0.5) * 0.01 * temp
            res = sim.simulate_action(i, action, need_featurized_objects=False)
            temp *= 1.01
        print(action, 'final action')

        # Check for and log Solves
        if not res.status.is_invalid():
            tried += 1
            if res.status.is_solved():
                solved += 1
        print(orig_actions[i], 'orig action')
        print(task, "solved", res.status.is_solved())
        error[i] = orig_actions[i] - base_action

        # Visualization
        if show:
            size = fake.shape[0]
            x, y, d = np.round(raw_action * size)
            y = size - y
            print(x, y, d)
            # Pixel indices must be integers to index the image.
            x, y, d = int(x), int(y), int(d)
            far_x = (x + d) if (x + d) < size - 1 else size - 2
            far_y = (y + d) if (y + d) < size - 1 else size - 2
            xx, yy = _cross_pixels(
                zip([x, far_x, x - d, x, x], [y, y, y, far_y, y - d]))
            # Keep every marker pixel inside the image instead of raising or
            # wrapping around to the opposite border.
            xx = np.clip(xx, 0, size - 1).tolist()
            yy = np.clip(yy, 0, size - 1).tolist()
            fake[yy, xx] = 0.5
            os.makedirs('result/cgan_solver/vector_extractions',
                        exist_ok=True)
            plt.imsave(f'result/cgan_solver/vector_extractions/{i}.png', fake)
            if not res.status.is_invalid():
                os.makedirs('result/cgan_solver/scenes', exist_ok=True)
                plt.imsave(f'result/cgan_solver/scenes/{i}.png',
                           res.images[0, ::-1])
            else:
                print("invalid")
            plt.imshow(phyre.observations_to_float_rgb(sim.initial_scenes[i]))
            plt.show()

    # Avoid ZeroDivisionError when not a single action was valid.
    solve_rate = solved / tried if tried else float('nan')
    print("solving percentage:", solve_rate, 'overall:', tried)
    print("mean x error:", np.mean(error[:, 0]), 'mean x abs error:',
          np.mean(np.abs(error[:, 0])))
    print("mean y error:", np.mean(error[:, 1]), 'mean y abs error:',
          np.mean(np.abs(error[:, 1])))
    print("mean r error:", np.mean(error[:, 2]), 'mean r abs error:',
          np.mean(np.abs(error[:, 2])))
def real_eval(cls, cache, model, trainer, actions_per_task, task_ids, tier,
              max_attempts_per_task, cfg):
    """Score candidate actions with ``model`` and feed them to an evaluator.

    Builds a test-mode ``PhyreDataset`` over (task, action) pairs, lets
    ``trainer.eval_actions`` score them and passes the per-task scores and
    actions to an ``EvaluatorWrapper``.

    Args:
        cache: Simulation cache, used only when ``cfg.eval.store_vis`` is set
            and no action override is configured.
        model: Model to evaluate; it is moved to GPU with ``model.cuda()``.
        trainer: Object providing ``eval_actions`` and
            ``load_agent_from_folder``.
        actions_per_task: Candidate actions replicated for every task when
            ``cfg.eval.store_vis`` is falsy.
        task_ids: Task ids to evaluate.
        tier: Action tier for the simulator and the dataset.
        max_attempts_per_task: Passed to ``EvaluatorWrapper`` and logged.
        cfg: Configuration object. ``cfg.eval.data_loader.num_workers`` is
            modified in place when ``cfg.eval.batch_size`` is falsy.

    Returns:
        The populated ``EvaluatorWrapper``.
    """
    # Parameters
    if cfg.eval.batch_size:
        eval_batch_size = cfg.eval.batch_size
    else:
        eval_batch_size = cfg.train.batch_size * cfg.eval.bs_multiplier
        # Since scaling the eval batch size by this, should scale down the
        # workers for training, since the memory might blow up
        # NOTE(review): ``max(16, ...)`` enforces a lower bound of 16 workers,
        # which can be more than the scaled-down value -- confirm intent.
        cfg.eval.data_loader.num_workers = max(
            16, cfg.train.data_loader.num_workers // cfg.eval.bs_multiplier)
        logging.warning('Scaling down eval workers to %d',
                        cfg.eval.data_loader.num_workers)
    assert eval_batch_size % cfg.num_gpus == 0, 'Otherwise will error'
    model.cuda()
    # Not passing in the drop_objs here, since this simulator is only
    # used for evaluation
    simulator = phyre.initialize_simulator(task_ids, tier)
    assert tuple(task_ids) == simulator.task_ids
    # New evaluation code only does 1 prediction no matter length of rollout
    evaluator = EvaluatorWrapper(simulator, task_ids, 1, max_attempts_per_task)
    if cfg.eval.store_vis:
        # Subselect actions that are diverse (some solve, others don't)
        # And keep a small subset of actions, not too many
        # eval_batch_size = 4  # What I typically visualize for
        # store_vis_nsamples = max(cfg.eval.store_vis_nsamples,
        #                          eval_batch_size)
        # Make this consistent, to keep numbers always consistent
        store_vis_nsamples = cfg.eval.store_vis_nsamples
        actions_override = None
        if cfg.eval.store_vis_actions is not None:
            actions_override = np.array(
                cls.read_actions_override(cfg.eval.store_vis_actions))
            # One batch holds exactly the overridden actions.
            eval_batch_size = len(actions_override)
        task_indices = []
        actions = []
        # Running separately to be able to match the set that was used
        # in before multi-worker testing
        for task_index, task_id in enumerate(
                tqdm.tqdm(task_ids, 'gen-ing task IDs for vis')):
            if actions_override is not None:
                this_actions = actions_override
            else:
                # NOTE(review): uses ``cfg.tier`` rather than the ``tier``
                # argument -- confirm both always agree.
                _, _, this_actions, _, _ = (
                    neural_agent.create_balanced_eval_set(
                        cache, [task_id], store_vis_nsamples, cfg.tier))
            actions.append(this_actions)
            task_indices += [task_index] * len(this_actions)
        task_indices = np.array(task_indices)
        actions = np.concatenate(actions, axis=0)
    else:
        # Every task is paired with the full ``actions_per_task`` set.
        task_indices = np.repeat(np.arange(len(task_ids)),
                                 len(actions_per_task))
        actions = np.concatenate([actions_per_task] * len(task_ids), axis=0)
    logging.info('Ranking %d actions and simulating top %d',
                 len(actions) // len(task_ids), max_attempts_per_task)
    assert len(task_indices) == len(actions)
    if cfg.train.data_loader.fwd_model.use_obj_fwd_model:
        obj_fwd_model = obj_fwd_agent.ObjTrainer.gen_model(cfg)
        if cfg.train.data_loader.fwd_model.weights is not None:
            obj_fwd_model = trainer.load_agent_from_folder(
                obj_fwd_model, cfg.train.data_loader.fwd_model.weights)
        obj_fwd_model = obj_fwd_model.module.cpu()
    else:
        obj_fwd_model = None
    dataset = PhyreDataset(
        tier,
        task_ids,
        task_indices,
        # This info not needed for test case
        torch.LongTensor([0] * len(task_indices)),
        actions,
        cfg.simulator,
        mode='test',
        balance_classes=False,
        hard_negatives=False,
        init_clip_ratio_to_sim=cfg.eval.init_clip_ratio_to_sim,
        init_frames_to_sim=cfg.eval.init_frames_to_sim,
        frames_per_clip=cfg.eval.frames_per_clip,
        n_hist_frames=cfg.eval.n_hist_frames,
        drop_objs=cfg.eval.drop_objs,
        obj_fwd_model=obj_fwd_model,
    )
    # res_actions may be different from actions since the last batch
    # might be smaller than the others, and we might end up dropping it
    res_scores, res_actions, res_indices, res_pixel_accs = (
        trainer.eval_actions(model, dataset, len(actions), eval_batch_size,
                             cfg))
    for task_index, _ in enumerate(task_ids):
        mask = (res_indices == task_index)
        # When store_vis, the actions are selected differently, so this
        # assertion would not hold
        assert (cfg.eval.store_vis or
                (np.sum(mask) == (len(actions) // len(task_ids))))
        if np.sum(mask) == 0:
            logging.warning('Missing task %s from evaluation!',
                            task_ids[task_index])
            continue
        # statuses = cache.load_simulation_states(task_id)
        evaluator.wrapper_add_scores(task_index, res_scores[:, mask],
                                     res_actions[mask])
    # # Order of descending scores.
    # action_order = np.argsort(-scores)
    cls.print_pixel_accs_summary([res_pixel_accs], cfg.phyre_movable_channels)
    return evaluator
def solve(model, model2, save_images=False):
    """Run the two-stage flow models on the training fold and evaluate them.

    For every training task of fold 0 (``ball_within_template``) a tiny ball
    is rolled out to obtain a base path and an action path. ``model`` and
    ``model2`` are applied to the initial scenes, actions are extracted from
    the action-path images and then tried in the simulator with growing
    Gaussian noise until 100 attempts are logged per task. Finally the
    evaluator metrics are printed and per-template bar charts are shown.

    Args:
        model: Network applied to the formatted initial scenes.
        model2: Network applied to the scenes, base paths and ``model`` output.
        save_images: If True, intermediate images and final rollouts are
            written to ``result/flownet/``.

    Returns:
        None.
    """
    eval_setup = 'ball_within_template'
    fold_id = 0  # For simplicity, we will just use one fold for evaluation.
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id)
    print('Size of resulting splits:\n train:', len(train_tasks), '\n dev:',
          len(dev_tasks), '\n test:', len(test_tasks))
    tasks = train_tasks[:]
    print("tasks:\n", tasks)

    sim = phyre.initialize_simulator(tasks, 'ball')
    init_scenes = sim.initial_scenes
    # NOTE: ``format`` is expected to be the project's scene formatter that
    # shadows the builtin of the same name.
    X = T.tensor(format(init_scenes)).float()
    print("Init Scenes Shape:\n", X.shape)

    base_path = []
    action_path = []
    for i, t in enumerate(tasks):
        # Resample until the simulator returns images for the tiny ball.
        while True:
            # ``sample`` takes ``valid_only`` as its first argument, not a
            # task index, so it is called without arguments.
            action = sim.sample()
            action[2] = 0.01
            res = sim.simulate_action(i, action, stride=20)
            if res.images is not None:
                base_path.append(rollouts_to_channel([res.images], 2))
                action_path.append(rollouts_to_channel([res.images], 1))
                break
    base_path = T.tensor(np.concatenate(base_path)).float()
    # Build the action paths from ``action_path`` (not from ``base_path``).
    action_path = T.tensor(np.concatenate(action_path)).float()

    with T.no_grad():
        Z = model(X)
        A = model2(T.cat((X[:, 1:], base_path[:, None], Z), dim=1))
    #B = model3(T.cat((X[:,1:], Y[:,None,2], Z, A), dim=1))
    #B = extract_action(A, inspect=-2 if save_images else -1)
    B = extract_action(action_path[:, None],
                       inspect=-2 if save_images else -1)

    # Saving Images:
    if save_images:
        for inspect in range(len(X)):
            plt.imsave(
                f"result/flownet/{inspect}_init.png",
                T.cat(tuple(
                    T.cat((sub, T.ones(32, 1) * 0.5), dim=1)
                    for sub in X[inspect]),
                      dim=1))
            plt.imsave(f"result/flownet/{inspect}_base.png",
                       base_path[inspect])
            plt.imsave(f"result/flownet/{inspect}_target.png", Z[inspect, 0])
            #plt.imsave(f"result/flownet/{inspect}_init_scene.png", np.flip(batch[inspect][0], axis=0))
            plt.imsave(f"result/flownet/{inspect}_action.png", A[inspect, 0])
            plt.imsave(f"result/flownet/{inspect}_selection.png",
                       B[inspect, 0])

    gen_actions = [pic_to_values(b) for b in B[:, 0]]
    print(gen_actions)

    # Feed actions into simulator
    eva = phyre.Evaluator(tasks)
    solved, valid, comb = dict(), dict(), dict()
    for i, t in enumerate(tasks):
        template = t[:5]
        if template not in comb:
            comb[template] = 0
            valid[template] = 0
            solved[template] = 0
        base_action = gen_actions[i]
        # Random Agent Intercept:
        #action = sim.sample()
        res = sim.simulate_action(i, base_action)
        alpha = 1
        # 100 Tries Max:
        while eva.get_attempts_for_task(i) < 100:
            if not res.status.is_solved():
                # Perturb the generated action with noise that grows after
                # every attempt.
                action = np.array(base_action) + np.random.randn(
                    3) * np.array([0.1, 0.1, 0.1]) * alpha
                res = sim.simulate_action(i, action)
                subtries = 0
                while subtries < 100 and res.status.is_invalid():
                    subtries += 1
                    action_var = np.array(action) + np.random.randn(
                        3) * np.array([0.05, 0.05, 0.05]) * alpha
                    res = sim.simulate_action(i, action_var)
                eva.maybe_log_attempt(i, res.status)
                alpha *= 1.01
            else:
                # Already solved: repeat the solving status.
                eva.maybe_log_attempt(i, res.status)
        if save_images:
            # Best effort: image export must not abort the evaluation.
            try:
                for k, img in enumerate(res.images):
                    plt.imsave(f"result/flownet/{i}_{k}.png",
                               np.flip(img, axis=0))
            except Exception:
                pass
        #print(i, t, res.status.is_solved(), not res.status.is_invalid())
        comb[template] += 1
        if not res.status.is_invalid():
            valid[template] += 1
        if res.status.is_solved():
            solved[template] += 1

    # Prepare Plotting
    print(eva.compute_all_metrics())
    print(eva.get_auccess())
    spacing = [1, 2, 3, 4]
    fig, ax = plt.subplots(5, 5, sharey=True, sharex=True)
    for i, template in enumerate(comb):
        ax[i // 5, i % 5].bar(spacing, [
            solved[template] / (valid[template] if valid[template] else 1),
            solved[template] / comb[template],
            valid[template] / comb[template],
            comb[template] / 100
        ])
        ax[i // 5, i % 5].set_xlabel(template)
    plt.show()
distance_map = 255 * distance_map / (img.shape[0] * 2) distance_map[distance_map > 255.] = 255. distance_map = 255. - distance_map return distance_map # improve/debug time-step selection for injection # implement 5 random positions at the goal object # take into account grey obstacles # run the benchmark with stats on compute on GPU cluster if __name__ == "__main__": x = 42 y = 42 sim = phyre.initialize_simulator(['00002:017'], "ball") # img = cv2.imread('maze.png') # read image init_scene = sim.initial_scenes[0] img = phyre.observations_to_float_rgb(init_scene) # read image img = cv2.resize(img, (64, 64)) print(img) cv2.imwrite('00002_017_scene.png', img * 255) target = np.flip((init_scene == 4), axis=0).astype(float) target = cv2.resize(target, (64, 64)) # cv2.imwrite('maze-initial.png', img) distance_map = find_distance_map_obj(img, target) #distance_map[y-1, x] = 0. #distance_map[y, x] = 0. #distance_map[y+1, x] = 0. #distance_map[y, x-1] = 0. #distance_map[y, x+1] = 0.