import logging
import os
import pickle

import numpy as np
import torch
import tqdm

import phyre

# neural_agent and MaxHeapWithSideLoad are referenced below but not defined in
# this excerpt; they come from the repository's own agent modules.


def eval(self, state, task_ids, tier):
    model = state['model']
    cache = state['cache']
    # NOTE: The agent only ranks actions that appear in the training cache,
    # even though the model could also score actions it has never seen.
    actions = cache.action_array[:self.params['rank_size']]
    model.cuda()
    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = simulator.initial_scenes
    evaluator = phyre.Evaluator(task_ids)
    for task_index in range(len(task_ids)):
        task_id = simulator.task_ids[task_index]
        observation = observations[task_index]
        scores = self.neural_model.eval_actions(model, actions,
                                                self.params['eval_batch_size'],
                                                observation)
        # Rank the actions by descending score.
        action_order = np.argsort(-scores)
        # Outcomes of the cached actions are precomputed in the cache.
        statuses = cache.load_simulation_states(task_id)
        for action_id in action_order:
            if evaluator.get_attempts_for_task(
                    task_index) >= self.params['max_attempts_per_task']:
                break
            status = phyre.SimulationStatus(statuses[action_id])
            evaluator.maybe_log_attempt(task_index, status)
    return evaluator
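
# Illustrative usage of the eval() method above (a sketch, not from the
# source). `agent` and `trained_model` are assumed to exist; the PHYRE calls
# (get_default_100k_cache, get_fold, get_auccess) are the library's public
# API, and the state layout mirrors what eval() reads.
#
# cache = phyre.get_default_100k_cache('ball')
# _, _, test_ids = phyre.get_fold('ball_cross_template', 0)
# state = dict(model=trained_model, cache=cache)       # trained_model: assumed
# evaluator = agent.eval(state, test_ids, tier='ball')  # agent: assumed instance
# print('AUCCESS:', evaluator.get_auccess())
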
def real_eval(cls, cache, model, actions, task_ids, tier,
              max_attempts_per_task, eval_batch_size, finetune_iterations,
              refine_iterations, refine_loss, refine_lr):
    # TODO: move to a flag.
    finetune_lr = 1e-4
    model.cuda()
    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = simulator.initial_scenes
    assert tuple(task_ids) == simulator.task_ids
    logging.info('Ranking %d actions and simulating top %d', len(actions),
                 max_attempts_per_task)
    if refine_iterations > 0:
        logging.info(
            'Will do refining for %d iterations with lr=%e and loss=%s',
            refine_iterations, refine_lr, refine_loss)
    evaluator = phyre.Evaluator(task_ids)
    for task_index in tqdm.trange(len(task_ids)):
        task_id = simulator.task_ids[task_index]
        if refine_iterations > 0:
            refined_actions = neural_agent.refine_actions(
                model, actions, observations[task_index], refine_lr,
                refine_iterations, eval_batch_size, refine_loss)
        else:
            refined_actions = actions
        scores = neural_agent.eval_actions(model, refined_actions,
                                           eval_batch_size,
                                           observations[task_index])
        # Order of descending scores.
        action_order = np.argsort(-scores)
        if refine_iterations <= 0:
            # Without refinement, outcomes come from the precomputed cache.
            statuses = cache.load_simulation_states(task_id)
        finetune_data = []
        for action_id in action_order:
            if evaluator.get_attempts_for_task(
                    task_index) >= max_attempts_per_task:
                break
            action = refined_actions[action_id]
            if refine_iterations > 0:
                status = simulator.simulate_action(task_index,
                                                   action,
                                                   need_images=False,
                                                   need_scenes=False).status
            else:
                status = phyre.SimulationStatus(statuses[action_id])
            finetune_data.append((task_index, status, action))
            evaluator.maybe_log_attempt(task_index, status)
        if evaluator.get_attempts_for_task(task_index) == 0:
            logging.warning('Made 0 attempts for task %s', task_id)
        if finetune_iterations > 0:
            neural_agent.finetune(model, finetune_data, simulator, finetune_lr,
                                  finetune_iterations)
    return evaluator
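
# Illustrative driver for real_eval above (a sketch, not from the source).
# `NeuralAgent`, `trained_model`, and the concrete flag values are
# assumptions; the PHYRE calls (get_default_100k_cache, get_fold,
# action_array, get_auccess) are the library's public API.
#
# tier = 'ball'
# cache = phyre.get_default_100k_cache(tier)
# actions = cache.action_array[:10000]   # rank the first 10k cached actions
# _, _, test_ids = phyre.get_fold('ball_cross_template', 0)
# evaluator = NeuralAgent.real_eval(     # NeuralAgent: assumed host class
#     cache, trained_model, actions, test_ids, tier,
#     max_attempts_per_task=100, eval_batch_size=512,
#     finetune_iterations=0, refine_iterations=0,
#     refine_loss='ce', refine_lr=1e-4)
# print('AUCCESS:', evaluator.get_auccess())
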
def _eval(cls, cache, train_sim_statuses, task_ids, evaluator,
          max_attempts_per_task, mem_test_simulation_weight, mem_rerank_size,
          mem_scoring_type, **kwargs):
    del kwargs  # Unused.
    # action_scores = train_sim_statuses.astype('float32').sum(0)
    if mem_rerank_size > 0:
        train_sim_statuses = train_sim_statuses[:, :mem_rerank_size]
    positive = (train_sim_statuses.astype('float32') > 0).sum(0)
    negative = (train_sim_statuses.astype('float32') < 0).sum(0)
    if mem_scoring_type == 'relative':
        # Smoothed per-action success rate over the training simulations.
        denominators = positive + negative + 1
        action_scores = positive / denominators
    elif mem_scoring_type == 'absolute':
        denominators = positive * 0 + 1
        action_scores = positive - negative
    else:
        raise ValueError(f'Unknown mem_scoring_type={mem_scoring_type}')
    regret_action_heap = MaxHeapWithSideLoad(enumerate(action_scores))
    logging.info('Found %d actions to choose from', len(regret_action_heap))
    logging.info('Starting eval simulation. mem_test_simulation_weight=%f',
                 mem_test_simulation_weight)
    for i, task_id in enumerate(task_ids):
        statuses = cache.load_simulation_states(task_id)
        to_push = []
        while regret_action_heap and evaluator.get_attempts_for_task(
                i) < max_attempts_per_task:
            action_id, success_rate = regret_action_heap.pop_max()
            status = phyre.SimulationStatus(statuses[action_id])
            evaluator.maybe_log_attempt(i, status)
            if mem_scoring_type == 'relative':
                # Fold the observed outcome into the action's success rate,
                # weighted by mem_test_simulation_weight.
                if status != 0:
                    successes = success_rate * denominators[action_id]
                    successes += mem_test_simulation_weight * float(status > 0)
                    denominators[action_id] += mem_test_simulation_weight
                    success_rate = successes / denominators[action_id]
            elif mem_scoring_type == 'absolute':
                success_rate += float(status) * mem_test_simulation_weight
            else:
                raise ValueError(
                    f'Unknown mem_scoring_type={mem_scoring_type}')
            to_push.append((action_id, success_rate))
        # Re-insert the popped actions with their updated scores before
        # moving on to the next task.
        for action, reward in to_push:
            regret_action_heap.push(action, reward)
    logging.info('Collected %s simulation samples for %s tasks',
                 len(evaluator), len(task_ids))
    return evaluator
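
# MaxHeapWithSideLoad is not defined in this excerpt. Below is a minimal
# sketch of the interface _eval() relies on (construction from (id, score)
# pairs, __len__, pop_max, push), built on heapq. The repository's actual
# implementation may differ; this only mirrors the calls made above.
import heapq


class MaxHeapWithSideLoad:
    """Max-heap of scores that carries a side-loaded action id."""

    def __init__(self, items):
        # heapq is a min-heap, so negate scores to pop the maximum first.
        self._heap = [(-score, key) for key, score in items]
        heapq.heapify(self._heap)

    def __len__(self):
        return len(self._heap)

    def pop_max(self):
        neg_score, key = heapq.heappop(self._heap)
        return key, -neg_score

    def push(self, key, score):
        heapq.heappush(self._heap, (-score, key))
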
def real_eval(cls, cache, model, actions, task_ids, tier,
              max_attempts_per_task, eval_batch_size, finetune_iterations,
              refine_iterations, refine_loss, refine_lr):
    # TODO: move to a flag.
    finetune_lr = 1e-4
    model.cuda()
    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = simulator.initial_scenes
    # CUSTOM: load precomputed per-task action-path masks.
    if os.path.exists(cls.ACTION_PATH_DIR):
        with open(cls.ACTION_PATH_DIR + '/channel_paths.pickle', 'rb') as fp:
            action_path_dict = pickle.load(fp)
        action_paths = torch.Tensor([
            action_path_dict[task] if task in action_path_dict else
            torch.zeros(256, 256) for task in task_ids
        ])[:, None].cuda()
    else:
        print("can't find action_path_dict!")
        exit(-1)
    assert tuple(task_ids) == simulator.task_ids
    logging.info('Ranking %d actions and simulating top %d', len(actions),
                 max_attempts_per_task)
    if refine_iterations > 0:
        logging.info(
            'Will do refining for %d iterations with lr=%e and loss=%s',
            refine_iterations, refine_lr, refine_loss)
    evaluator = phyre.Evaluator(task_ids)
    for task_index in tqdm.trange(len(task_ids)):
        task_id = simulator.task_ids[task_index]
        if refine_iterations > 0:
            refined_actions = neural_agent.refine_actions(
                model, actions, observations[task_index], refine_lr,
                refine_iterations, eval_batch_size, refine_loss)
        else:
            refined_actions = actions
        scores = neural_agent.eval_actions(
            model,
            refined_actions,
            eval_batch_size,
            observations[task_index],
            action_path=action_paths[task_index])
        # Order of descending scores.
        action_order = np.argsort(-scores)
        if refine_iterations <= 0:
            statuses = cache.load_simulation_states(task_id)
        finetune_data = []
        for action_id in action_order:
            if evaluator.get_attempts_for_task(
                    task_index) >= max_attempts_per_task:
                break
            action = refined_actions[action_id]
            if refine_iterations > 0:
                status = simulator.simulate_action(task_index,
                                                   action,
                                                   need_images=False,
                                                   need_scenes=False).status
            else:
                status = phyre.SimulationStatus(statuses[action_id])
            finetune_data.append((task_index, status, action))
            evaluator.maybe_log_attempt(task_index, status)
        if evaluator.get_attempts_for_task(task_index) == 0:
            logging.warning('Made 0 attempts for task %s', task_id)
        if finetune_iterations > 0:
            neural_agent.finetune(model, finetune_data, simulator, finetune_lr,
                                  finetune_iterations)
    return evaluator
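
# Illustrative sketch (an assumption, not from the source) of how the
# channel_paths.pickle file consumed above could be produced: a dict mapping
# task ids to 256x256 action-path masks. Only the "dict of per-task 2D masks"
# format is implied by the loading code; the directory name, the dtype, and
# how real masks would be computed are placeholders.


def write_placeholder_action_paths(action_path_dir='./action_paths'):
    import numpy as np  # local import to keep the sketch self-contained

    os.makedirs(action_path_dir, exist_ok=True)
    example_task_ids = ['00000:001', '00000:002']  # hypothetical PHYRE task ids
    action_path_dict = {
        task_id: np.zeros((256, 256), dtype=np.float32)  # placeholder mask
        for task_id in example_task_ids
    }
    with open(os.path.join(action_path_dir, 'channel_paths.pickle'),
              'wb') as fp:
        pickle.dump(action_path_dict, fp)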