Example #1
    def eval(self, state, task_ids, tier):
        model = state['model']
        cache = state['cache']
        # NOTE: The current agent only uses actions seen in the training set,
        #       even though it is able to rank actions that were not seen during training.
        actions = state['cache'].action_array[:self.params['rank_size']]

        model.cuda()
        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes
        evaluator = phyre.Evaluator(task_ids)

        for task_index in range(len(task_ids)):
            task_id = simulator.task_ids[task_index]
            observation = observations[task_index]
            scores = self.neural_model.eval_actions(
                model, actions, self.params['eval_batch_size'], observation)
            # Rank the actions by score in descending order.
            action_order = np.argsort(-scores)
            # Results of these actions are already stored in the cache.
            statuses = cache.load_simulation_states(task_id)

            for action_id in action_order:
                if evaluator.get_attempts_for_task(
                        task_index) >= self.params['max_attempts_per_task']:
                    break
                status = phyre.SimulationStatus(statuses[action_id])
                evaluator.maybe_log_attempt(task_index, status)
        return evaluator
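A minimal sketch of how this eval method might be driven, assuming an agent instance (here called agent) whose params dict carries rank_size, eval_batch_size and max_attempts_per_task, and a trained scoring model produced by its training step; those names are assumptions, while the PHYRE calls (get_fold, eval_setup_to_action_tier, get_default_100k_cache, Evaluator.get_auccess as in the PHYRE tutorial) are from the library's public API:

import phyre

eval_setup = 'ball_cross_template'                  # a standard PHYRE eval setup
tier = phyre.eval_setup_to_action_tier(eval_setup)  # -> 'ball'
_, _, test_ids = phyre.get_fold(eval_setup, 0)      # fold 0 of the split

state = {
    'model': model,                                 # assumed: trained scoring model
    'cache': phyre.get_default_100k_cache(tier),    # cached actions + simulation statuses
}
evaluator = agent.eval(state, test_ids, tier)
print('AUCCESS:', evaluator.get_auccess())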
Example #2
    def real_eval(cls, cache, model, actions, task_ids, tier,
                  max_attempts_per_task, eval_batch_size, finetune_iterations,
                  refine_iterations, refine_loss, refine_lr):

        # TODO: move to a flag.
        finetune_lr = 1e-4

        model.cuda()

        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes
        assert tuple(task_ids) == simulator.task_ids

        logging.info('Ranking %d actions and simulating top %d', len(actions),
                     max_attempts_per_task)
        if refine_iterations > 0:
            logging.info(
                'Will do refining for %d iterations with lr=%e and loss=%s',
                refine_iterations, refine_lr, refine_loss)
        evaluator = phyre.Evaluator(task_ids)
        for task_index in tqdm.trange(len(task_ids)):
            task_id = simulator.task_ids[task_index]
            if refine_iterations > 0:
                refined_actions = neural_agent.refine_actions(
                    model, actions, observations[task_index], refine_lr,
                    refine_iterations, eval_batch_size, refine_loss)
            else:
                refined_actions = actions
            scores = neural_agent.eval_actions(model, refined_actions,
                                               eval_batch_size,
                                               observations[task_index])
            # Order of descending scores.
            action_order = np.argsort(-scores)
            if refine_iterations <= 0:
                statuses = cache.load_simulation_states(task_id)

            finetune_data = []
            for action_id in action_order:
                if evaluator.get_attempts_for_task(
                        task_index) >= max_attempts_per_task:
                    break
                action = refined_actions[action_id]
                if refine_iterations > 0:
                    status = simulator.simulate_action(
                        task_index,
                        action,
                        need_images=False,
                        need_scenes=False).status
                else:
                    status = phyre.SimulationStatus(statuses[action_id])
                finetune_data.append((task_index, status, action))
                evaluator.maybe_log_attempt(task_index, status)
            if evaluator.get_attempts_for_task(task_index) == 0:
                logging.warning('Made 0 attempts for task %s', task_id)
            if finetune_iterations > 0:
                neural_agent.finetune(model, finetune_data, simulator,
                                      finetune_lr, finetune_iterations)

        return evaluator
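Assuming real_eval is a classmethod on some agent class (called AgentCls below as a placeholder) and that model is its trained scoring network, a plausible call with refinement and per-task finetuning disabled, so that only cached simulation statuses are consulted, could look like this; all names outside the snippet above are assumptions:

import phyre

tier = 'ball'
cache = phyre.get_default_100k_cache(tier)
actions = cache.action_array[:10000]        # rank the first 10k cached actions
_, _, test_ids = phyre.get_fold('ball_cross_template', 0)

evaluator = AgentCls.real_eval(
    cache, model, actions, test_ids, tier,
    max_attempts_per_task=100,              # the standard PHYRE attempt budget
    eval_batch_size=512,
    finetune_iterations=0,                  # skip per-task finetuning
    refine_iterations=0,                    # no gradient refinement: use cached statuses
    refine_loss='ce',
    refine_lr=1e-3)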
Example #3
    def _eval(cls, cache, train_sim_statuses, task_ids, evaluator,
              max_attempts_per_task, mem_test_simulation_weight,
              mem_rerank_size, mem_scoring_type, **kwargs):
        del kwargs  # Unused.

        #action_scores = train_sim_statuses.astype('float32').sum(0)
        if mem_rerank_size > 0:
            train_sim_statuses = train_sim_statuses[:, :mem_rerank_size]
        positive = (train_sim_statuses.astype('float32') > 0).sum(0)
        negative = (train_sim_statuses.astype('float32') < 0).sum(0)
        if mem_scoring_type == 'relative':
            denominators = positive + negative + 1
            action_scores = positive / denominators
        elif mem_scoring_type == 'absolute':
            denominators = positive * 0 + 1
            action_scores = positive - negative
        else:
            raise ValueError(f'Unknown mem_scoring_type={mem_scoring_type}')

        regret_action_heap = MaxHeapWithSideLoad(enumerate(action_scores))

        logging.info('Found %d actions to choose from',
                     len(regret_action_heap))

        logging.info('Starting eval simulation. mem_test_simulation_weight=%f',
                     mem_test_simulation_weight)
        for i, task_id in enumerate(task_ids):
            statuses = cache.load_simulation_states(task_id)
            to_push = []
            while regret_action_heap and evaluator.get_attempts_for_task(
                    i) < max_attempts_per_task:
                action_id, success_rate = regret_action_heap.pop_max()
                status = phyre.SimulationStatus(statuses[action_id])
                evaluator.maybe_log_attempt(i, status)
                if mem_scoring_type == 'relative':
                    if status != 0:
                        successes = success_rate * denominators[action_id]
                        successes += mem_test_simulation_weight * float(
                            status > 0)
                        denominators[action_id] += mem_test_simulation_weight
                        success_rate = successes / denominators[action_id]
                elif mem_scoring_type == 'absolute':
                    success_rate += float(status) * mem_test_simulation_weight
                else:
                    raise ValueError(
                        f'Unknown mem_scoring_type={mem_scoring_type}')
                to_push.append((action_id, success_rate))
            for action, reward in to_push:
                regret_action_heap.push(action, reward)

        logging.info('Collected %s simulation samples for %s tasks',
                     len(evaluator), len(task_ids))

        return evaluator
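A hedged sketch of how this memoization-style _eval might be fed, assuming train_sim_statuses is simply the cached status matrix of the training tasks (one row per task, one column per cached action) and that the method lives on a class called MemoizeAgentCls here as a placeholder:

import numpy as np
import phyre

tier = 'ball'
cache = phyre.get_default_100k_cache(tier)
train_ids, _, test_ids = phyre.get_fold('ball_cross_template', 0)

# One row of cached simulation statuses per training task.
train_sim_statuses = np.stack(
    [cache.load_simulation_states(task_id) for task_id in train_ids])

evaluator = phyre.Evaluator(test_ids)
MemoizeAgentCls._eval(
    cache, train_sim_statuses, test_ids, evaluator,
    max_attempts_per_task=100,          # the standard PHYRE attempt budget
    mem_test_simulation_weight=1.0,
    mem_rerank_size=0,                  # 0 keeps all cached actions
    mem_scoring_type='relative')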
Example #4
    def real_eval(cls, cache, model, actions, task_ids, tier,
                  max_attempts_per_task, eval_batch_size, finetune_iterations,
                  refine_iterations, refine_loss, refine_lr):

        # TODO: move to a flag.
        finetune_lr = 1e-4

        model.cuda()

        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes

        # CUSTOM
        if os.path.exists(cls.ACTION_PATH_DIR):
            with open(cls.ACTION_PATH_DIR + '/channel_paths.pickle',
                      'rb') as fp:
                action_path_dict = pickle.load(fp)
            action_paths = torch.Tensor([
                action_path_dict[task]
                if task in action_path_dict else torch.zeros(256, 256)
                for task in task_ids
            ])[:, None].cuda()
        else:
            print("can't find action_path_dict!")
            exit(-1)

        assert tuple(task_ids) == simulator.task_ids

        logging.info('Ranking %d actions and simulating top %d', len(actions),
                     max_attempts_per_task)
        if refine_iterations > 0:
            logging.info(
                'Will do refining for %d iterations with lr=%e and loss=%s',
                refine_iterations, refine_lr, refine_loss)
        evaluator = phyre.Evaluator(task_ids)
        for task_index in tqdm.trange(len(task_ids)):
            task_id = simulator.task_ids[task_index]
            if refine_iterations > 0:
                refined_actions = neural_agent.refine_actions(
                    model, actions, observations[task_index], refine_lr,
                    refine_iterations, eval_batch_size, refine_loss)
            else:
                refined_actions = actions
            scores = neural_agent.eval_actions(
                model,
                refined_actions,
                eval_batch_size,
                observations[task_index],
                action_path=action_paths[task_index])
            # Order of descending scores.
            action_order = np.argsort(-scores)
            if refine_iterations <= 0:
                statuses = cache.load_simulation_states(task_id)

            finetune_data = []
            for action_id in action_order:
                if evaluator.get_attempts_for_task(
                        task_index) >= max_attempts_per_task:
                    break
                action = refined_actions[action_id]
                if refine_iterations > 0:
                    status = simulator.simulate_action(
                        task_index,
                        action,
                        need_images=False,
                        need_scenes=False).status
                else:
                    status = phyre.SimulationStatus(statuses[action_id])
                finetune_data.append((task_index, status, action))
                evaluator.maybe_log_attempt(task_index, status)
            if evaluator.get_attempts_for_task(task_index) == 0:
                logging.warning('Made 0 attempts for task %s', task_id)
            if finetune_iterations > 0:
                neural_agent.finetune(model, finetune_data, simulator,
                                      finetune_lr, finetune_iterations)

        return evaluator
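The CUSTOM block in this variant expects a pickle at cls.ACTION_PATH_DIR + '/channel_paths.pickle' that maps task ids to 256x256 action-path maps, which are then passed to eval_actions as an extra action_path input. A minimal sketch of writing such a file; the directory and the all-zero maps are placeholders, since any per-task 256x256 array satisfies the loader above:

import os
import pickle

import numpy as np
import phyre

_, _, test_ids = phyre.get_fold('ball_cross_template', 0)

ACTION_PATH_DIR = './action_paths'      # assumed; must match cls.ACTION_PATH_DIR
os.makedirs(ACTION_PATH_DIR, exist_ok=True)

# One 256x256 map per task id; tasks missing from the dict fall back to zeros in the loader.
action_path_dict = {task_id: np.zeros((256, 256), dtype=np.float32)
                    for task_id in test_ids}

with open(os.path.join(ACTION_PATH_DIR, 'channel_paths.pickle'), 'wb') as fp:
    pickle.dump(action_path_dict, fp)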