Example #1
    def real_eval(cls, cache, model, actions, task_ids, tier,
                  max_attempts_per_task, eval_batch_size, finetune_iterations,
                  refine_iterations, refine_loss, refine_lr):

        # TODO: move to a flag.
        finetune_lr = 1e-4

        model.cuda()

        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes
        assert tuple(task_ids) == simulator.task_ids

        logging.info('Ranking %d actions and simulating top %d', len(actions),
                     max_attempts_per_task)
        if refine_iterations > 0:
            logging.info(
                'Will do refining for %d iterations with lr=%e and loss=%s',
                refine_iterations, refine_lr, refine_loss)
        evaluator = phyre.Evaluator(task_ids)
        for task_index in tqdm.trange(len(task_ids)):
            task_id = simulator.task_ids[task_index]
            if refine_iterations > 0:
                refined_actions = neural_agent.refine_actions(
                    model, actions, observations[task_index], refine_lr,
                    refine_iterations, eval_batch_size, refine_loss)
            else:
                refined_actions = actions
            scores = neural_agent.eval_actions(model, refined_actions,
                                               eval_batch_size,
                                               observations[task_index])
            # Order of descending scores.
            action_order = np.argsort(-scores)
            if refine_iterations <= 0:
                # No refinement: the candidate actions are the cached ones, so
                # their simulation statuses can be read straight from the cache.
                statuses = cache.load_simulation_states(task_id)

            finetune_data = []
            for action_id in action_order:
                if evaluator.get_attempts_for_task(
                        task_index) >= max_attempts_per_task:
                    break
                action = refined_actions[action_id]
                if refine_iterations > 0:
                    status = simulator.simulate_action(
                        task_index,
                        action,
                        need_images=False,
                        need_scenes=False).status
                else:
                    status = phyre.SimulationStatus(statuses[action_id])
                finetune_data.append((task_index, status, action))
                evaluator.maybe_log_attempt(task_index, status)
            if evaluator.get_attempts_for_task(task_index) == 0:
                logging.warning('Made 0 attempts for task %s', task_id)
            if finetune_iterations > 0:
                neural_agent.finetune(model, finetune_data, simulator,
                                      finetune_lr, finetune_iterations)

        return evaluator
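
A minimal usage sketch for Example #1, assuming a trained PHYRE scoring network (model) and a hypothetical owning class DQNAgent that exposes real_eval as a classmethod; the eval setup, action count, and flag values below are illustrative, not taken from the source, and get_auccess is the standard PHYRE evaluator metric:

    import phyre

    eval_setup = 'ball_cross_template'
    train_ids, dev_ids, test_ids = phyre.get_fold(eval_setup, 0)
    tier = phyre.eval_setup_to_action_tier(eval_setup)

    # Cached simulation statuses for the default 100k actions of this tier.
    cache = phyre.get_default_100k_cache(tier)
    actions = cache.action_array[:10000]

    model = ...  # trained scoring network, assumed to be loaded elsewhere
    evaluator = DQNAgent.real_eval(
        cache, model, actions, dev_ids, tier,
        max_attempts_per_task=100, eval_batch_size=512,
        finetune_iterations=0, refine_iterations=0,
        refine_loss='ce', refine_lr=1e-4)
    print('AUCCESS:', evaluator.get_auccess())
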
Example #2
    def real_eval(cls, cache, model, actions, task_ids, tier,
                  max_attempts_per_task, eval_batch_size, finetune_iterations,
                  refine_iterations, refine_loss, refine_lr):

        # TODO: move to a flag.
        finetune_lr = 1e-4

        model.cuda()

        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes

        # CUSTOM: load the per-task 256x256 action-path masks and stack them
        # into a (num_tasks, 1, 256, 256) CUDA tensor; tasks missing from the
        # pickle fall back to an all-zero mask.
        if os.path.exists(cls.ACTION_PATH_DIR):
            with open(cls.ACTION_PATH_DIR + '/channel_paths.pickle',
                      'rb') as fp:
                action_path_dict = pickle.load(fp)
            action_paths = torch.Tensor([
                action_path_dict[task]
                if task in action_path_dict else torch.zeros(256, 256)
                for task in task_ids
            ])[:, None].cuda()
        else:
            print("can't find action_path_dict!")
            exit(-1)

        assert tuple(task_ids) == simulator.task_ids

        logging.info('Ranking %d actions and simulating top %d', len(actions),
                     max_attempts_per_task)
        if refine_iterations > 0:
            logging.info(
                'Will do refining for %d iterations with lr=%e and loss=%s',
                refine_iterations, refine_lr, refine_loss)
        evaluator = phyre.Evaluator(task_ids)
        for task_index in tqdm.trange(len(task_ids)):
            task_id = simulator.task_ids[task_index]
            if refine_iterations > 0:
                refined_actions = neural_agent.refine_actions(
                    model, actions, observations[task_index], refine_lr,
                    refine_iterations, eval_batch_size, refine_loss)
            else:
                refined_actions = actions
            scores = neural_agent.eval_actions(
                model,
                refined_actions,
                eval_batch_size,
                observations[task_index],
                action_path=action_paths[task_index])
            # Order of descending scores.
            action_order = np.argsort(-scores)
            if refine_iterations <= 0:
                # No refinement: the candidate actions are the cached ones, so
                # their simulation statuses can be read straight from the cache.
                statuses = cache.load_simulation_states(task_id)

            finetune_data = []
            for action_id in action_order:
                if evaluator.get_attempts_for_task(
                        task_index) >= max_attempts_per_task:
                    break
                action = refined_actions[action_id]
                if refine_iterations > 0:
                    status = simulator.simulate_action(
                        task_index,
                        action,
                        need_images=False,
                        need_scenes=False).status
                else:
                    status = phyre.SimulationStatus(statuses[action_id])
                finetune_data.append((task_index, status, action))
                evaluator.maybe_log_attempt(task_index, status)
            if evaluator.get_attempts_for_task(task_index) == 0:
                logging.warning('Made 0 attempts for task %s', task_id)
            if finetune_iterations > 0:
                neural_agent.finetune(model, finetune_data, simulator,
                                      finetune_lr, finetune_iterations)

        return evaluator
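
Example #2 differs from Example #1 only in the CUSTOM block: it loads a pickle that maps each task id to a 256x256 action-path mask and passes the per-task mask to neural_agent.eval_actions through an extra action_path argument (a modification to the stock agent code). Below is a hedged sketch of writing a file with the expected layout; the directory name, task id, and zero mask are placeholders, not values from the source:

    import os
    import pickle

    import numpy as np

    # One 256x256 mask per task id; tasks missing from the dict fall back to
    # an all-zero mask when the pickle is loaded in real_eval.
    action_path_dict = {
        '00000:001': np.zeros((256, 256), dtype=np.float32),
    }

    action_path_dir = 'action_paths'  # stands in for cls.ACTION_PATH_DIR
    os.makedirs(action_path_dir, exist_ok=True)
    with open(os.path.join(action_path_dir, 'channel_paths.pickle'), 'wb') as fp:
        pickle.dump(action_path_dict, fp)
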