Code example #1
A pytest unit test that checks SimpleObsStateProcessor.preprocess_obs against hand-built per-city observation tuples.
import numpy as np
from typing import List

# The fixtures (simple_obs_state_processor, gamestate_stub, available_pathogens)
# and the project types are assumed to come from the test suite's conftest.
def test_preprocess_obs(simple_obs_state_processor: SimpleObsStateProcessor,
                        gamestate_stub: GameState,
                        available_pathogens: List[Pathogen]):
    # Build the expected observation tuple for every city by hand.
    expected_city_obs = []
    for city in gamestate_stub.cities:
        location = (np.array([90], dtype=np.float32),
                    np.array([-90], dtype=np.float32))
        population = np.array([500], dtype=np.uint32)
        connections = np.int64(2)
        attributes = np.array([0, 1, 2, -2], dtype=np.int8)
        pathogens = simple_obs_state_processor._build_pathogen_obs_representation(
            city.pathogens, city.population,
            list(reversed(available_pathogens)), gamestate_stub)
        expected_city_obs.append(
            (location, population, connections, attributes, pathogens))

    # The processor must produce exactly the observations built above.
    actual_city_obs = simple_obs_state_processor.preprocess_obs(gamestate_stub)
    assert compare(actual_city_obs, expected_city_obs)
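
The compare helper is not shown in the snippet. A minimal sketch of what such a helper could look like, assuming the observations are nested tuples/lists of NumPy arrays and scalars (the implementation below is an assumption, not the project's actual code):

import numpy as np

def compare(actual, expected):
    # Recursively walk nested tuples/lists; leaves are arrays or scalars.
    if isinstance(actual, (tuple, list)) and isinstance(expected, (tuple, list)):
        return (len(actual) == len(expected)
                and all(compare(a, e) for a, e in zip(actual, expected)))
    # np.array_equal handles arrays and plain scalars alike.
    return np.array_equal(actual, expected)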
Code example #2
An inference-time wrapper that loads a trained RLlib Trainer and keeps sampling actions until it finds one that is valid in the current game state.
from typing import Type

import ray
from ray.rllib.agents.trainer import Trainer  # import paths assume an older RLlib release
from ray.tune.registry import register_env


class ReinforcedApproach(Approach):
    trial_max = 10  # give up after this many sampled actions per round

    def __init__(self, trainer: Type[Trainer], weights: str):
        if not ray.is_initialized():
            ray.init()
        # Observation/action preprocessing, shared with the training setup.
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        # Register the inference environment under the name the trainer expects.
        register_env(
            "ic20env", lambda _: InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor))

        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        processed_state = self.obs_state_processor.preprocess_obs(state)
        mapped_action = INVALID_ACTION

        # Sample from the trained policy until the action is valid,
        # affordable, and currently available in the game state.
        trial_count = 0
        while (mapped_action == INVALID_ACTION
               or mapped_action.cost > state.points
               or mapped_action not in actions.generate_possible_actions(state)):
            action = self.trainer.compute_action(observation=processed_state)
            mapped_action, _ = self.act_state_processor.map_action(
                action, state)

            trial_count += 1
            if trial_count >= self.trial_max:
                # No usable action found within the budget: end the round.
                mapped_action = actions.end_round()
                break

        return mapped_action

    @staticmethod
    def _load_trainer(trainer: Trainer, weights_path: str) -> Trainer:
        # Restore the policy weights from a training checkpoint.
        trainer.restore(weights_path)
        return trainer
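
A minimal usage sketch, assuming RLlib's PPOTrainer and a placeholder checkpoint path (both are illustrative assumptions, not artifacts from the project):

from ray.rllib.agents.ppo import PPOTrainer  # older RLlib import path

# "checkpoints/checkpoint-500" is a hypothetical path; current_state is a GameState.
approach = ReinforcedApproach(PPOTrainer, weights="checkpoints/checkpoint-500")
next_action = approach.process_round(current_state)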