from typing import List

import numpy as np

# Project-internal names (SimpleObsStateProcessor, GameState, Pathogen, compare)
# are imported from the project's own modules, omitted here.


def test_preprocess_obs(simple_obs_state_processor: SimpleObsStateProcessor,
                        gamestate_stub: GameState,
                        available_pathogens: List[Pathogen]):
    # Build the observation we expect for every city in the stubbed game state.
    expected_city_obs = []
    for city in gamestate_stub.cities:
        location = (np.array([90], dtype=np.float32),
                    np.array([-90], dtype=np.float32))
        population = np.array([500], dtype=np.uint32)
        connections = np.int64(2)
        attributes = np.array([0, 1, 2, -2], dtype=np.int8)
        # The pathogen block is delegated to the processor itself; passing the
        # pathogens in reverse order exercises its internal sorting.
        pathogens = simple_obs_state_processor._build_pathogen_obs_representation(
            city.pathogens, city.population,
            list(reversed(available_pathogens)), gamestate_stub)
        expected_city_obs.append(
            (location, population, connections, attributes, pathogens))

    actual_city_obs = simple_obs_state_processor.preprocess_obs(gamestate_stub)

    assert compare(actual_city_obs, expected_city_obs)
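
# A plausible conftest sketch for the `simple_obs_state_processor` fixture used
# above. It mirrors how ReinforcedApproach constructs its processor below; the
# project's actual fixture may differ.
import pytest


@pytest.fixture
def simple_obs_state_processor() -> SimpleObsStateProcessor:
    # Sort each city's pathogens by infected population, the same ordering
    # ReinforcedApproach uses at inference time.
    return SimpleObsStateProcessor(infected_population_sorting_per_city)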
from typing import Type

import ray
from ray.rllib.agents.trainer import Trainer
from ray.tune.registry import register_env


class ReinforcedApproach(Approach):
    """Plays rounds by querying a pre-trained RLlib agent."""

    trial_max = 10  # give up after this many invalid suggestions per round

    def __init__(self, trainer: Type[Trainer], weights: str):
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)

        # The environment must be registered with Ray before a trainer can be
        # instantiated against it.
        register_env(
            "ic20env",
            lambda _: InferenceIC20Environment(self.obs_state_processor,
                                               self.act_state_processor))
        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        processed_state = self.obs_state_processor.preprocess_obs(state)
        mapped_action = INVALID_ACTION
        trial_count = 0

        # Re-sample from the agent until it proposes an action that is valid,
        # affordable, and currently possible; end the round after trial_max
        # failed attempts.
        while (mapped_action == INVALID_ACTION
               or mapped_action.cost > state.points
               or mapped_action not in actions.generate_possible_actions(state)):
            action = self.trainer.compute_action(observation=processed_state)
            mapped_action, _ = self.act_state_processor.map_action(action, state)
            trial_count += 1
            if trial_count >= self.trial_max:
                mapped_action = actions.end_round()
                break

        return mapped_action

    @classmethod
    def _load_trainer(cls, trainer: Trainer, weights_path: str) -> Trainer:
        trainer.restore(weights_path)
        return trainer
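
# Usage sketch (illustrative, not from the project): restore a trained PPO
# agent from a checkpoint and let it choose the next action for a round.
# `PPOTrainer` is RLlib's PPO trainer class; the checkpoint path and the
# `state` object are assumptions for illustration.
#
#   from ray.rllib.agents.ppo import PPOTrainer
#
#   approach = ReinforcedApproach(PPOTrainer, "checkpoints/checkpoint-100")
#   next_action = approach.process_round(state)  # state: current GameState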