def __init__(self, trainer: Trainer.__class__, weights: str):
    """Bring up Ray, wire the state processors into the env, restore the trainer.

    :param trainer: RLlib trainer class to instantiate against "ic20env".
    :param weights: checkpoint path handed to ``trainer.restore``.
    """
    # Ray may already be running (e.g. started by the surrounding process).
    if not ray.is_initialized():
        ray.init()

    self.obs_state_processor = SimpleObsStateProcessor(
        infected_population_sorting_per_city)
    # Action mapping must use the same pathogen ordering as the observations.
    self.act_state_processor = SimpleActStateProcessor(
        sort_pathogens=self.obs_state_processor.sort_pathogens)

    register_env(
        "ic20env",
        lambda _: InferenceIC20Environment(self.obs_state_processor,
                                           self.act_state_processor))
    self.trainer = self._load_trainer(trainer(env="ic20env"), weights)
def test_update_city_pathogens_representations(
        simple_obs_state_processor: SimpleObsStateProcessor,
        available_pathogens: List[Pathogen]):
    """Inserting one pathogen into an empty list pads the other slots with stubs."""
    status = 4  # medication
    infected = np.array([np.round(500 * .8)], dtype=np.uint32)
    attributes = np.array([0, 1, 2, -2], dtype=np.int8)
    representation = (status, infected, attributes)
    pathogen = available_pathogens[3]

    stub = simple_obs_state_processor._build_pathogen_stub()
    # Pathogen at index 3 → three stub slots, then the real representation.
    expected = [stub, stub, stub, representation]

    actual = simple_obs_state_processor._update_city_pathogens_representations(
        [], pathogen, representation, available_pathogens)

    assert compare(actual, expected) and len(actual) == len(expected)
def test_preprocess_obs(simple_obs_state_processor: SimpleObsStateProcessor,
                        gamestate_stub: GameState,
                        available_pathogens: List[Pathogen]):
    """preprocess_obs emits one (location, population, connections, attributes,
    pathogens) tuple per city of the stub game state."""
    expected = []
    for city in gamestate_stub.cities:
        loc = (np.array([90], dtype=np.float32),
               np.array([-90], dtype=np.float32))
        pop = np.array([500], dtype=np.uint32)
        conn = np.int64(2)
        attrs = np.array([0, 1, 2, -2], dtype=np.int8)
        # The processor's sorting reverses the fixture order for this stub.
        pathogens = simple_obs_state_processor._build_pathogen_obs_representation(
            city.pathogens, city.population,
            list(reversed(available_pathogens)), gamestate_stub)
        expected.append((loc, pop, conn, attrs, pathogens))

    actual = simple_obs_state_processor.preprocess_obs(gamestate_stub)
    assert compare(actual, expected)
def test_map_pathogen_status(
        simple_obs_state_processor: SimpleObsStateProcessor,
        available_pathogens: List[Pathogen], gamestate_stub: GameState):
    """Each fixture pathogen maps to its expected status code."""
    expected_states = (
        5,  # vaccination exists
        4,  # medication exists
        8,  # medication && vaccination exist
        6,  # medication exists, vaccination in dev
        7,  # vaccination exists, medication in dev
    )
    for pathogen, expected in zip(available_pathogens, expected_states):
        actual = simple_obs_state_processor._map_pathogen_status(
            pathogen, gamestate_stub)
        assert actual == expected
def test_build_pathogen_obs_representation(
        simple_obs_state_processor: SimpleObsStateProcessor,
        city_with_pathogens: City, available_pathogens: List[Pathogen],
        gamestate_stub: GameState):
    """Two city pathogens yield real representations; remaining slots are stubs."""
    # GIVEN
    city_pathogens = city_with_pathogens.pathogens
    city_population = 5000

    # WHEN
    actual = simple_obs_state_processor._build_pathogen_obs_representation(
        city_pathogens, city_population, available_pathogens, gamestate_stub)

    # THEN
    expected = [
        (4,  # medication
         np.array([np.round(city_population * .8)], dtype=np.uint32),
         np.array([0, 1, 2, -2], dtype=np.int8)),
        (8,  # vaccination && medication
         np.array([np.round(city_population * .8)], dtype=np.uint32),
         np.array([0, 1, 2, -2], dtype=np.int8)),
    ]
    # Pad the three unused slots with stub representations.
    expected.extend(simple_obs_state_processor._build_pathogen_stub()
                    for _ in range(3))

    assert compare(actual, tuple(expected))
class ReinforcedApproach(Approach):
    """Approach that delegates action selection to a restored RLlib trainer.

    The policy is sampled repeatedly until it produces an action that is
    valid, affordable, and actually possible in the current game state;
    after ``trial_max`` failed attempts the round is simply ended.
    """

    # Upper bound on policy samples per round before falling back to end_round.
    trial_max = 10

    def __init__(self, trainer: Trainer.__class__, weights: str):
        """Bring up Ray, register the inference env, and restore the trainer.

        :param trainer: RLlib trainer class to instantiate against "ic20env".
        :param weights: checkpoint path handed to ``trainer.restore``.
        """
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        # Action mapping must share the observation processor's pathogen order.
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        register_env(
            "ic20env",
            lambda _: InferenceIC20Environment(self.obs_state_processor,
                                               self.act_state_processor))
        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        """Pick this round's action for the given game state."""
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        """Sample the policy until it yields a playable action.

        An action is rejected while it is invalid, costs more points than
        available, or is not among the currently possible actions. After
        ``trial_max`` attempts, fall back to ending the round (note: this
        fallback only helps with stochastic policies).
        """
        observation = self.obs_state_processor.preprocess_obs(state)
        chosen = INVALID_ACTION
        attempts = 0
        # Short-circuit: the INVALID_ACTION check guards the .cost access.
        while (chosen == INVALID_ACTION or chosen.cost > state.points
               or chosen not in actions.generate_possible_actions(state)):
            raw_action = self.trainer.compute_action(observation=observation)
            chosen, _ = self.act_state_processor.map_action(raw_action, state)
            attempts += 1
            if attempts >= self.trial_max:
                chosen = actions.end_round()
                break
        return chosen

    @classmethod
    def _load_trainer(cls, trainer: Trainer, weights_path: str) -> Trainer:
        """Restore the trainer's weights from a checkpoint and return it."""
        trainer.restore(weights_path)
        return trainer
from ray.tune.logger import pretty_print from ray.tune.util import merge_dicts from approaches.reinforced.action_state_processor import SimpleActStateProcessor from approaches.reinforced.constants import DEFAULT_CONFIG from approaches.reinforced.environment import SimplifiedIC20Environment, CHECKPOINT_FILE from approaches.reinforced.observation_state_processor import SimpleObsStateProcessor, \ infected_population_sorting_per_city # won't start sgd from approaches.reinforced.reward_function import UnstableReward if __name__ == "__main__": ray.init( address='auto' ) # address = None when running locally. address = 'auto' when running on aws.] obs_state_processor = SimpleObsStateProcessor( pathogen_sorting_strategy=infected_population_sorting_per_city) act_state_processor = SimpleActStateProcessor( sort_pathogens=obs_state_processor.sort_pathogens) # Notice that trial_max will only work for stochastic policies register_env( "ic20env", lambda _: SimplifiedIC20Environment(obs_state_processor, act_state_processor, UnstableReward(), trial_max=10)) ten_gig = 10737418240 trainer = A2CTrainer( env="ic20env", config=merge_dicts( DEFAULT_CONFIG,
def test_sort_pathogens(simple_obs_state_processor: SimpleObsStateProcessor,
                        available_pathogens: List[Pathogen],
                        gamestate_stub: GameState):
    """For the stub state, sorting reverses the fixture's pathogen order."""
    actual = simple_obs_state_processor.sort_pathogens(available_pathogens,
                                                       gamestate_stub)
    assert actual == list(reversed(available_pathogens))
def simple_obs_state_processor(
        available_pathogens: List[Pathogen]) -> SimpleObsStateProcessor:
    """Processor fixture whose pathogen ordering mirrors the fixture list."""

    def fixture_order(city: City, pathogen: Pathogen) -> float:
        # Rank each pathogen by its position in the fixture list,
        # independent of the city.
        return available_pathogens.index(pathogen)

    return SimpleObsStateProcessor(fixture_order)
def test_aggregate_obs_space_over_cities(
        simple_obs_state_processor: SimpleObsStateProcessor):
    """Aggregation repeats the per-city space once per city slot."""
    space = gym.spaces.Discrete(5)
    aggregated = simple_obs_state_processor._aggregate_obs_space_over_cities(
        space)
    assert aggregated == [space] * MAX_CITIES
def test_get_pathogen_population(
        simple_obs_state_processor: SimpleObsStateProcessor,
        gamestate_stub: GameState, available_pathogens: List[Pathogen]):
    """Each pathogen infects 80% of every stub city's 500 inhabitants."""
    expected = len(gamestate_stub.cities) * 500 * .8
    for pathogen in available_pathogens:
        actual = simple_obs_state_processor._get_pathogen_population(
            gamestate_stub, pathogen)
        assert actual == expected