def test_generate_global_vaccine_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        available_pathogens: List[Pathogen],
        pathogens_with_vaccination: List[Pathogen]):
    develop_vaccine_actions = [
        actions.develop_vaccine(pathogen.index)
        if (pathogen not in pathogens_with_vaccination) else INVALID_ACTION
        for pathogen in available_pathogens
    ]

    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            available_pathogens,
            pathogens_with_vaccination)) == develop_vaccine_actions
    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            available_pathogens,
            list(reversed(
                pathogens_with_vaccination)))) == develop_vaccine_actions
    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            list(reversed(available_pathogens)),
            pathogens_with_vaccination)) == list(
                reversed(develop_vaccine_actions))
def test_map_global_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        gamestate_stub: GameState, random_action_id: int):
    chosen_action: int = random_action_id
    pathogens_with_vacc_or_med = (1, 2, 3, 5, 6, 7, 8, 9)

    if random_action_id == 0 or random_action_id not in pathogens_with_vacc_or_med:
        assert simple_act_state_processor._map_global_actions(chosen_action, gamestate_stub) \
               in actions.generate_possible_actions(gamestate_stub)
    else:
        assert simple_act_state_processor._map_global_actions(
            chosen_action, gamestate_stub) == INVALID_ACTION
def test_map_city_actions(simple_act_state_processor: SimpleActStateProcessor,
                          gamestate_stub: GameState, random_action_id: int):
    chosen_action: int = random_action_id % CITY_ACTIONSPACE
    pathogens_with_vacc_or_med = (6, 8, 10, 12, 13, 14)

    if chosen_action in range(
            0, 6) or chosen_action in pathogens_with_vacc_or_med:
        assert simple_act_state_processor._map_city_actions(chosen_action, gamestate_stub) \
               in actions.generate_possible_actions(gamestate_stub)
    else:
        assert simple_act_state_processor._map_city_actions(
            chosen_action, gamestate_stub) == INVALID_ACTION
def test_penalize_action(simple_act_state_processor: SimpleActStateProcessor,
                         gamestate_stub: Mock, monkeypatch,
                         chosen_action: Optional[Action],
                         possible_actions: List[Action],
                         expected_penalty: int):
    # GIVEN
    monkeypatch.setattr('models.actions.generate_possible_actions',
                        lambda _: possible_actions)
    # THEN
    assert simple_act_state_processor.penalize_action(chosen_action, gamestate_stub) \
           == (chosen_action, expected_penalty)
def test_generate_city_med_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        city_with_pathogens: City, available_pathogens: List[Pathogen],
        pathogens_with_medication: List[Pathogen]):
    deploy_med_actions = [
        actions.deploy_medication(pathogen.index, city_with_pathogens.index)
        if (pathogen in city_with_pathogens.pathogens
            and pathogen in pathogens_with_medication) else INVALID_ACTION
        for pathogen in available_pathogens
    ]

    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, available_pathogens,
        pathogens_with_medication) == deploy_med_actions
    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, available_pathogens,
        list(reversed(pathogens_with_medication))) == deploy_med_actions
    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, list(reversed(available_pathogens)),
        pathogens_with_medication) == list(reversed(deploy_med_actions))
def test_map_action(simple_act_state_processor: SimpleActStateProcessor,
                    gamestate_stub: GameState, random_action_id: int):
    if random_action_id < GLOBAL_ACTIONSPACE:
        expected = simple_act_state_processor.penalize_action(
            simple_act_state_processor._map_global_actions(
                random_action_id, gamestate_stub), gamestate_stub)
        assert expected == simple_act_state_processor.map_action(
            random_action_id, gamestate_stub)
    else:
        random_city_action_id = random_action_id - GLOBAL_ACTIONSPACE
        expected = simple_act_state_processor.penalize_action(
            simple_act_state_processor._map_city_actions(
                random_city_action_id, gamestate_stub), gamestate_stub)
        assert expected == simple_act_state_processor.map_action(
            random_action_id, gamestate_stub)
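# Sketch of the id dispatch exercised by test_map_action above. This is not the
# project's map_action implementation; global_actionspace is passed as a parameter
# here only to keep the example self-contained (the real constant lives in
# approaches.reinforced.constants).
def split_action_id(flat_id: int, global_actionspace: int):
    if flat_id < global_actionspace:
        return "global", flat_id                       # handled by _map_global_actions
    return "city", flat_id - global_actionspace        # handled by _map_city_actions

assert split_action_id(5, global_actionspace=12) == ("global", 5)
assert split_action_id(16, global_actionspace=12) == ("city", 4)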
class ReinforcedApproach(Approach):
    trial_max = 10

    def __init__(self, trainer: Trainer.__class__, weights: str):
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        register_env(
            "ic20env", lambda _: InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor))
        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        processed_state = self.obs_state_processor.preprocess_obs(state)
        mapped_action = INVALID_ACTION
        trial_count = 0
        # Re-sample from the policy until it yields an affordable, currently
        # possible action; after trial_max failed tries, fall back to ending the round.
        while (mapped_action == INVALID_ACTION or mapped_action.cost > state.points) \
                or mapped_action not in actions.generate_possible_actions(state):
            action = self.trainer.compute_action(observation=processed_state)
            mapped_action, _ = self.act_state_processor.map_action(action, state)
            trial_count += 1
            if trial_count >= self.trial_max:
                mapped_action = actions.end_round()
                break
        return mapped_action

    @classmethod
    def _load_trainer(cls, trainer: Trainer, weights_path: str) -> Trainer:
        trainer.restore(weights_path)
        return trainer
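# Minimal usage sketch for ReinforcedApproach, assuming an RLlib A2CTrainer and a
# locally available checkpoint; the checkpoint path below is a placeholder, not a
# path from the project.
from ray.rllib.agents.a3c import A2CTrainer

approach = ReinforcedApproach(A2CTrainer, weights="checkpoints/checkpoint-100")

def act_once(current_state: GameState) -> Action:
    # Delegates to the retry loop in _choose_actionable_action via process_round.
    return approach.process_round(current_state)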
# NOTE: the first four imports are added for completeness; their module paths
# assume a pre-2.0 Ray/RLlib release.
import ray
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.utils import merge_dicts
from ray.tune.registry import register_env

from approaches.reinforced.action_state_processor import SimpleActStateProcessor
from approaches.reinforced.constants import DEFAULT_CONFIG
from approaches.reinforced.environment import SimplifiedIC20Environment, CHECKPOINT_FILE
from approaches.reinforced.observation_state_processor import SimpleObsStateProcessor, \
    infected_population_sorting_per_city
from approaches.reinforced.reward_function import UnstableReward

if __name__ == "__main__":
    # address=None when running locally, address='auto' when running on AWS.
    ray.init(address='auto')
    obs_state_processor = SimpleObsStateProcessor(
        pathogen_sorting_strategy=infected_population_sorting_per_city)
    act_state_processor = SimpleActStateProcessor(
        sort_pathogens=obs_state_processor.sort_pathogens)

    # Note that trial_max only has an effect for stochastic policies.
    register_env(
        "ic20env", lambda _: SimplifiedIC20Environment(obs_state_processor,
                                                       act_state_processor,
                                                       UnstableReward(),
                                                       trial_max=10))
    ten_gig = 10_737_418_240  # 10 GiB in bytes
    trainer = A2CTrainer(
        env="ic20env",
        config=merge_dicts(
            DEFAULT_CONFIG,
            {
                # -- Specific parameters
def test_generate_action_space(
        simple_act_state_processor: SimpleActStateProcessor):
    assert spaces.Discrete(
        MAX_ACTIONSPACE) == simple_act_state_processor.generate_action_space()
@pytest.fixture
def simple_act_state_processor(monkeypatch) -> SimpleActStateProcessor:
    def inverse_ordering(pathogens: List[Pathogen],
                         game_state: GameState) -> List[Pathogen]:
        return list(reversed(pathogens))

    return SimpleActStateProcessor(inverse_ordering)
def test_transform_for_city_action(
        simple_act_state_processor: SimpleActStateProcessor,
        random_city_id: int, random_action_id: int):
    action: int = random_city_id * CITY_ACTIONSPACE + random_action_id
    assert simple_act_state_processor._transform_for_city_action(action) == (
        random_action_id, random_city_id)
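# Illustrative inverse of the flattening used in the test above: the flat id is
# city_id * CITY_ACTIONSPACE + action_id, so the reverse transform is a divmod.
# Not the project's implementation; city_actionspace is a parameter here only to
# keep the sketch self-contained.
def transform_for_city_action(flat_action: int, city_actionspace: int):
    city_id, action_id = divmod(flat_action, city_actionspace)
    return action_id, city_id

assert transform_for_city_action(3 * 20 + 7, city_actionspace=20) == (7, 3)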