def test_generate_global_vaccine_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        available_pathogens: List[Pathogen],
        pathogens_with_vaccination: List[Pathogen]):
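    # develop_vaccine is expected for every pathogen not in pathogens_with_vaccination;
    # the rest map to INVALID_ACTION. The order of pathogens_with_vaccination must not
    # change the result, while reversing available_pathogens reverses the output.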
    develop_vaccine_actions = [
        actions.develop_vaccine(pathogen.index) if
        (pathogen not in pathogens_with_vaccination) else INVALID_ACTION
        for pathogen in available_pathogens
    ]

    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            available_pathogens,
            pathogens_with_vaccination)) == develop_vaccine_actions

    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            available_pathogens, list(reversed(
                pathogens_with_vaccination)))) == develop_vaccine_actions

    assert list(
        simple_act_state_processor._generate_global_vaccine_actions(
            list(reversed(available_pathogens)),
            pathogens_with_vaccination)) == list(
                reversed(develop_vaccine_actions))
def test_map_global_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        gamestate_stub: GameState, random_action_id: int):
    chosen_action: int = random_action_id
    pathogens_with_vacc_or_med = (1, 2, 3, 5, 6, 7, 8, 9)
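    # id 0 and every id outside the tuple above should map to a currently possible
    # action; the listed ids should map to INVALID_ACTION.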
    if random_action_id == 0 or random_action_id not in pathogens_with_vacc_or_med:
        assert simple_act_state_processor._map_global_actions(chosen_action, gamestate_stub) \
               in actions.generate_possible_actions(gamestate_stub)
    else:
        assert simple_act_state_processor._map_global_actions(
            chosen_action, gamestate_stub) == INVALID_ACTION
def test_map_city_actions(simple_act_state_processor: SimpleActStateProcessor,
                          gamestate_stub: GameState, random_action_id: int):
    chosen_action: int = random_action_id % CITY_ACTIONSPACE
    pathogens_with_vacc_or_med = (6, 8, 10, 12, 13, 14)
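    # ids 0-5 and the ids listed above should map to a currently possible action;
    # every other id maps to INVALID_ACTION.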
    if chosen_action in range(
            0, 6) or chosen_action in pathogens_with_vacc_or_med:
        assert simple_act_state_processor._map_city_actions(chosen_action, gamestate_stub) \
               in actions.generate_possible_actions(gamestate_stub)
    else:
        assert simple_act_state_processor._map_city_actions(
            chosen_action, gamestate_stub) == INVALID_ACTION
def test_penalize_action(simple_act_state_processor: SimpleActStateProcessor,
                         gamestate_stub: Mock, monkeypatch,
                         chosen_action: Optional[Action],
                         possible_actions: List[Action],
                         expected_penalty: int):
    # GIVEN
    monkeypatch.setattr('models.actions.generate_possible_actions',
                        lambda _: possible_actions)
    # THEN
    assert simple_act_state_processor.penalize_action(chosen_action, gamestate_stub) \
           == (chosen_action, expected_penalty)
def test_generate_city_med_actions(
        simple_act_state_processor: SimpleActStateProcessor,
        city_with_pathogens: City, available_pathogens: List[Pathogen],
        pathogens_with_medication: List[Pathogen]):
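    # deploy_medication is expected only for pathogens that are both present in the
    # city and listed in pathogens_with_medication; all others map to INVALID_ACTION.
    # Reversing available_pathogens reverses the output; reversing the medication
    # list must not change it.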
    deploy_med_actions = [
        actions.deploy_medication(pathogen.index, city_with_pathogens.index) if
        (pathogen in city_with_pathogens.pathogens
         and pathogen in pathogens_with_medication) else INVALID_ACTION
        for pathogen in available_pathogens
    ]

    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, available_pathogens,
        pathogens_with_medication) == deploy_med_actions

    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, available_pathogens,
        list(reversed(pathogens_with_medication))) == deploy_med_actions

    assert simple_act_state_processor._generate_city_med_actions(
        city_with_pathogens, list(reversed(available_pathogens)),
        pathogens_with_medication) == list(reversed(deploy_med_actions))
def test_map_action(simple_act_state_processor: SimpleActStateProcessor,
                    gamestate_stub: GameState, random_action_id: int):
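    # map_action should dispatch ids below GLOBAL_ACTIONSPACE to the global mapping
    # and the remaining ids (with the offset removed) to the city mapping, applying
    # the penalty in both cases.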
    if random_action_id < GLOBAL_ACTIONSPACE:
        expected = simple_act_state_processor.penalize_action(
            simple_act_state_processor._map_global_actions(
                random_action_id, gamestate_stub), gamestate_stub)
        assert expected == simple_act_state_processor.map_action(
            random_action_id, gamestate_stub)
    else:
        random_city_action_id = random_action_id - GLOBAL_ACTIONSPACE
        expected = simple_act_state_processor.penalize_action(
            simple_act_state_processor._map_city_actions(
                random_city_action_id, gamestate_stub), gamestate_stub)
        assert expected == simple_act_state_processor.map_action(
            random_action_id, gamestate_stub)
class ReinforcedApproach(Approach):
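    # Maximum number of actions sampled from the policy per round before
    # falling back to end_round.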
    trial_max = 10

    def __init__(self, trainer: Trainer.__class__, weights: str):
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        register_env(
            "ic20env", lambda _: InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor))

        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        processed_state = self.obs_state_processor.preprocess_obs(state)
        mapped_action = INVALID_ACTION

        trial_count = 0
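        # Re-sample from the policy until the mapped action is valid, affordable and
        # currently possible, or until trial_max attempts force an end_round fallback.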
        while (mapped_action == INVALID_ACTION or mapped_action.cost > state.points) \
                or mapped_action not in actions.generate_possible_actions(state):
            action = self.trainer.compute_action(observation=processed_state)
            mapped_action, _ = self.act_state_processor.map_action(
                action, state)

            trial_count += 1
            if trial_count >= self.trial_max:
                mapped_action = actions.end_round()
                break

        return mapped_action

    @classmethod
    def _load_trainer(cls, trainer: Trainer, weights_path: str) -> Trainer:
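        # Restore the trainer's weights from the checkpoint at weights_path and
        # return it ready for use.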
        trainer.restore(weights_path)
        return trainer
from approaches.reinforced.action_state_processor import SimpleActStateProcessor
from approaches.reinforced.constants import DEFAULT_CONFIG
from approaches.reinforced.environment import SimplifiedIC20Environment, CHECKPOINT_FILE
from approaches.reinforced.observation_state_processor import SimpleObsStateProcessor, \
    infected_population_sorting_per_city
# won't start sgd
from approaches.reinforced.reward_function import UnstableReward

if __name__ == "__main__":
    ray.init(
        address='auto'
    )  # address=None when running locally; address='auto' when running on AWS.
    obs_state_processor = SimpleObsStateProcessor(
        pathogen_sorting_strategy=infected_population_sorting_per_city)
    act_state_processor = SimpleActStateProcessor(
        sort_pathogens=obs_state_processor.sort_pathogens)
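    # The action processor reuses the observation processor's sorting strategy so
    # both work with the same pathogen ordering.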

    # Note: trial_max only has an effect with stochastic policies; a deterministic
    # policy would keep producing the same action when re-sampled.
    register_env(
        "ic20env", lambda _: SimplifiedIC20Environment(obs_state_processor,
                                                       act_state_processor,
                                                       UnstableReward(),
                                                       trial_max=10))
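    # 10 GiB expressed in bytes; presumably used for a memory-related setting in the
    # (truncated) trainer config below.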
    ten_gig = 10737418240

    trainer = A2CTrainer(
        env="ic20env",
        config=merge_dicts(
            DEFAULT_CONFIG,
            {
                # -- Specific parameters
def test_generate_action_space(
        simple_act_state_processor: SimpleActStateProcessor):
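    # The generated action space should be a single discrete space spanning
    # MAX_ACTIONSPACE ids.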
    assert spaces.Discrete(
        MAX_ACTIONSPACE) == simple_act_state_processor.generate_action_space()
@pytest.fixture
def simple_act_state_processor(monkeypatch) -> SimpleActStateProcessor:
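    # Sorting stub that simply reverses the given pathogen list; game_state is ignored.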
    def inverse_ordering(pathogens: List[Pathogen],
                         game_state: GameState) -> List[Pathogen]:
        return list(reversed(pathogens))

    return SimpleActStateProcessor(inverse_ordering)
def test_transform_for_city_action(
        simple_act_state_processor: SimpleActStateProcessor,
        random_city_id: int, random_action_id: int):
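    # A flat city-action id encodes city and action as
    # city_id * CITY_ACTIONSPACE + action_id; the transform must recover both parts.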
    action: int = random_city_id * CITY_ACTIONSPACE + random_action_id
    assert simple_act_state_processor._transform_for_city_action(action) == (
        random_action_id, random_city_id)