def __init__(self, trainer: Trainer.__class__, weights: str):
        """Create the state processors, register the inference env and restore the trainer.

        :param trainer: trainer class; it is instantiated here with ``env="ic20env"``.
        :param weights: checkpoint location handed to ``self._load_trainer``.
        """
        # Only start Ray when no runtime has been initialised yet.
        ray_already_running = ray.is_initialized()
        if not ray_already_running:
            ray.init()

        obs_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.obs_state_processor = obs_processor
        # The action processor reuses the observation processor's pathogen sorting.
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=obs_processor.sort_pathogens)

        def build_env(_):
            # The processors are read from ``self`` when the env is created,
            # not when it is registered.
            return InferenceIC20Environment(self.obs_state_processor,
                                            self.act_state_processor)

        register_env("ic20env", build_env)

        trainer_instance = trainer(env="ic20env")
        self.trainer = self._load_trainer(trainer_instance, weights)
Esempio n. 2
0
def test_update_city_pathogens_representations(
        simple_obs_state_processor: SimpleObsStateProcessor,
        available_pathogens: List[Pathogen]):
    """Adding the pathogen at index 3 to an empty list yields three stubs followed by it."""
    # GIVEN a representation for the fourth available pathogen
    medication_status = 4  # medication
    infected = np.array([np.round(500 * .8)], dtype=np.uint32)
    attributes = np.array([0, 1, 2, -2], dtype=np.int8)
    representation = (medication_status, infected, attributes)
    target_pathogen = available_pathogens[3]
    placeholder = simple_obs_state_processor._build_pathogen_stub()
    expected = [placeholder] * 3 + [representation]

    # WHEN it is merged into an initially empty representation list
    actual = simple_obs_state_processor._update_city_pathogens_representations(
        [], target_pathogen, representation, available_pathogens)

    # THEN both content and length have to match
    assert compare(actual, expected)
    assert len(actual) == len(expected)
Esempio n. 3
0
def test_preprocess_obs(simple_obs_state_processor: SimpleObsStateProcessor,
                        gamestate_stub: GameState,
                        available_pathogens: List[Pathogen]):
    """``preprocess_obs`` must produce one observation tuple per city of the stub state."""

    def expected_obs_for(city):
        # Fresh arrays are built for every city, so no expected entry shares
        # array objects with another one.
        coordinates = (np.array([90], dtype=np.float32),
                       np.array([-90], dtype=np.float32))
        inhabitants = np.array([500], dtype=np.uint32)
        connection_count = np.int64(2)
        city_attributes = np.array([0, 1, 2, -2], dtype=np.int8)
        # The pathogen part is derived with the reversed pathogen list.
        pathogen_obs = simple_obs_state_processor._build_pathogen_obs_representation(
            city.pathogens, city.population,
            list(reversed(available_pathogens)), gamestate_stub)
        return (coordinates, inhabitants, connection_count, city_attributes,
                pathogen_obs)

    expected_city_obs = [expected_obs_for(city)
                         for city in gamestate_stub.cities]

    actual_city_obs = simple_obs_state_processor.preprocess_obs(gamestate_stub)
    assert compare(actual_city_obs, expected_city_obs)
Esempio n. 4
0
def test_map_pathogen_status(
        simple_obs_state_processor: SimpleObsStateProcessor,
        available_pathogens: List[Pathogen], gamestate_stub: GameState):
    """Each available pathogen must map to the status code listed at the same position."""
    expected_states = (
        5,  # vaccination exists
        4,  # medication exists
        8,  # medication && vaccination exist
        6,  # medication exists vaccination in dev
        7,  # vaccination exists medication in dev
    )
    for pathogen, expected_state in zip(available_pathogens, expected_states):
        actual_state = simple_obs_state_processor._map_pathogen_status(
            pathogen, gamestate_stub)
        assert actual_state == expected_state
Esempio n. 5
0
def test_build_pathogen_obs_representation(
        simple_obs_state_processor: SimpleObsStateProcessor,
        city_with_pathogens: City, available_pathogens: List[Pathogen],
        gamestate_stub: GameState):
    """Two concrete pathogen entries are expected, followed by three stub entries."""
    # GIVEN
    city_pathogens = city_with_pathogens.pathogens
    city_population = 5000

    # WHEN
    actual = simple_obs_state_processor._build_pathogen_obs_representation(
        city_pathogens, city_population, available_pathogens, gamestate_stub)

    # THEN
    def concrete_entry(status):
        # Both concrete pathogens share infection share and attributes and
        # differ only in their status code.
        infected = np.array([np.round(city_population * .8)],
                            dtype=np.uint32)
        attributes = np.array([0, 1, 2, -2], dtype=np.int8)
        return (status, infected, attributes)

    concrete_entries = (
        concrete_entry(4),  # patho1: medication
        concrete_entry(8),  # patho2: vaccination && medication
    )
    stub_entries = tuple(simple_obs_state_processor._build_pathogen_stub()
                         for _ in range(3))
    expected = concrete_entries + stub_entries

    assert compare(actual, expected)
class ReinforcedApproach(Approach):
    """Approach that lets a restored trainer pick the action for every round.

    The raw trainer output is translated into a game action by the action
    state processor. The policy is queried again until the translated action
    is actionable, at most ``trial_max`` times per round.
    """

    # Upper bound on policy queries per round; once exhausted the round is ended.
    trial_max = 10

    def __init__(self, trainer: Trainer.__class__, weights: str):
        """Create the state processors, register the env and restore the trainer.

        :param trainer: trainer class; it is instantiated with ``env="ic20env"``.
        :param weights: checkpoint location handed to ``_load_trainer``.
        """
        # Only start Ray when no runtime has been initialised yet.
        if not ray.is_initialized():
            ray.init()
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        # The action processor reuses the observation processor's pathogen sorting.
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)
        register_env(
            "ic20env", lambda _: InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor))

        self.trainer = self._load_trainer(trainer(env="ic20env"), weights)

    def process_round(self, state: GameState):
        """Return the action to play for ``state``."""
        return self._choose_actionable_action(state)

    def _choose_actionable_action(self, state: GameState) -> Action:
        """Query the policy until it yields an actionable action.

        The policy is asked at most ``trial_max`` times. The first mapped
        action that is actionable is returned immediately, including one
        produced by the very last attempt. Only when every attempt fails (or
        ``trial_max`` is not positive) is the round ended.

        :param state: current game state.
        :return: an actionable action, or ``actions.end_round()`` as fallback.
        """
        processed_state = self.obs_state_processor.preprocess_obs(state)

        for _attempt in range(self.trial_max):
            action = self.trainer.compute_action(observation=processed_state)
            mapped_action, _ = self.act_state_processor.map_action(
                action, state)
            if self._is_actionable(mapped_action, state):
                return mapped_action

        return actions.end_round()

    @staticmethod
    def _is_actionable(action: Action, state: GameState) -> bool:
        """Tell whether ``action`` is valid, affordable and currently possible."""
        # The invalid-action check comes first so that ``cost`` is never read
        # from the INVALID_ACTION sentinel.
        if action == INVALID_ACTION:
            return False
        if action.cost > state.points:
            return False
        return action in actions.generate_possible_actions(state)

    @classmethod
    def _load_trainer(cls, trainer: Trainer, weights_path: str) -> Trainer:
        """Restore ``trainer`` from ``weights_path`` and return the same instance."""
        trainer.restore(weights_path)
        return trainer
from ray.tune.logger import pretty_print
from ray.tune.util import merge_dicts

from approaches.reinforced.action_state_processor import SimpleActStateProcessor
from approaches.reinforced.constants import DEFAULT_CONFIG
from approaches.reinforced.environment import SimplifiedIC20Environment, CHECKPOINT_FILE
from approaches.reinforced.observation_state_processor import SimpleObsStateProcessor, \
    infected_population_sorting_per_city
# won't start sgd
from approaches.reinforced.reward_function import UnstableReward

if __name__ == "__main__":
    # Script entry point: connect to Ray, build the state processors and
    # register the training environment under the name "ic20env".
    ray.init(
        address='auto'
    )  # address = None when running locally. address = 'auto' when running on AWS.
    obs_state_processor = SimpleObsStateProcessor(
        pathogen_sorting_strategy=infected_population_sorting_per_city)
    # The action processor reuses the observation processor's pathogen sorting.
    act_state_processor = SimpleActStateProcessor(
        sort_pathogens=obs_state_processor.sort_pathogens)

    # Notice that trial_max will only work for stochastic policies
    register_env(
        "ic20env", lambda _: SimplifiedIC20Environment(obs_state_processor,
                                                       act_state_processor,
                                                       UnstableReward(),
                                                       trial_max=10))
    # 10 * 1024**3, i.e. 10 GiB in bytes. Its use is not visible in this
    # excerpt; presumably a memory limit in the trainer config (TODO confirm).
    ten_gig = 10737418240

    trainer = A2CTrainer(
        env="ic20env",
        config=merge_dicts(
            DEFAULT_CONFIG,
Esempio n. 8
0
def test_sort_pathogens(simple_obs_state_processor: SimpleObsStateProcessor,
                        available_pathogens: List[Pathogen],
                        gamestate_stub: GameState):
    """``sort_pathogens`` is expected to return the available pathogens in reverse order."""
    expected_order = list(reversed(available_pathogens))
    actual_order = simple_obs_state_processor.sort_pathogens(
        available_pathogens, gamestate_stub)
    assert expected_order == actual_order
Esempio n. 9
0
def simple_obs_state_processor(
        available_pathogens: List[Pathogen]) -> SimpleObsStateProcessor:
    """Build a processor whose ordering callable ranks a pathogen by its list position.

    The callable passed to ``SimpleObsStateProcessor`` returns the index of
    the pathogen inside ``available_pathogens``.
    """

    def rank_by_list_position(city: City, pathogen: Pathogen) -> float:
        # ``city`` is accepted but unused; only the pathogen's position counts.
        position = available_pathogens.index(pathogen)
        return position

    processor = SimpleObsStateProcessor(rank_by_list_position)
    return processor
Esempio n. 10
0
def test_aggregate_obs_space_over_cities(
        simple_obs_state_processor: SimpleObsStateProcessor):
    """Aggregating a space over the cities must repeat it ``MAX_CITIES`` times."""
    example_space = gym.spaces.Discrete(5)
    expected_spaces = [example_space] * MAX_CITIES
    actual_spaces = simple_obs_state_processor._aggregate_obs_space_over_cities(
        example_space)
    assert expected_spaces == actual_spaces
Esempio n. 11
0
def test_get_pathogen_population(
        simple_obs_state_processor: SimpleObsStateProcessor,
        gamestate_stub: GameState, available_pathogens: List[Pathogen]):
    """Every pathogen's population must equal ``number of cities * 500 * .8``."""
    for pathogen in available_pathogens:
        actual_population = simple_obs_state_processor._get_pathogen_population(
            gamestate_stub, pathogen)
        expected_population = len(gamestate_stub.cities) * 500 * .8
        assert actual_population == expected_population