def __init__(self, trainer: Trainer.__class__, weights: str):
        """Prepare Ray, the state processors, the environment and the trainer.

        :param trainer: callable (annotated as a trainer class) invoked as
            ``trainer(env="ic20env")``; the result is handed to
            ``self._load_trainer``.
        :param weights: forwarded unchanged to ``self._load_trainer``
            (presumably a checkpoint location -- confirm against
            ``_load_trainer``).
        """
        env_name = "ic20env"

        # Start a Ray runtime only when none has been initialized yet.
        if not ray.is_initialized():
            ray.init()

        # The action processor shares the observation processor's
        # ``sort_pathogens`` so both use the same pathogen ordering callable.
        self.obs_state_processor = SimpleObsStateProcessor(
            infected_population_sorting_per_city)
        self.act_state_processor = SimpleActStateProcessor(
            sort_pathogens=self.obs_state_processor.sort_pathogens)

        def build_env(_):
            # The single argument supplied by the registry is ignored; the
            # processors are looked up on ``self`` at call time.
            return InferenceIC20Environment(
                self.obs_state_processor, self.act_state_processor)

        register_env(env_name, build_env)

        fresh_trainer = trainer(env=env_name)
        self.trainer = self._load_trainer(fresh_trainer, weights)
from ray.tune.logger import pretty_print
from ray.tune.util import merge_dicts

from approaches.reinforced.action_state_processor import SimpleActStateProcessor
from approaches.reinforced.constants import DEFAULT_CONFIG
from approaches.reinforced.environment import SimplifiedIC20Environment, CHECKPOINT_FILE
from approaches.reinforced.observation_state_processor import SimpleObsStateProcessor, \
    infected_population_sorting_per_city
# won't start sgd
from approaches.reinforced.reward_function import UnstableReward

if __name__ == "__main__":
    ray.init(
        address='auto'
    )  # address = None when running locally. address = 'auto' when running on aws.]
    obs_state_processor = SimpleObsStateProcessor(
        pathogen_sorting_strategy=infected_population_sorting_per_city)
    act_state_processor = SimpleActStateProcessor(
        sort_pathogens=obs_state_processor.sort_pathogens)

    # Notice that trial_max will only work for stochastic policies
    register_env(
        "ic20env", lambda _: SimplifiedIC20Environment(obs_state_processor,
                                                       act_state_processor,
                                                       UnstableReward(),
                                                       trial_max=10))
    ten_gig = 10737418240

    trainer = A2CTrainer(
        env="ic20env",
        config=merge_dicts(
            DEFAULT_CONFIG,
# Example no. 3
# 0
def simple_obs_state_processor(
        available_pathogens: List[Pathogen]) -> SimpleObsStateProcessor:
    """Build a ``SimpleObsStateProcessor`` that keeps the given pathogen order.

    The processor receives a callback that scores a pathogen by its position
    in ``available_pathogens``.

    :param available_pathogens: list whose ordering defines each pathogen's
        score.
    :return: a ``SimpleObsStateProcessor`` constructed with that callback.
    """
    def position_in_available(city: City, pathogen: Pathogen) -> float:
        # ``city`` is accepted but not used: the score depends only on where
        # the pathogen sits in the captured list. ``list.index`` raises
        # ``ValueError`` when the pathogen is not in that list.
        position = available_pathogens.index(pathogen)
        return position

    processor = SimpleObsStateProcessor(position_in_available)
    return processor