def observation_space():
    """Return the model observation space."""
    num_states = State.num_variables()
    state_space_low = np.zeros(num_states)
    state_space_high = np.inf * np.ones(num_states)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


World3Env = ODEEnvBuilder(
    simulate_fn=simulate,
    # Smaller delta_t improves numerical stability.
    config=Config(delta_t=0.5),
    initial_state=State(),
    # In this environment there are 9 actions defined by
    # nonrenewable_resource_usage and pollution_generation_factor.
    action_space=spaces.Discrete(9),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="world3-v0",
    entry_point=World3Env,
    max_episode_steps=400,
    reward_threshold=1e5,
)
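If `register` above targets a Gym-compatible registry, the environment can be created and stepped through the usual Gym interface. A minimal sketch, assuming the classic `(obs, reward, done, info)` step API and that `gym.make` resolves the registered id:

import gym

env = gym.make("world3-v0")
observation = env.reset()
for _ in range(5):
    action = env.action_space.sample()  # one of the 9 discrete interventions
    observation, reward, done, info = env.step(action)
    if done:
        break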
    if initial_state is None:
        initial_state = State()
    elif not isinstance(initial_state, State):
        raise ValueError(
            f"Initial state must be an instance of {type(State())}")
    config.base_state = copy.deepcopy(initial_state)

    return ODEEnvBuilder(
        simulate_fn=simulate,
        config=config,
        # The initial state is the baseline features and labels in the
        # credit dataset.
        initial_state=initial_state,
        # The action space is classifiers with the same number of parameters
        # as features.
        action_space=credit_action_space(initial_state),
        # The observation space is the strategically adapted features and labels.
        observation_space=credit_observation_space(initial_state),
        timestep=1,
        intervention_fn=compute_intervention,
        reward_fn=compute_reward,
    )


register(
    id="Credit-v0",
    entry_point=build_credit_env,
    max_episode_steps=100,
    reward_threshold=0,
)
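A sketch of calling the enclosing factory directly, assuming it is the `build_credit_env` referenced in `register` and accepts `initial_state` as shown above; the step call assumes the classic Gym API:

env = build_credit_env(initial_state=State())  # defaults to State() when omitted
observation = env.reset()
theta = env.action_space.sample()  # classifier parameters, one per feature
observation, reward, done, info = env.step(theta)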
    # Lost economic output per time.
    reward -= economic_output_per_time * current_social_distancing
    return reward


def observation_space():
    """Return observation space.

    The state is (susceptible, exposed, infected, recovered).
    """
    state_dim = State.num_variables()
    state_space_low = np.zeros(state_dim)
    state_space_high = np.inf * np.ones(state_dim)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


Covid19Env = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    action_space=spaces.Discrete(6),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="COVID19-POOR-v0",
    entry_point=Covid19Env,
    max_episode_steps=150,
    reward_threshold=1e10,
)
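A rollout sketch that accumulates reward under a fixed action, assuming Gym-style registration and the classic step API; the choice of action 0 is purely illustrative:

import gym

env = gym.make("COVID19-POOR-v0")
observation = env.reset()
total_reward, done = 0.0, False
while not done:
    # Always apply the same one of the 6 discrete actions.
    observation, reward, done, info = env.step(0)
    total_reward += reward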
"""Return the observation space. The state is (nonmedical_users, oud_useres, illicit_users).""" state_dim = State.num_variables() state_space_low = np.zeros(state_dim) state_space_high = np.inf * np.ones(state_dim) return spaces.Box(state_space_low, state_space_high, dtype=np.float64) OpioidEnv = ODEEnvBuilder( simulate_fn=simulate, config=Config(), initial_state=State(), # In this environment we define 4 actions: # - Do nothing # - Reduce nonmedical opioid use by 5% # - Reduce illicit opioid use by 5% # - Reduce both by 5% action_space=spaces.Discrete(4), observation_space=observation_space(), timestep=1.0, intervention_fn=get_intervention, reward_fn=get_reward, ) register( id="opioid-v0", entry_point=OpioidEnv, # The simulator starts in 2002 and ends in 2030. max_episode_steps=28, reward_threshold=0, )
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


def action_space():
    """Return action space.

    There are four control variables in the model:
        - Treated bednet use
        - Condom use
        - Direct treatment of infected humans
        - Indoor residual spray use
    """
    return spaces.Box(np.zeros(4), np.ones(4), dtype=np.float64)


ZikaEnv = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    action_space=action_space(),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="Zika-v0",
    entry_point=ZikaEnv,
    max_episode_steps=200,
    reward_threshold=1e10,
)
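A sketch of passing a continuous control vector, assuming Gym-style registration and the classic step API; the intensities are arbitrary, and the component ordering (bednets, condoms, treatment, spraying) is an assumption taken from the docstring order above:

import gym
import numpy as np

env = gym.make("Zika-v0")
observation = env.reset()
action = np.array([0.5, 0.0, 1.0, 0.25])  # each control intensity lies in [0, 1]
observation, reward, done, info = env.step(action)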
    The state is (uninfected_T1, infected_T1, uninfected_T2, infected_T2,
    free_virus, immune_response) in units (cells/ml, cells/ml, cells/ml,
    cells/ml, copies/ml, cells/ml).
    """
    state_dim = State.num_variables()
    state_space_low = np.zeros(state_dim)
    state_space_high = np.inf * np.ones(state_dim)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


HivEnv = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    # In this environment there are 4 actions defined by
    # epsilon_1 = 0 or 0.7 and epsilon_2 = 0 or 0.3.
    action_space=spaces.Discrete(4),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="HIV-v0",
    entry_point=HivEnv,
    max_episode_steps=400,
    reward_threshold=1e10,
)
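A short random-policy rollout, assuming Gym-style registration and the classic step API; how each discrete action maps to an (epsilon_1, epsilon_2) pair is determined by `get_intervention`:

import gym

env = gym.make("HIV-v0")
observation = env.reset()
for _ in range(10):
    observation, reward, done, info = env.step(env.action_space.sample())
    if done:
        break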