Example #1
def build_credit_env(config=None, initial_state=None):
    """Construct credit environment that is parameterized by the initial state.

    This allows the user to specify different datasets other than the default
    Credit dataset.
    """
    if config is None:
        config = Config()
    elif not isinstance(config, Config):
        raise ValueError(
            f"config must be an instance of Config, got {type(config)}")
    if initial_state is None:
        initial_state = State()
    elif not isinstance(initial_state, State):
        raise ValueError(
            f"initial_state must be an instance of State, got "
            f"{type(initial_state)}")

    config.base_state = copy.deepcopy(initial_state)

    return ODEEnvBuilder(
        simulate_fn=simulate,
        config=config,
        # The initial state is the baseline features and labels in the credit dataset
        initial_state=initial_state,
        # Action space is classifiers with the same number of parameters as
        # features.
        action_space=credit_action_space(initial_state),
        # Observation space is the strategically adapted features and labels
        observation_space=credit_observation_space(initial_state),
        timestep=1,
        intervention_fn=compute_intervention,
        reward_fn=compute_reward,
    )
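
A minimal usage sketch for the builder above. It shows how to swap in a custom dataset via the initial state; the State(features=..., labels=...) signature is taken from Example #3, while my_features, my_labels, and their shapes are hypothetical stand-ins:

import numpy as np

# Hypothetical stand-in dataset; real shapes should match the Credit data.
my_features = np.random.rand(100, 11)
my_labels = np.random.randint(0, 2, size=(100,)).astype(np.float64)

custom_state = State(features=my_features, labels=my_labels)
env_builder = build_credit_env(initial_state=custom_state)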
Example #2

def observation_space():
    """Return the model observation space."""
    num_states = State.num_variables()
    state_space_low = np.zeros(num_states)
    state_space_high = np.inf * np.ones(num_states)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


World3Env = ODEEnvBuilder(
    simulate_fn=simulate,
    # Smaller delta_t improves numerical stability
    config=Config(delta_t=0.5),
    initial_state=State(),
    # In this environment there are 9 actions defined by
    # nonrenewable_resource_usage and pollution_generation_factor.
    action_space=spaces.Discrete(9),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="world3-v0",
    entry_point=World3Env,
    max_episode_steps=400,
    reward_threshold=1e5,
)
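
Once the register() call above has run, the environment can be constructed by id. A minimal rollout sketch, assuming the registration targets the standard Gym registry:

import gym

env = gym.make("world3-v0")
obs = env.reset()
# Sample one of the 9 discrete resource-usage/pollution actions.
obs, reward, done, info = env.step(env.action_space.sample())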
Example #3
CreditEnv = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    # The initial state is the baseline features and labels in the credit dataset
    initial_state=State(features=CreditData.features,
                        labels=CreditData.labels),
    # Action space is classifiers with the same number of parameters as
    # features.
    action_space=spaces.Box(low=-np.inf,
                            high=np.inf,
                            shape=(CreditData.num_features, ),
                            dtype=np.float64),
    # Observation space is the strategically adapted features and labels
    observation_space=spaces.Dict({
        "features": spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=CreditData.features.shape,
            dtype=np.float64,
        ),
        "labels": spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=CreditData.labels.shape,
            dtype=np.float64,
        ),
    }),
    timestep=1,
    intervention_fn=compute_intervention,
    reward_fn=compute_reward,
)
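
A sketch of one interaction with CreditEnv, assuming the object returned by ODEEnvBuilder is instantiable like a standard Gym environment (a reasonable reading, since the other examples also pass it to register as an entry_point):

env = CreditEnv()
obs = env.reset()
# An action is a vector of classifier parameters, one per feature.
theta = np.zeros(CreditData.num_features)
obs, reward, done, info = env.step(theta)
# Observations are dicts of strategically adapted features and labels.
adapted_features = obs["features"]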
Example #4
    # Penalize economic output lost per unit time due to social distancing.
    reward -= economic_output_per_time * current_social_distancing
    return reward


def observation_space():
    """Return observation space.
    The state is (susceptible, exposed, infected, recovered).
    """
    state_dim = State.num_variables()
    state_space_low = np.zeros(state_dim)
    state_space_high = np.inf * np.ones(state_dim)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


Covid19Env = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    action_space=spaces.Discrete(6),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="COVID19-POOR-v0",
    entry_point=Covid19Env,
    max_episode_steps=150,
    reward_threshold=1e10,
)
Example #5
def observation_space():
    """Return the observation space. The state is (nonmedical_users, oud_useres, illicit_users)."""
    state_dim = State.num_variables()
    state_space_low = np.zeros(state_dim)
    state_space_high = np.inf * np.ones(state_dim)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


OpioidEnv = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    # In this environment we define 4 actions:
    #   - Do nothing
    #   - Reduce nonmedical opioid use by 5%
    #   - Reduce illicit opioid use by 5%
    #   - Reduce both by 5%
    action_space=spaces.Discrete(4),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="opioid-v0",
    entry_point=OpioidEnv,
    # The simulator starts in 2002 and ends in 2030.
    max_episode_steps=28,
    reward_threshold=0,
)
Example #6
def observation_space():
    """Return the observation space.

    The state is (uninfected_T1, infected_T1, uninfected_T2, infected_T2,
    free_virus, immune_response) in units (cells/ml, cells/ml, cells/ml,
    cells/ml, copies/ml, cells/ml).
    """
    state_dim = State.num_variables()
    state_space_low = np.zeros(state_dim)
    state_space_high = np.inf * np.ones(state_dim)
    return spaces.Box(state_space_low, state_space_high, dtype=np.float64)


HivEnv = ODEEnvBuilder(
    simulate_fn=simulate,
    config=Config(),
    initial_state=State(),
    # In this environment there are 4 actions defined by
    # epsilon_1 = 0 or 0.7 and epsilon_2 = 0 or 0.3.
    action_space=spaces.Discrete(4),
    observation_space=observation_space(),
    timestep=1.0,
    intervention_fn=get_intervention,
    reward_fn=get_reward,
)

register(
    id="HIV-v0",
    entry_point=HivEnv,
    max_episode_steps=400,
    reward_threshold=1e10,
)
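
The comment above fixes the four treatment settings as combinations of epsilon_1 in {0, 0.7} and epsilon_2 in {0, 0.3}. A sketch of one plausible action decoding; the authoritative mapping lives in get_intervention, so the index ordering here is an assumption:

# Hypothetical decoding of the 4 discrete actions into drug efficacies.
ACTION_TO_EFFICACY = {
    0: {"epsilon_1": 0.0, "epsilon_2": 0.0},  # no treatment
    1: {"epsilon_1": 0.7, "epsilon_2": 0.0},  # first drug only
    2: {"epsilon_1": 0.0, "epsilon_2": 0.3},  # second drug only
    3: {"epsilon_1": 0.7, "epsilon_2": 0.3},  # combination therapy
}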