Example #1
def test_cts():
    env = gym.make("fishing-v1")
    check_env(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "v1_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "v1_escapement-test.png")
Example #2
def test_discrete():
    env = gym.make("fishing-v0")
    check_env(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "v0_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "v0_escapement-test.png")
Example #3
def test_ricker():
    env = gym.make("fishing-v9", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "ricker_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "ricker_escapement-test.png")
Example #4
def test_myers():
    env = gym.make("fishing-v8", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "myers_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "myers_escapement-test.png")
Example #5
def test_beverton_holt():
    env = gym.make("fishing-v6", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "bh_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "bh_escapement-test.png")
Example #6
def test_nonstationary():
    env = gym.make("fishing-v10", sigma=0, alpha=-0.007)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "ns_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "ns_escapement-test.png")
Example #7
def test_model_uncertainty():
    np.random.seed(0)
    env = gym.make("fishing-v11")
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_msy-test.png")
    model = escapement(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_escapement-test.png")
Example #8
def run_optimal(env_name, r=0.1, K=1, sigma=0.01):
    '''
    :param env_name: environment version suffix, one of 'v0', 'v1', 'v2', 'v4'
    :param r: intrinsic growth rate
    :param K: carrying capacity
    :param sigma: environmental noise level
    :return: None; saves msy-<env_name>.png and escapement-<env_name>.png
    '''
    if env_name != 'v4':
        env = gym.make('fishing-' + env_name, r=r, K=K, sigma=sigma)
    else:
        env = gym.make('fishing-' + env_name, sigma=sigma)

    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "msy-" + env_name + ".png")

    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "escapement-" + env_name + ".png")
    return
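A usage sketch for run_optimal; the argument values below are illustrative, not taken from the original.

# Hypothetical calls (values chosen for illustration only):
run_optimal('v0')                      # defaults: r=0.1, K=1, sigma=0.01
run_optimal('v1', r=0.3, sigma=0.05)   # override growth rate and noise
run_optimal('v4')                      # 'v4' branch: only sigma is passed through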
Example #9
def test_tipping():
    np.random.seed(0)
    env = gym.make("fishing-v2", sigma=0, init_state=0.75)
    check_env(env)
    env.reset()
    # Increases above the tipping point
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) >= 0.75

    # Decreases below the tipping point
    env.init_state = 0.3
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) <= 0.3

    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "tip_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "tip_escapement-test.png")
tensorboard_log = "/var/log/tensorboard/leaderboard"

ENV = "fishing-v1"
env = gym.make(ENV)

## Constant Escapement ######################################################
model = escapement(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/escapement.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("ESC", ENV, mean_reward, std_reward, url)
print("algo:", "ESC", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)
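For context, a minimal sketch of what a constant-escapement policy could look like, using the env.get_fish_population / env.get_action helpers seen in test_tipping and a stable_baselines3-style predict() so that evaluate_policy can score it; the escapement() helper in the repository may be implemented differently.

class ConstantEscapement:
    # Harvest any stock above a fixed escapement level; otherwise harvest nothing.
    def __init__(self, env, escapement_level=0.5):
        self.env = env
        self.escapement_level = escapement_level

    def predict(self, obs, **kwargs):
        fish = self.env.get_fish_population(obs)
        harvest = max(fish - self.escapement_level, 0.0)
        # Return (action, state), as stable_baselines3 policies do.
        return self.env.get_action(harvest), None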

## MSY ######################################################################
model = msy(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/msy.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("MSY", ENV, mean_reward, std_reward, url)
print("algo:", "MSY", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)

# Consider running these in parallel?

## PPO ######################################################################

# load best tuned parameters...

model = PPO('MlpPolicy', env, verbose=0, tensorboard_log=tensorboard_log)
model.learn(total_timesteps=300000)
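The listing ends after training; a follow-up evaluation of the PPO agent, mirroring the ESC and MSY blocks above, would presumably look something like this (the results path and leaderboard tag are assumptions).

df = env.simulate(model, reps=10)
env.plot(df, "results/ppo.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("PPO", ENV, mean_reward, std_reward, url)
print("algo:", "PPO", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)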