Example #1
def test_cts():
    env = gym.make("fishing-v1")
    check_env(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "v1_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "v1_escapement-test.png")
Example #2
def test_discrete():
    env = gym.make("fishing-v0")
    check_env(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "v0_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "v0_escapement-test.png")
Example #3
def test_ricker():
    env = gym.make("fishing-v9", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "ricker_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "ricker_escapement-test.png")
Example #4
def test_myers():
    env = gym.make("fishing-v8", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "myers_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "myers_escapement-test.png")
Example #5
def test_beverton_holt():
    env = gym.make("fishing-v6", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "bh_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "bh_escapement-test.png")
Example #6
def test_nonstationary():
    env = gym.make("fishing-v10", sigma=0, alpha=-0.007)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "ns_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "ns_escapement-test.png")
Example #7
def test_model_uncertainty():
    np.random.seed(0)
    env = gym.make("fishing-v11")
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_msy-test.png")
    model = escapement(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_escapement-test.png")
Example #8
def run_optimal(env_name, r=0.1, K=1, sigma=0.01):
    '''
    Run the MSY and constant-escapement baseline policies on the chosen
    fishing environment and save a plot for each.

    :param env_name: environment version suffix: 'v0', 'v1', 'v2' or 'v4'
    :param r: intrinsic growth rate of the fish population
    :param K: carrying capacity
    :param sigma: standard deviation of the environmental noise
    :return: None
    '''
    if env_name != 'v4':
        env = gym.make('fishing-' + env_name, r=r, K=K, sigma=sigma)
    else:
        env = gym.make('fishing-' + env_name, sigma=sigma)

    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "msy-" + env_name + ".png")

    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "escapement-" + env_name + ".png")
    return
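A hypothetical call to run_optimal, assuming the preamble sketched after Example #1:

# run the MSY and constant-escapement baselines on two environment versions
run_optimal('v0')
run_optimal('v2', r=0.3, K=1, sigma=0.05)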
Example #9
def test_tipping():
    np.random.seed(0)
    env = gym.make("fishing-v2", sigma=0, init_state=0.75)
    check_env(env)
    env.reset()
    # increases above tipping point
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) >= 0.75

    # Decreases below the tipping point
    env.init_state = 0.3
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) <= 0.3

    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "tip_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "tip_escapement-test.png")
Example #10
from stable_baselines3 import SAC, TD3, A2C, PPO, DDPG, DQN
from stable_baselines3.common.evaluation import evaluate_policy
from leaderboard import leaderboard, hash_url
import os
from torch import nn as nn

#file = os.path.basename(__file__)
file = "compute_leaderboard.py"
url = hash_url(file)  # get hash URL at start of execution
tensorboard_log = "/var/log/tensorboard/leaderboard"

ENV = "fishing-v1"
env = gym.make(ENV)

## Constant Escapement ######################################################
model = escapement(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/escapement.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("ESC", ENV, mean_reward, std_reward, url)
print("algo:", "ESC", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)

## MSY ######################################################################
model = msy(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/msy.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("MSY", ENV, mean_reward, std_reward, url)
print("algo:", "MSY", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)
Example #11
#file = os.path.basename(__file__)
file = "compute_leaderboard.py"
url = hash_url(file)  # get hash URL at start of execution
tensorboard_log = "/var/log/tensorboard/leaderboard"

seed = 0

ENV = "fishing-v1"
env = gym.make(ENV, sigma=0.1)

from stable_baselines3.common.env_util import make_vec_env

# SB3's make_vec_env forwards keyword arguments to the environment via env_kwargs
vec_env = make_vec_env(ENV, n_envs=4, seed=seed,
                       env_kwargs=dict(sigma=0.1))  # parallel workers for PPO, A2C

## Constant Escapement ######################################################
model = escapement(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/escapement.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("ESC", ENV, mean_reward, std_reward, url)
print("algo:", "ESC", "env:", ENV, "mean reward:", mean_reward, "std:",
      std_reward)

## MSY ######################################################################
model = msy(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/msy.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
# Rescale score against optimum solution in this environment
opt = escapement(env)
opt_reward, std_reward = evaluate_policy(opt, env, n_eval_episodes=100)
Example #12
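This example references hyper and policy_kwargs without defining them; they presumably hold tuned SAC hyperparameters. A purely illustrative stand-in (placeholder values, not the tuned ones) could look like:

from torch import nn as nn

hyper = {
    "gamma": 0.99,
    "lr": 3e-4,
    "batch_size": 256,
    "buffer_size": 1000000,
    "learning_starts": 10000,
    "train_freq": 1,
    "tau": 0.005,
}
policy_kwargs = dict(activation_fn=nn.ReLU, net_arch=[256, 256])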
model = SAC('MlpPolicy',
            env, verbose=0, tensorboard_log=tensorboard_log, seed=seed,
            use_sde=True,
            gamma=hyper['gamma'],
            learning_rate=hyper['lr'],
            batch_size=hyper['batch_size'],
            buffer_size=hyper['buffer_size'],
            learning_starts=hyper['learning_starts'],
            train_freq=hyper['train_freq'],
            tau=hyper['tau'],
            policy_kwargs=policy_kwargs)
model.learn(total_timesteps=300000)

## Evaluate model
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=100)
# Rescale score against optimum solution in this environment
opt = escapement(env)
opt_reward, std_reward = evaluate_policy(opt, env, n_eval_episodes=100)
mean_reward = mean_reward / opt_reward
std_reward = std_reward / opt_reward
leaderboard("SAC", ENV, mean_reward, std_reward, url)
print("algo:", "SAC", "env:", ENV, "mean reward:", mean_reward, "std:", std_reward)


## simulate and plot results
df = env.simulate(model, reps=10)
env.plot(df, "results/sac.png")
policy = env.policyfn(model, reps=10)
env.plot_policy(policy, "results/sac-policy.png")

model.save("models/sac-v1")