def test_cts():
    """Smoke-test the continuous fishing env (v1) with both baseline policies."""
    env = gym.make("fishing-v1")
    check_env(env)
    for make_policy, outfile in [(msy, "v1_msy-test.png"),
                                 (escapement, "v1_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
def test_discrete():
    """Smoke-test the discrete fishing env (v0) with both baseline policies."""
    env = gym.make("fishing-v0")
    check_env(env)
    for make_policy, outfile in [(msy, "v0_msy-test.png"),
                                 (escapement, "v0_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
def test_ricker():
    """Smoke-test the deterministic Ricker env (v9) with both baseline policies."""
    env = gym.make("fishing-v9", sigma=0)
    check_env(env)
    # model = user_action(env)  # interactive helper, kept commented for manual runs
    for make_policy, outfile in [(msy, "ricker_msy-test.png"),
                                 (escapement, "ricker_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
def test_myers():
    """Smoke-test the deterministic Myers env (v8) with both baseline policies.

    Fix: the original assigned ``model = user_action(env)`` and then
    immediately overwrote it with ``msy(env)`` — a dead assignment.  The
    sibling tests keep that interactive helper commented out, so this
    test now does the same.
    """
    env = gym.make("fishing-v8", sigma=0)
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model)
    env.plot(df, "myers_msy-test.png")
    model = escapement(env)
    df = env.simulate(model)
    env.plot(df, "myers_escapement-test.png")
def test_beverton_holt():
    """Smoke-test the deterministic Beverton-Holt env (v6) with both baseline policies."""
    env = gym.make("fishing-v6", sigma=0)
    check_env(env)
    # model = user_action(env)  # interactive helper, kept commented for manual runs
    for make_policy, outfile in [(msy, "bh_msy-test.png"),
                                 (escapement, "bh_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
def test_nonstationary():
    """Smoke-test the non-stationary env (v10, drifting via alpha) with both baseline policies."""
    env = gym.make("fishing-v10", sigma=0, alpha=-0.007)
    check_env(env)
    # model = user_action(env)  # interactive helper, kept commented for manual runs
    for make_policy, outfile in [(msy, "ns_msy-test.png"),
                                 (escapement, "ns_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
def test_model_uncertainty():
    """Smoke-test the model-uncertainty env (v11) with 10-replicate simulations.

    Fix: the original assigned ``model = user_action(env)`` and then
    immediately overwrote it with ``msy(env)`` — a dead assignment.  It is
    commented out here, matching the convention of the other tests in this
    file.
    """
    np.random.seed(0)  # the env draws random models; pin the seed for reproducibility
    env = gym.make("fishing-v11")
    check_env(env)
    # model = user_action(env)
    model = msy(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_msy-test.png")
    model = escapement(env)
    df = env.simulate(model, reps=10)
    env.plot(df, "mu_escapement-test.png")
def run_optimal(env_name, r=0.1, K=1, sigma=0.01):
    '''
    Simulate and plot the MSY and constant-escapement policies on one env.

    :param env_name: environment suffix: 'v0', 'v1', 'v2', 'v4'
    :param r: growth-rate parameter (not passed for 'v4')
    :param K: carrying-capacity parameter (not passed for 'v4')
    :param sigma: noise level
    :return: None
    '''
    if env_name == 'v4':
        env = gym.make('fishing-' + env_name, sigma=sigma)
    else:
        env = gym.make('fishing-' + env_name, r=r, K=K, sigma=sigma)
    for make_policy, prefix in [(msy, "msy-"), (escapement, "escapement-")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, prefix + env_name + ".png")
    return
def test_tipping():
    """Check the tipping-point env (v2): the stock grows when it starts above
    the threshold and declines when it starts below, then smoke-test both
    baseline policies."""
    np.random.seed(0)
    env = gym.make("fishing-v2", sigma=0, init_state=0.75)
    check_env(env)

    # Starting above the tipping point, a no-harvest step must not shrink the stock
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) >= 0.75

    # Starting below the tipping point, the stock must decline
    env.init_state = 0.3
    env.reset()
    obs, reward, done, info = env.step(env.get_action(0))
    assert env.get_fish_population(obs) <= 0.3

    # model = user_action(env)  # interactive helper, kept commented for manual runs
    for make_policy, outfile in [(msy, "tip_msy-test.png"),
                                 (escapement, "tip_escapement-test.png")]:
        agent = make_policy(env)
        trajectory = env.simulate(agent)
        env.plot(trajectory, outfile)
from stable_baselines3 import SAC, TD3, A2C, PPO, DDPG, DQN
from stable_baselines3.common.evaluation import evaluate_policy
from leaderboard import leaderboard, hash_url
import os
from torch import nn as nn

# Score the two classical baselines (constant escapement and MSY) on
# fishing-v1 and record each on the shared leaderboard, keyed by a hash of
# this script so results are tied to the code version that produced them.
# NOTE(review): `gym`, `escapement`, and `msy` are not imported in this
# chunk — presumably brought into scope earlier in the file; verify.

#file = os.path.basename(__file__)
file = "compute_leaderboard.py"
url = hash_url(file)  # get hash URL at start of execution
tensorboard_log = "/var/log/tensorboard/leaderboard"

ENV = "fishing-v1"
env = gym.make(ENV)

## Constant Escapement ######################################################
model = escapement(env)
df = env.simulate(model, reps=10)  # 10 replicate trajectories for plotting
env.plot(df, "results/escapement.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("ESC", ENV, mean_reward, std_reward, url)
print("algo:", "ESC", "env:", ENV, "mean reward:", mean_reward, "std:", std_reward)

## MSY ######################################################################
model = msy(env)
df = env.simulate(model, reps=10)
env.plot(df, "results/msy.png")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)
leaderboard("MSY", ENV, mean_reward, std_reward, url)
print("algo:", "MSY", "env:", ENV, "mean reward:", mean_reward, "std:", std_reward)
#file = os.path.basename(__file__) file = "compute_leaderboard.py" url = hash_url(file) # get hash URL at start of execution tensorboard_log = "/var/log/tensorboard/leaderboard" seed = 0 ENV = "fishing-v1" env = gym.make(ENV, sigma=0.1) vec_env = make_vec_env(ENV, n_envs=4, seed=seed, sigma=0.1) # parallel workers for PPO, A2C ## Constant Escapement ###################################################### model = escapement(env) df = env.simulate(model, reps=10) env.plot(df, "results/escapement.png") mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000) leaderboard("ESC", ENV, mean_reward, std_reward, url) print("algo:", "ESC", "env:", ENV, "mean reward:", mean_reward, "std:", std_reward) ## MSY ###################################################################### model = msy(env) df = env.simulate(model, reps=10) env.plot(df, "results/msy.png") mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000) # Rescale score against optimum solution in this environment opt = escapement(env) opt_reward, std_reward = evaluate_policy(opt, env, n_eval_episodes=100)
# Train a SAC agent with tuned hyperparameters (from the `hyper` dict),
# normalize its score against the optimal constant-escapement policy, record
# it on the leaderboard, and save plots of its trajectories and policy.
model = SAC('MlpPolicy', env,
            verbose=0,
            tensorboard_log=tensorboard_log,
            seed=seed,
            use_sde=True,
            gamma=hyper['gamma'],
            learning_rate=hyper['lr'],
            batch_size=hyper['batch_size'],
            buffer_size=hyper['buffer_size'],
            learning_starts=hyper['learning_starts'],
            train_freq=hyper['train_freq'],
            tau=hyper['tau'],
            policy_kwargs=policy_kwargs)
model.learn(total_timesteps=300000)

## Evaluate model
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=100)

# Rescale score against optimum solution in this environment.
# BUG FIX: the original unpacked the optimum policy's std into `std_reward`,
# clobbering SAC's std before normalization, so the reported std belonged to
# the wrong policy.  Discard the optimum's std instead.
opt = escapement(env)
opt_reward, _ = evaluate_policy(opt, env, n_eval_episodes=100)
mean_reward = mean_reward / opt_reward
std_reward = std_reward / opt_reward
leaderboard("SAC", ENV, mean_reward, std_reward, url)
print("algo:", "SAC", "env:", ENV, "mean reward:", mean_reward, "std:", std_reward)

## simulate and plot results
df = env.simulate(model, reps=10)
env.plot(df, "results/sac.png")
policy = env.policyfn(model, reps=10)
env.plot_policy(policy, "results/sac-policy.png")
model.save("models/sac-v1")