import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('12AX_CPT-v0', size=200)
N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)
train_results_plots(dir='./save/ax_cpt', figname='LSTM_50', names=['LSTM_50'], numbers=[res])
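# Optional follow-up (sketch): `test` and `load_train_res` are imported above but not
# used; a typical next step, following the other scripts in this repo, would be to
# evaluate the trained agent and reload the saved record (save_train_res stores it
# with a '.npy' suffix, as the load calls elsewhere show).
test(env, agent, N_tst, print_progress=True, seed=123)
res_loaded = load_train_res('./save/ax_cpt/LSTM_50_res.npy')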
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=50, p=0.5)
N_tr = 80000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred/MC_50_.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred',
                    figname='MC_50_.5', names=['MC_50_.5'], numbers=[res])
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)
N_tr = 20000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=123)
test(env, agent, N_tst, seed=123)
# save the training record, including every episode's reward, action accuracy and f1 over the iterations
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/MC_10_0.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/',
                    figname='MC_10_0.5', names=['MC_10_0.5'], numbers=[res])
# NOTE: this snippet assumes BabiEnv, Agent_DRQN and the task constants
# (TASK_ID, N_tr, N_tst) are imported/defined earlier in the script; the
# training utilities come from common.utils as in the other examples.
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')
agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR,
                   EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'],
                    numbers=[res_tr])
print('Plots saved for task', TASK_ID)
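# Sketch (not in the original script): the held-out record `res_te` is saved in the
# same format as `res_tr`, so it can presumably be plotted with the same helper;
# the 'DRQN_te' label and '{0}_te' figure name are illustrative.
train_results_plots(dir='./plots/', figname='{0}_te'.format(TASK_ID), names=['DRQN_te'],
                    numbers=[res_te])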
# imports as in the other LSTM scripts in this repo
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=10, p=0.5)

# LSTM agent with 10 hidden units
N_tr = 300
N_tst = 100
n_hidden = 10
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res_10 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_10_res', res_10)
# agent.save(dir='./save/seq_pred', name='LSTM_10')
# agent.load('./save/seq_pred/LSTM_10')
# test(env, agent, N_tst, print_progress=True, seed=123)
# a = load_train_res('./save/seq_pred/LSTM_10_res.npy')

# LSTM agent with 30 hidden units
N_tr = 300
N_tst = 100
n_hidden = 30
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res_30 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_30_res', res_30)
# agent.save(dir='./save/seq_pred', name='LSTM_30')
# test(env, agent, N_tst, print_progress=True, seed=123)

train_results_plots(dir='./save/seq_pred', figname='LSTM', names=['LSTM_10', 'LSTM_30'],
                    numbers=[res_10, res_30])
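# Checkpointing sketch, mirroring the commented-out calls above: persist the trained
# weights, restore them into a fresh agent (assuming `load` can restore a newly
# constructed Agent_LSTM), and evaluate it on held-out episodes.
agent.save(dir='./save/seq_pred', name='LSTM_30')
restored = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
restored.load('./save/seq_pred/LSTM_30')
test(env, restored, N_tst, print_progress=True, seed=123)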
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

seed = 123

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)
N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=seed, print_progress=True)
test(env, agent, N_tst, seed=seed, print_progress=True)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/MC_10_3', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/',
                    figname='MC_10_3', names=['MC_10_3'], numbers=[res])
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch
import numpy as np

# reload a previously saved training record and re-generate its plots
res = np.load('./save/simple_copy/MC_3.npy')
train_results_plots(dir='./save/simple_copy', figname='MC_3', names=['MC_3'], numbers=[res])
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import train, test, save_train_res, train_results_plots
from MonteCarlo_model import Agent_MC
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy-v0', n_char=5, size=50)
seed = 123
N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, custom_reward=lambda r: r * 10, seed=seed)
test(env, agent, N_tst, seed=seed)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/simple_copy/MC_50', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/simple_copy',
                    figname='MC_50', names=['MC_50'], numbers=[res])
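# Note on `custom_reward` (sketch): it appears to accept any callable mapping the raw
# environment reward to the value used for learning; the lambda above simply scales
# rewards by 10. An alternative shaping, for illustration only:
# res = train(env, agent, N_tr, custom_reward=lambda r: max(r, 0.0), seed=seed)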
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

env = gym.make('Saccade-v0', go_reward=7)
N_tr = 5000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=123)
test(env, agent, N_tst, seed=123)
# save the training record, including every episode's reward, action accuracy and f1 over the iterations
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/saccade/MC_7', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/saccade',
                    figname='MC_7', names=['MC_7'], numbers=[res])
# NOTE: this comparison assumes `env`, `N_tr` and `N_tst` are defined as in the
# scripts above, and that Agent_HER is imported from its model module alongside
# the imports below.
import numpy as np
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

# LSTM agent
n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)

# HER agent with a three-level hierarchy
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)

res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over the iterations
save_train_res('./save/LSTM', res_lstm)
save_train_res('./save/HER', res_her)

test(env, lstm, N_tst, seed=123)
test(env, her, N_tst, seed=123)

res_lstm = load_train_res('./save/LSTM.npy')
res_her = load_train_res('./save/HER.npy')
train_results_plots(dir='./save/', figname='test', names=['LSTM', 'HER'],
                    numbers=[res_lstm, res_her])
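# Sketch (not in the original script): as in the DRQN example, `test` returns a record
# that can be plotted with the same helpers, which allows a held-out comparison of the
# two agents. Figure and label names here are illustrative.
res_lstm_te = test(env, lstm, N_tst, seed=123)
res_her_te = test(env, her, N_tst, seed=123)
train_results_plots(dir='./save/', figname='test_heldout', names=['LSTM_te', 'HER_te'],
                    numbers=[res_lstm_te, res_her_te])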