Example #1
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

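# '12AX_CPT-v0' is a working-memory continuous-performance task from gym_cog_ml_tasks;
# size presumably sets the input sequence length.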
env = gym.make('12AX_CPT-v0', size=200)

N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

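# train() runs N_tr episodes and returns per-episode records (reward, action accuracy, F1).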
res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)

train_results_plots(dir='./save/ax_cpt',
                    figname='LSTM_50',
                    names=['LSTM_50'],
                    numbers=[res])
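
The saved records can be reloaded later without retraining. A minimal sketch, assuming load_train_res returns the same record tuple that save_train_res stored (save_train_res appears to append the .npy suffix, as the paths later in this file suggest):

from common.utils import load_train_res, train_results_plots

res = load_train_res('./save/ax_cpt/LSTM_50_res.npy')  # reload the saved records
train_results_plots(dir='./save/ax_cpt', figname='LSTM_50_reload',
                    names=['LSTM_50'], numbers=[res])  # re-plot without retraining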
Example #2
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots

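# 'seq_prediction-v0': size is presumably the sequence length and p the probability
# governing the target symbol.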
env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 80000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred/MC_50_.5', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred',
    figname='MC_50_.5',
    names=['MC_50_.5'],
    numbers=[res])
Example #3
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

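# '12_AX-v0': a hierarchical working-memory task; size and prob_target presumably
# control the episode length and the target frequency.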
env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 20000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, seed=123)
test(env, agent, N_tst, seed=123)

# save the training records: every episode's reward, action accuracy and F1 over iterations
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/MC_10_0.5',
               res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/',
                    figname='MC_10_0.5',
                    names=['MC_10_0.5'],
                    numbers=[res])
Example #4
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
# NOTE: the original snippet omitted its header; the imports and constants
# below are assumptions. Adjust the BabiEnv/Agent_DRQN paths and TASK_ID to your setup.
from DRQN.DRQN_model import Agent_DRQN
from babi_env import BabiEnv
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

TASK_ID = 1  # bAbI task id (assumed placeholder)
N_tr = 5000  # training episodes (assumed placeholder)
N_tst = 500  # test episodes (assumed placeholder)

MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

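# Agent_DRQN arguments: state/action dimensionality, replay-memory capacity, learning
# rate, epsilon-greedy exploration, discount factor, and network shape (semantics
# assumed from the parameter names).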
agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR,
                   EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

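# the saved records unpack as (reward, accuracy, F1) per episode, assuming
# load_train_res returns the same triple that save_train_res stored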
te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'],
                    numbers=[res_tr])

print('Plots saved for task', TASK_ID)
Example #5

import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=10, p=0.5)

N_tr = 300
N_tst = 100
n_hidden = 10
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res_10 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_10_res', res_10)
# agent.save(dir='./save/seq_pred', name='LSTM_10')
# agent.load('./save/seq_pred/LSTM_10')
# test(env, agent, N_tst, print_progress=True, seed=123)
# a = load_train_res('./save/seq_pred/LSTM_10_res.npy')

N_tr = 300
N_tst = 100
n_hidden = 30
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res_30 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_30_res', res_30)
# agent.save(dir='./save/seq_pred', name='LSTM_30')
# test(env, agent, N_tst, print_progress=True, seed=123)

train_results_plots(dir='./save/seq_pred',
                    figname='LSTM',
                    names=['LSTM_10', 'LSTM_30'],
                    numbers=[res_10, res_30])
Example #6

import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

seed = 123
torch.manual_seed(seed)  # fix torch's RNG for reproducibility

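# 'Simple_Copy_Repeat-v0': presumably copy a sequence of n_char symbols of
# length size, repeated 3 times.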
env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=seed, print_progress=True)
test(env, agent, N_tst, seed=seed, print_progress=True)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/MC_10_3', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/',
    figname='MC_10_3',
    names=['MC_10_3'],
    numbers=[res])
Example #7
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import load_train_res, train_results_plots

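# reload previously saved training records instead of retraining;
# load_train_res mirrors save_train_res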
res = load_train_res('./save/simple_copy/MC_3.npy')

train_results_plots(dir='./save/simple_copy',
                    figname='MC_3',
                    names=['MC_3'],
                    numbers=[res])
Example #8

import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import train, test, save_train_res, train_results_plots
from MonteCarlo.MonteCarlo_model import Agent_MC
import torch

torch.manual_seed(123)
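# 'Simple_Copy-v0': presumably reproduce a sequence of n_char symbols of length size.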
env = gym.make('Simple_Copy-v0', n_char=5, size=50)

seed = 123
N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

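# custom_reward scales every environment reward by a factor of 10 during training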
res = train(env, agent, N_tr, custom_reward=lambda r: r * 10, seed=seed)
test(env, agent, N_tst, seed=seed)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/simple_copy/MC_50', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/simple_copy',
    figname='MC_50',
    names=['MC_50'],
    numbers=[res])
Example #9
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

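# 'Saccade-v0': go_reward presumably sets the reward granted for a correct "go" response.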
env = gym.make('Saccade-v0', go_reward=7)

N_tr = 5000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, seed=123)
test(env, agent, N_tst, seed=123)

# save the training records: every episode's reward, action accuracy and F1 over iterations
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/saccade/MC_7',
               res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/saccade',
                    figname='MC_7',
                    names=['MC_7'],
                    numbers=[res])
Example #10
import gym
import gym_cog_ml_tasks
import numpy as np
from LSTM.LSTM_model import Agent_LSTM
from HER.HER_model import Agent_HER  # import path assumed; adjust to your repo layout
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

# NOTE: the original snippet omitted its setup; the environment and episode
# counts below are assumed placeholders.
env = gym.make('12_AX-v0', size=10, prob_target=0.5)
N_tr = 20000
N_tst = 1000

n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                  n_layers)

hierarchy_num = 3
learn_mode = 'SL'
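# per-level HER hyperparameters: alpha (learning rates), lambd (eligibility/decay),
# beta (softmax gain), bias, and gamma (semantics assumed from the names)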
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                learn_mode, hyperparam)

res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/LSTM', res_lstm)
save_train_res('./save/HER', res_her)

test(env, lstm, N_tst, seed=123)
test(env, her, N_tst, seed=123)

res_lstm = load_train_res('./save/LSTM.npy')
res_her = load_train_res('./save/HER.npy')
train_results_plots(dir='./save/', figname='test', names=['LSTM', 'HER'],
                    numbers=[res_lstm, res_her])