Example #1
0
# Three environments built with identical arguments: one used for training
# (its mode is switched before every run below) and two validation
# environments whose modes are fixed once here.
env_tr = gym.make('Simple_Copy_Repeat-v1', n_char=10, size=3, repeat=2)
env_val_full = gym.make('Simple_Copy_Repeat-v1', n_char=10, size=3, repeat=2)
env_val_full.setMode('full')
env_val_minor = gym.make('Simple_Copy_Repeat-v1', n_char=10, size=3, repeat=2)
env_val_minor.setMode('minor', n_exclude=1)

# Values passed to every Agent_LSTM constructed below.
n_hidden = 50
n_layers = 1
lr = 0.001

# NOTE(review): N_tr and N_val are not defined in this snippet; presumably
# they are set earlier in the file -- confirm before running.
# torch.manual_seed(123) is called before each agent is constructed so that
# every run starts from the same torch RNG state.

# train on the full mode
print("-"*10 + "\nFULL MODE:")
env_tr.setMode('full')
torch.manual_seed(123)
agent = Agent_LSTM(env_tr.observation_space.n, env_tr.action_space.n, n_hidden, lr, n_layers)
res_full = train_and_val(agent, env_tr, env_val_full, env_val_minor, N_tr, N_val)

# train on the major mode with n_exclude=1 (9*9*9 = 729)
print("-"*10 + "\nMAJOR MODE (1):")
env_tr.setMode('major', n_exclude=1)
torch.manual_seed(123)
agent = Agent_LSTM(env_tr.observation_space.n, env_tr.action_space.n, n_hidden, lr, n_layers)
res_major_729 = train_and_val(agent, env_tr, env_val_full, env_val_minor, N_tr, N_val)

# train on the major mode with n_exclude=2 (8*8*8 = 512)
# The banner carries the n_exclude value so this run can be told apart from
# the n_exclude=1 run in the printed log.
print("-"*10 + "\nMAJOR MODE (2):")
env_tr.setMode('major', n_exclude=2)
torch.manual_seed(123)
agent = Agent_LSTM(env_tr.observation_space.n, env_tr.action_space.n, n_hidden, lr, n_layers)
res_major_512 = train_and_val(agent, env_tr, env_val_full, env_val_minor, N_tr, N_val)
Example #2
0
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

# Run configuration: number of training / test iterations and the LSTM
# agent's hidden size and learning rate.
N_tr, N_tst = 5000, 500
n_hidden, lr = 50, 0.01

# Build the environment and an LSTM agent sized to its observation and
# action spaces.
env = gym.make('12AX_CPT-v0', size=200)
agent = Agent_LSTM(
    env.observation_space.n,
    env.action_space.n,
    n_hidden,
    lr,
)

# Train with a fixed seed, then save the returned results and plot them.
res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)
train_results_plots(
    dir='./save/ax_cpt',
    figname='LSTM_50',
    names=['LSTM_50'],
    numbers=[res],
)
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import torch
import numpy as np

# Run configuration: iteration counts, LSTM settings and HER settings.
N_tr, N_tst = 2000, 1000
n_hidden, n_layers, lr = 10, 1, 0.01
hierarchy_num, learn_mode = 3, 'SL'

# Seed torch before the environment and agents are created.
torch.manual_seed(123)
env = gym.make('12_AX-v0', size=10, prob_target=0.5)

# LSTM agent sized to the environment's observation and action spaces.
lstm = Agent_LSTM(
    env.observation_space.n,
    env.action_space.n,
    n_hidden,
    lr,
    n_layers,
)

# HER hyperparameters: 'alpha', 'lambd', 'beta' and 'bias' are arrays with
# one entry per hierarchy level; 'gamma' is a single scalar.
hyperparam = {
    'alpha': np.full(hierarchy_num, 0.075),
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.full(hierarchy_num, 15.0),
    'bias': np.array([1 / (10**level) for level in range(hierarchy_num)]),
    'gamma': 15,
}
her = Agent_HER(
    env.observation_space.n,
    env.action_space.n,
    hierarchy_num,
    learn_mode,
    hyperparam,
)

# Train both agents on the same environment with the same seed.
res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)