# Compare how an LSTM agent generalizes on the Simple_Copy_Repeat task when
# trained on the full input distribution vs. 'major' subsets that exclude 1 or 2
# characters; validation always uses the full set and the excluded ('minor') set.
# NOTE(review): relies on gym, torch, Agent_LSTM, train_and_val, N_tr and N_val
# being defined earlier in the file/session — confirm before running standalone.

_ENV_KWARGS = dict(n_char=10, size=3, repeat=2)

env_tr = gym.make('Simple_Copy_Repeat-v1', **_ENV_KWARGS)
env_val_full = gym.make('Simple_Copy_Repeat-v1', **_ENV_KWARGS)
env_val_full.setMode('full')
env_val_minor = gym.make('Simple_Copy_Repeat-v1', **_ENV_KWARGS)
env_val_minor.setMode('minor', n_exclude=1)

n_hidden = 50
n_layers = 1
lr = 0.001


def _fresh_agent():
    """Build a new LSTM agent under a fixed seed so every run starts identically."""
    torch.manual_seed(123)
    return Agent_LSTM(env_tr.observation_space.n, env_tr.action_space.n,
                      n_hidden, lr, n_layers)


# train on the full mode
print("-"*10 + "\nFULL MODE:")
env_tr.setMode('full')
res_full = train_and_val(_fresh_agent(), env_tr, env_val_full, env_val_minor, N_tr, N_val)

# train on the major mode with n_exclude=1 (9*9*9 = 729)
print("-"*10 + "\nMAJOR MODE (1):")
env_tr.setMode('major', n_exclude=1)
res_major_729 = train_and_val(_fresh_agent(), env_tr, env_val_full, env_val_minor, N_tr, N_val)

# train on the major mode with n_exclude=2 (8*8*8 = 512)
# BUG FIX: this label previously said "MAJOR MODE (1)" despite n_exclude=2.
print("-"*10 + "\nMAJOR MODE (2):")
env_tr.setMode('major', n_exclude=2)
res_major_512 = train_and_val(_fresh_agent(), env_tr, env_val_full, env_val_minor, N_tr, N_val)
# Train a 50-hidden-unit LSTM agent on the 12AX-CPT task, then save the
# training results to disk and render the training-curve plot.
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

# Experiment configuration.
env = gym.make('12AX_CPT-v0', size=200)
N_tr = 5000    # training episodes
N_tst = 500    # test episodes (used by other cells)
n_hidden = 50  # LSTM hidden size
lr = 0.01      # learning rate

# Build the agent, train with a fixed seed, then persist and plot the results.
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)
train_results_plots(dir='./save/ax_cpt', figname='LSTM_50', names=['LSTM_50'], numbers=[res])
# Compare an LSTM agent against a HER agent on the 12-AX task under identical
# training conditions (same env, same episode budget, same seed).
# FIX: this block used gym and Agent_LSTM without importing them, so it raised
# NameError when run standalone; both imports are added (harmless if the file
# already imports them elsewhere).
import gym
from HER.HER_model import Agent_HER
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import torch
import numpy as np

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)
N_tr = 2000
N_tst = 1000

# LSTM baseline.
n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)

# HER agent: one entry per hierarchy level for each per-level hyperparameter
# (see Agent_HER for their exact semantics — not visible from this file).
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)

# Train both agents with the same seed for a fair comparison.
res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)