Example #1
import os
import json
import torch
from pathlib import Path

# NOTE: get_vocab, get_id_label, load_dataset, DataIter and test are assumed to be
# project-local helpers, and InputDirectory/OutputDirectory to be the directory-port
# annotations of the pipeline SDK used by this example.
def fasttext_evaluation(model_testing_result: OutputDirectory(),
                        trained_model_dir: InputDirectory() = None,
                        test_data_dir: InputDirectory() = None):
    """Evaluate a trained FastText model on a test set and write its accuracy to result.json."""
    print('=====================================================')
    print(f'trained_model_dir: {Path(trained_model_dir).resolve()}')
    print(f'test_data_dir: {Path(test_data_dir).resolve()}')
    path_word_to_index = os.path.join(test_data_dir, 'word_to_index.json')
    word_to_index = get_vocab(path_word_to_index)
    path_label = os.path.join(test_data_dir, 'label.txt')
    map_id_label, map_label_id = get_id_label(path_label)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)
    path = os.path.join(trained_model_dir, 'shared_params.json')
    with open(path, 'r', encoding='utf-8') as f:
        shared_params = json.load(f)
    path = os.path.join(test_data_dir, 'data.txt')
    test_samples = load_dataset(file_path=path,
                                max_len=shared_params['max_len'],
                                ngram_size=shared_params['ngram_size'],
                                word_to_index=word_to_index,
                                map_label_id=map_label_id)
    test_iter = DataIter(samples=test_samples, shuffle=False, device=device)
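    # load the best model checkpoint saved during training and evaluate it on the test iterator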
    path = os.path.join(trained_model_dir, 'BestModel')
    model = torch.load(f=path, map_location=device)
    path = os.path.join(model_testing_result, 'result.json')
    acc_ = test(model, test_iter)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump({"acc": acc_}, f)
    print('\n============================================')
Example #2
import gym
import gym_cog_ml_tasks  # assumed to register the Simple_Copy-v0 task used below
import numpy as np
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
# Agent_DQN is assumed to be importable from the project's DQN agent module.

env = gym.make('Simple_Copy-v0', n_char=10, size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0
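
# build the DQN agent with the hyper-parameters defined above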

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 score over the training iterations
save_train_res('./save/simple_copy/DQN_size_100', res)
test(env, agent, N_tst, seed=123)
Example #3
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 80000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
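
# train with progress printing, evaluate on held-out episodes, then save and plot the per-episode results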

res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred/MC_50_.5', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred',
    figname='MC_50_.5',
    names=['MC_50_.5'],
    numbers=[res])
Example #4
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test


env = gym.make('AX_12-v0', size=10, prob_target=0.5)

N_tr = 1000
N_tst = 100
n_hidden = 20
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
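
# train the LSTM agent, then evaluate it on fresh episodes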

train(env, agent, N_tr)
test(env, agent, N_tst)
Example #5
from stack_env import Environment
from dqn import DQNAgent, DoubleDQNAgent
from common.utils import mini_batch_train_frames, test

MAX_FRAMES = 1000000
BATCH_SIZE = 32
mode = 'test'
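# set mode to 'train' to run frame-based mini-batch training instead of evaluation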
env = Environment()
agent = DoubleDQNAgent(env, use_conv=False)
if mode == 'train':
    episode_rewards = mini_batch_train_frames(env, agent, MAX_FRAMES,
                                              BATCH_SIZE)
elif mode == 'test':
    test(env, agent)
Example #6
# Assumed setup for this snippet: BabiEnv, Agent_DRQN and the constants TASK_ID,
# N_tr and N_tst are defined/imported elsewhere in the project; the helpers below
# follow the common.utils API used in the other examples.
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}
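
# create separate training and test instances of the bAbI environment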

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR, \
                    EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'], \
                    numbers=[res_tr])

print('Plots saved for task', TASK_ID)
Example #7
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import train, test, save_train_res, train_results_plots
from MonteCarlo_model import Agent_MC
import torch

torch.manual_seed(123)
env = gym.make('12_AX_S-v0', size=10, prob_target=0.5)

seed = 123
N_tr = 50000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
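
# rewards are scaled by a factor of 10 during training via custom_reward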

res = train(env, agent, N_tr, custom_reward=lambda r: r * 10, seed=seed)
test(env, agent, N_tst, seed=seed)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/MC_10_0.5', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/',
    figname='MC_10_0.5',
    names=['MC_10_0.5'],
    numbers=[res])
import numpy as np
# This fragment assumes env, N_tr and N_tst are already defined (e.g. as in the
# previous script) and that Agent_LSTM, Agent_HER and load_train_res are importable
# from the project's agent modules and common.utils.

n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                  n_layers)

hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                learn_mode, hyperparam)
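
# train both agents with the same seed so their learning curves are directly comparable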

res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 score over the training iterations
save_train_res('./save/LSTM', res_lstm)
save_train_res('./save/HER', res_her)

test(env, lstm, N_tst, seed=123)
test(env, her, N_tst, seed=123)

res_lstm = load_train_res('./save/LSTM.npy')
res_her = load_train_res('./save/HER.npy')
train_results_plots(dir='./save/', figname='test', names=['LSTM', 'HER'], \
                    numbers=[res_lstm, res_her])