import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy-v0', n_char=10, size=100)

N_tr = 2000
N_tst = 1000
BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # greedy policy
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over iterations
save_train_res('./save/simple_copy/DQN_size_100', res)
test(env, agent, N_tst, seed=123)
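# The load_train_res / train_results_plots imports above are otherwise unused,
# which suggests a plotting step like the one in the other scripts. A minimal
# sketch, assuming save_train_res stored a .npy file at the path used above
# (the figname is an illustrative choice):
res = load_train_res('./save/simple_copy/DQN_size_100.npy')
train_results_plots(dir='./save/simple_copy', figname='DQN_size_100',
                    names=['DQN_size_100'], numbers=[res])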
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format='[%(asctime)s %(filename)s] %(message)s')

    json_file = open('params.json')
    json_str = json_file.read()
    config = json.loads(json_str)

    if len(sys.argv) == 2:
        config['policy_dir'] = sys.argv[1]

    # loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])

    # Getting the criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    ###################
    # Train and Prune #
    ###################
    done = False
    epoch = 0
    while not done:
        logging.info('')
        logging.info('Global step {}'.format(epoch))
        logging.info('')

        train(model, train_loader, valid_loader, criterion, optimizer,
              config['train']['epochs'], config['train']['print_every'],
              config['device'])

        if epoch % config['prune']['each'] == 0 and epoch != 0:
            logging.info('Pruning the model')
            model = __prune(model, config)
            if config['prune']['reward_weights']:
                logging.info('Rewarding the weights')
                model.reward()

        done = __verify_stop(model, config)
        epoch += 1

    ##############
    # Last Train #
    ##############
    train(model, train_loader, valid_loader, criterion, optimizer, 5,
          config['train']['print_every'], config['device'])

    checkpoint = config['policy_dir'] + '/Train-{}-epochs__Prune-each-{}__Prune-rate-{}.pt'.format(
        config['train']['epochs'], config['prune']['each'], config['prune']['rate'])
    torch.save(model.state_dict(), checkpoint)
    logging.info("Model checkpoint saved to %s" % checkpoint)

    ######################
    # Validate the model #
    ######################
    test_loss = 0.0
    classes = range(0, 10)
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))

    model.eval()  # prep model for evaluation
    for data, target in valid_loader:
        data, target = data.to(config['device']), target.to(config['device'])
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss
        test_loss += loss.item() * data.size(0)
        # convert output probabilities to predicted class
        _, pred = torch.max(output, 1)
        # compare predictions to the true labels
        correct = np.squeeze(pred.eq(target.data.view_as(pred)))
        # tally per-class accuracy
        for i in range(len(target)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    # calculate and print the average validation loss
    test_loss = test_loss / len(valid_loader.sampler)
    logging.info('Valid Loss: {:.6f}\n'.format(test_loss))

    results = []
    for i in range(10):
        if class_total[i] > 0:
            logging.info('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
            results.append([str(i), 100 * class_correct[i] / class_total[i],
                            np.sum(class_correct[i]), np.sum(class_total[i])])
        else:
            logging.info('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

    logging.info('Test Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))

    results = pd.DataFrame(results)
    results.to_csv(
        config['policy_dir'] + '/Accuracy__Lottery-train-{}-epochs__Prune-each-{}__Prune-rate-{}.tsv'.format(
            config['train']['epochs'], config['prune']['each'], config['prune']['rate']),
        index=False,
        header=['Class', 'Accuracy', 'Right_Instances', 'Total_Instances'],
        sep='\t')
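# __verify_stop is called in the train-and-prune loop above but is not shown
# here. A minimal sketch, assuming it stops once the masks reach a target
# sparsity; the 'stop_percent' config key and the mask layout (an iterable of
# 0/1 tensors on the model) are assumptions, not the original code.
def __verify_stop(model, config):
    kept = sum(float(m.sum()) for m in model.masks)  # surviving weights
    total = sum(m.numel() for m in model.masks)      # all maskable weights
    sparsity = 1.0 - kept / total
    return sparsity >= config['prune']['stop_percent']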
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('12AX_CPT-v0', size=200)

N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)
train_results_plots(dir='./save/ax_cpt', figname='LSTM_50', names=['LSTM_50'], numbers=[res])
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")

    json_file = open('params.json')
    json_str = json_file.read()
    config = json.loads(json_str)

    args = __create_args()
    config = __adjust_config(args, config)

    # loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])
    initial_mask = model.masks

    # Getting the criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    #########################
    # Agent and Environment #
    #########################
    ACTIONS = create_environment(model.masks, config['environment_protocol'])
    random.seed(42)
    shuffle(ACTIONS)

    N_STATES = len(ACTIONS)
    N_EPISODES = config['mdp']['N_EPISODES']
    MAX_STEPS_PER_EPISODE = config['mdp']['MAX_STEPS_PER_EPISODES']
    MIN_ALPHA = config['mdp']['MIN_ALPHA']
    GAMMA = config['mdp']['GAMMA']

    # learning rate decays linearly from 1.0 down to MIN_ALPHA over the episodes
    alphas = np.linspace(1.0, MIN_ALPHA, N_EPISODES)
    q_table = dict()
    start_state = State(model.masks, ACTIONS)

    ##########################
    # Create sub_working_dir #
    ##########################
    sub_working_dir = '{}/results/{}/{}/{}/{}'.format(
        config['working_dir'],
        config['model']['name'],
        '_' + config['mdp']['Q_COMPUTATION'],
        '{}_{}_{}/{}_{}'.format(time.strftime("%d", time.localtime()),
                                time.strftime("%m", time.localtime()),
                                time.strftime("%Y", time.localtime()),
                                time.strftime("%H", time.localtime()),
                                time.strftime("%M", time.localtime())),
        'ALPHA_SEARCH__MIN_ALPHA-{}__GAMMA-{}__PRUNE_TYPE-{}__PRUNE_PERCENT-{}__EPSILON-{}__REWARD_TYPE-{}'.format(
            MIN_ALPHA,
            GAMMA if config['mdp']['Q_COMPUTATION'] != 'QL_M' else 'None',
            config['environment_protocol'],
            config['agent']['prune_percentage'],
            config['agent']['epsilon'],
            config['agent']['reward_type']))

    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer,
          config['train']['epochs'], config['train']['print_every'],
          config['device'])

    loss, accuracy = validation(model, valid_loader, criterion)
    logging.info('Validation Loss performed: {}\tValidation Accuracy performed: {}'.format(
        loss, accuracy))

    if config['agent']['reward_type'] == 'ACCURACY':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'LOSS':
        start_state.last_reward = -loss
    elif config['agent']['reward_type'] == 'ACC_COMPRESSION':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'MY_RCRA':
        start_state.last_reward = -(1. - accuracy)

    #########
    # Prune #
    #########
    for e in range(N_EPISODES):
        state = deepcopy(start_state)
        total_reward = .0
        ALPHA = alphas[e]
        agent = Agent(config, ACTIONS, model, valid_loader, criterion)

        for i in range(MAX_STEPS_PER_EPISODE):
            action = agent.choose_action(q_table, state)
            next_state, reward, done = agent.act(state, action)
            total_reward += reward

            if config['mdp']['Q_COMPUTATION'] == 'QL_M':
                # Q-Learning update from Ghallab, Nau and Traverso
                q_value(q_table, state)[action] = q_value(q_table, state, action) + \
                    ALPHA * (reward + np.max(q_value(q_table, next_state)) -
                             q_value(q_table, state, action))
            elif config['mdp']['Q_COMPUTATION'] == 'QL_WIKI':
                # Q-Learning update as given on Wikipedia
                q_value(q_table, state)[action] = (1. - ALPHA) * q_value(q_table, state, action) + \
                    ALPHA * (reward + GAMMA * np.max(q_value(q_table, next_state)))

            del state
            state = next_state
            if done:
                break

        logging.info("Episode {}: reward type {}: total reward -> {}".format(
            e + 1, config['agent']['reward_type'], total_reward))

    #####################
    # Save the solution #
    #####################
    q_table_saver(q_table, config['sub_working_dir'], '/q_table.tsv')

    agent = Agent(config, ACTIONS, model, valid_loader, criterion)
    my_state = start_state
    result = []
    done = False
    while not done:
        sa = q_value(q_table, my_state)
        my_action = np.argmax(sa)
        action = my_state.environment[my_action]
        my_state, reward, done = agent.act(my_state, my_action)
        result.append([action, reward])

    final = pd.DataFrame(result, columns=['Action', 'Reward'])
    final.to_csv(config['sub_working_dir'] + '/actions_to_prune.tsv', sep='\t', index=False)
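# A minimal sketch of the q_value helper assumed by the Q-learning updates
# above. Only its call pattern is visible in the script: q_value(q_table, s)
# returns the whole row of action values and q_value(q_table, s, a) a single
# entry. The body below (lazy zero-initialization keyed on the State object,
# one value per action in state.environment) is an assumption, not the
# original code; it requires State to be hashable.
def q_value(q_table, state, action=None):
    if state not in q_table:
        q_table[state] = np.zeros(len(state.environment))  # one value per action
    if action is None:
        return q_table[state]      # full row: Q(s, .)
    return q_table[state][action]  # single entry: Q(s, a)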
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test

env = gym.make('AX_12-v0', size=10, prob_target=0.5)

N_tr = 1000
N_tst = 100
n_hidden = 20
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
train(env, agent, N_tr)
test(env, agent, N_tst)
MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR,
                   EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'],
                    numbers=[res_tr])
print('Plots saved for task', TASK_ID)
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")

    json_file = open('params.json')
    json_str = json_file.read()
    config = json.loads(json_str)

    # Create sub_working_dir
    sub_working_dir = '{}/{}/try{}/{}'.format(
        config['working_dir'],
        config['model']['name'],
        config['try'],
        # time.strftime("%Y%m%d%H%M%S", time.localtime())
        '{}_{}/{}/{}_{}'.format(
            time.strftime("%Y", time.localtime()),
            time.strftime("%m", time.localtime()),
            time.strftime("%d", time.localtime()),
            time.strftime("%H", time.localtime()),
            time.strftime("%M", time.localtime())))
    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    # Create the TensorBoard summary writer
    config["tensorboard_writer"] = SummaryWriter(sub_working_dir)
    logging.info("Please use 'python -m tensorboard.main --logdir={}'".format(sub_working_dir))

    # loading the dataset
    train_loader, valid_loader, test_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])

    # Getting the criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer,
          config['train']['epochs'], config['train']['print_every'],
          config['device'])

    ##############
    # Begin Test #
    ##############
    # initialize counters for test loss and per-class accuracy
    test_loss = 0.0
    classes = range(0, 10)
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))

    model.eval()  # prep model for evaluation
    for data, target in test_loader:
        data, target = data.to(config['device']), target.to(config['device'])
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss
        test_loss += loss.item() * data.size(0)
        # convert output probabilities to predicted class
        _, pred = torch.max(output, 1)
        # compare predictions to the true labels
        correct = np.squeeze(pred.eq(target.data.view_as(pred)))
        # tally per-class accuracy
        for i in range(len(target)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    # calculate and log the average test loss
    test_loss = test_loss / len(test_loader.sampler)
    logging.info('Test Loss: {:.6f}\n'.format(test_loss))

    for i in range(10):
        if class_total[i] > 0:
            logging.info('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            logging.info('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

    logging.info('Test Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))
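# The SummaryWriter stored in config above is presumably consumed inside
# train(). A self-contained, hedged illustration of the kind of logging it
# enables, using the standard torch.utils.tensorboard API; the log dir, tag
# names and loss values here are illustrative only.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('./runs/demo')           # illustrative log dir
for step, loss in enumerate([2.3, 1.1, 0.6]):   # illustrative loss values
    writer.add_scalar('train/loss', loss, global_step=step)
writer.close()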
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import train, test, save_train_res, train_results_plots
from MonteCarlo_model import Agent_MC
import torch

torch.manual_seed(123)

env = gym.make('12_AX_S-v0', size=10, prob_target=0.5)
seed = 123
N_tr = 50000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, custom_reward=lambda r: r * 10, seed=seed)
test(env, agent, N_tst, seed=seed)

save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/MC_10_0.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/',
                    figname='MC_10_0.5', names=['MC_10_0.5'], numbers=[res])
def fasttext_train(trained_model_dir: OutputDirectory(type='ModelDirectory'),
                   training_data_dir: InputDirectory() = None,
                   validation_data_dir: InputDirectory() = None,
                   epochs=1,
                   batch_size=64,
                   max_len=32,
                   embed_dim=300,
                   hidden_size=256,
                   ngram_size=200000,
                   dropout=0.5,
                   learning_rate=0.001):
    print('============================================')
    print('training_data_dir:', training_data_dir)
    print('validation_data_dir:', validation_data_dir)

    path_word_to_index = os.path.join(training_data_dir, 'word_to_index.json')
    word_to_index = get_vocab(path_word_to_index)
    path_label = os.path.join(training_data_dir, 'label.txt')
    map_id_label, map_label_id = get_id_label(path_label)
    class_num = len(map_id_label)
    vocab_size = len(word_to_index)
    stop_patience = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # load the training dataset
    path = os.path.join(training_data_dir, 'data.txt')
    train_samples = load_dataset(file_path=path, word_to_index=word_to_index,
                                 map_label_id=map_label_id, max_len=max_len,
                                 ngram_size=ngram_size)
    train_iter = DataIter(samples=train_samples, batch_size=batch_size,
                          shuffle=True, device=device)

    # load the validation dataset
    path = os.path.join(validation_data_dir, 'data.txt')
    dev_samples = load_dataset(file_path=path, word_to_index=word_to_index,
                               map_label_id=map_label_id, max_len=max_len,
                               ngram_size=ngram_size)
    dev_iter = DataIter(samples=dev_samples, batch_size=batch_size,
                        shuffle=True, device=device)

    model = FastText(vocab_size=vocab_size, class_num=class_num, dropout=dropout,
                     embed_dim=embed_dim, hidden_size=hidden_size,
                     ngram_size=ngram_size)
    # inspect the model's parameters
    print(model.parameters)

    # copy word_to_index.json and label.txt for later scoring
    shutil.copy(src=path_word_to_index, dst=trained_model_dir)
    shutil.copy(src=path_label, dst=trained_model_dir)

    # shared parameters for loading the dataset at scoring time
    shared_params = {'max_len': max_len, 'ngram_size': ngram_size}
    path = os.path.join(trained_model_dir, 'shared_params.json')
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(shared_params, f)

    start = time.time()
    train(model, trained_model_dir, train_iter=train_iter, dev_iter=dev_iter,
          epochs=epochs, learning_rate=learning_rate,
          stop_patience=stop_patience, device=device)
    end = time.time()
    print('\nduration of training process: %.2f sec' % (end - start))
    print('============================================')
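# A hypothetical local invocation of fasttext_train. The paths are
# illustrative and assume each directory already contains the
# word_to_index.json / label.txt / data.txt files produced by an upstream
# preprocessing step; the annotations do not prevent calling it directly
# with plain path strings.
if __name__ == '__main__':
    fasttext_train(trained_model_dir='./outputs/fasttext_model',
                   training_data_dir='./data/train',
                   validation_data_dir='./data/valid',
                   epochs=2)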
n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)

hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10 ** i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                learn_mode, hyperparam)

res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)

# save the training records: every episode's reward, action accuracy and F1 over iterations
save_train_res('./save/LSTM', res_lstm)
save_train_res('./save/HER', res_her)

test(env, lstm, N_tst, seed=123)
test(env, her, N_tst, seed=123)

res_lstm = load_train_res('./save/LSTM.npy')
res_her = load_train_res('./save/HER.npy')
train_results_plots(dir='./save/', figname='test', names=['LSTM', 'HER'],
                    numbers=[res_lstm, res_her])