Example #1
import numpy as np
from unityagents import UnityEnvironment

# DQNAgent and seed_torch are project-local; the module path below is assumed
from dqn_agent import DQNAgent, seed_torch


def navigation_main():
    # Launch the Unity "Banana collector" environment from the Udacity Navigation project
    env = UnityEnvironment(file_name="Navigation/Banana.app")

    # Seed NumPy and PyTorch for reproducibility
    seed = 777
    np.random.seed(seed)
    seed_torch(seed)

    num_episode = 2000
    memory_size = 10000   # replay buffer capacity
    batch_size = 64
    target_update = 4     # interval at which the target network is synced
    epsilon_decay = 0.9   # multiplicative decay for the epsilon-greedy exploration rate

    agent = DQNAgent(env, memory_size, batch_size, target_update, epsilon_decay)
    agent.train(num_episode)

    agent.test()


if __name__ == "__main__":
    navigation_main()
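
seed_torch is not defined in this snippet. A minimal sketch of what such a helper typically looks like, assuming it only needs to seed PyTorch's RNGs:

import torch

def seed_torch(seed):
    # Hypothetical helper: seed PyTorch's CPU and CUDA RNGs and make cuDNN deterministic
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False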
Example #2
import argparse
import copy

import microgridRLsimulator  # microgrid simulator package; presumably used indirectly by the agents

import params.params as default_params
from agents.random_agent import RandomAgent  # baseline agent (unused here)
from agents.agent_ppo import PPOAgent        # alternative agent (unused here)
from agents.dqn_agent import DQNAgent

parser = argparse.ArgumentParser()
parser.add_argument('--env',
                    '-e',
                    type=str,
                    default='microgrid',
                    choices=['microgrid', 'maze-dense', 'maze-sparse'])

args = parser.parse_args()

# Deep-copy the default configuration so the module-level dict is not mutated,
# then pick the environment and the case to run
params = copy.deepcopy(default_params.params)
params['env']['env'] = args.env
params['env']['case'] = 'elespino_discrete'

agent = DQNAgent(params)

agent.train()
# agent.test()
agent.store_results(render_tr_te=2)
print("End of agent's life")
Example #3
import gym
import torch

from agents.dqn_agent import experienceReplayBuffer_DQN, DQNAgent, QNetwork_DQN


if __name__ == "__main__":
    n_iter = 100000

    # Plants-vs-Zombies gym environment (requires the gym_pvz package)
    env = gym.make('gym_pvz:pvz-env-v2')
    nn_name = input("Save name: ")

    # Replay buffer: 100k transitions, with 10k random steps collected before learning starts
    buffer = experienceReplayBuffer_DQN(memory_size=100000, burn_in=10000)
    net = QNetwork_DQN(env, device='cpu', use_zombienet=False, use_gridnet=False)

    # Optional warm start: load a pretrained zombienet, freeze its weights,
    # and rebuild the optimizer over the remaining trainable parameters
    # old_agent = torch.load("agents/benchmark/dfq5_znet_epslinear")
    # net.zombienet.load_state_dict(old_agent.zombienet.state_dict())
    # for p in net.zombienet.parameters():
    #     p.requires_grad = False
    # net.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
    #                                  lr=net.learning_rate)

    agent = DQNAgent(env, net, buffer, n_iter=n_iter, batch_size=200)
    agent.train(max_episodes=n_iter, evaluate_frequency=5000, evaluate_n_iter=1000)

    # Persist the trained network and the training curves under the chosen name
    torch.save(agent.network, nn_name)
    agent._save_training_data(nn_name)
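
Because torch.save(agent.network, nn_name) pickles the entire module rather than a state dict, the class definition must be importable when the file is loaded back. A short usage sketch for reloading the model (the forward call assumes the network maps an observation tensor to Q-values, which is standard for a DQN but not confirmed by this snippet):

import torch

net = torch.load(nn_name)  # restores the full QNetwork_DQN module
net.eval()

# Hypothetical usage: act greedily for a single observation
obs = env.reset()
with torch.no_grad():
    q_values = net(torch.as_tensor(obs, dtype=torch.float32))
action = int(q_values.argmax())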