Esempio n. 1
0
def run(args):
    if args.train_dqn:
        env_name = args.env_name or 'BreakoutNoFrameskip-v4'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        agent.train()

    if args.test_dqn:
        env = Environment('BreakoutNoFrameskip-v4', args, atari_wrapper=True, test=True)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        test(agent, env, total_episodes=100)
Esempio n. 2
0
def run(args):
    if args.train_dqn:
        env_name = args.env_name or 'SEVN-Test-AllObs-Shaped-v1'
        env = gym.make(env_name)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        print("Device: ", device)
        print("n_heads: ", args.n_heads)
        agent.train()

    if args.test_dqn:
        current_time = '2020-12-22-00:03:03'
        env_name = args.env_name or 'SEVN-Test-AllObs-Shaped-v1'
        env = gym.make(env_name)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        test(agent, env, 10, current_time)
Esempio n. 3
0
def run(args):
    if args.train_dqn:
        env_name = args.env_name or 'BreakoutNoFrameskip-v4'
        env = Environment(env_name, args, atari_wrapper=True)
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        agent.train()

    if args.test_dqn:
        env = Environment('BreakoutNoFrameskip-v4',
                          args,
                          atari_wrapper=True,
                          test=True)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        test(agent, env, total_episodes=100)
Esempio n. 4
0
    args = parser.parse_args()
    return args


args = parse()
env = Environment('BreakoutNoFrameskip-v4', "", atari_wrapper=True, test=False)
n = env.action_space
state = env.reset()
device = torch.device("cpu")
input = torch.tensor(state, device=device)
agent = Agent_DQN(env, args)
dqn = DQN()
torch.save(dqn.state_dict(), "checkpoint.pth")
state_dict = torch.load("checkpoint.pth")

agent.train()
# Experience = namedtuple(
#             'Experience',
#             ('state','action','next_state','reward')
#         )
# e = Experience(state,action,next_state,reward)
# print(e)
# agent.train()

# print("shape:",input.shape)
#
# input = input.unsqueeze(0).to(device)
# input = input.permute(0, 3, 1, 2)
# input = input.to(device=device, dtype=torch.float)
#
# print("shape:",input.shape)