from os.path import join

import numpy as np
import torch
from torch.optim import Adam

# Project-specific helpers (wrap_environment, CNNDQN, PRETRAINED_MODELS,
# parse_args, set_device, initialize_models, ReplayBuffer, train) are
# imported from the surrounding project.


def test(environment, action_space, iteration):
    flag = False
    env = wrap_environment(environment,
                           action_space,
                           monitor=True,
                           iteration=iteration)
    net = CNNDQN(env.observation_space.shape, env.action_space.n)
    net.load_state_dict(
        torch.load(join(PRETRAINED_MODELS, '%s.dat' % environment)))
    net.eval()  # inference only; no gradients are needed here

    total_reward = 0.0
    state = env.reset()
    while True:
        state_v = torch.tensor(np.array([state], copy=False))
        # Greedily select the action with the highest predicted Q-value.
        with torch.no_grad():
            q_vals = net(state_v).numpy()[0]
        action = np.argmax(q_vals)
        state, reward, done, info = env.step(action)
        total_reward += reward
        if info['flag_get']:
            # The info dict from gym-super-mario-bros reports whether Mario
            # reached the end-of-level flag.
            print('WE GOT THE FLAG!!!!!!!')
            flag = True
        if done:
            print(total_reward)
            break

    env.close()
    return flag
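
# For reference, a minimal sketch of what the CNNDQN network loaded above
# might look like. This is an assumption based on the standard Nature-DQN
# architecture (three conv layers plus two fully connected layers); the
# project's actual class may differ.
import torch
import torch.nn as nn


class CNNDQN(nn.Module):
    def __init__(self, input_shape, num_actions):
        super().__init__()
        self._input_shape = input_shape
        # Convolutional feature extractor over the stacked game frames.
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Fully connected head mapping features to one Q-value per action.
        self.fc = nn.Sequential(
            nn.Linear(self._feature_size(), 512),
            nn.ReLU(),
            nn.Linear(512, num_actions),
        )

    def _feature_size(self):
        # Run a dummy forward pass to find the flattened conv output size.
        with torch.no_grad():
            dummy = torch.zeros(1, *self._input_shape)
            return self.features(dummy).view(1, -1).size(1)

    def forward(self, x):
        x = self.features(x).view(x.size(0), -1)
        return self.fc(x)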


def main():
    # Training entry point: build the environment, networks, optimizer, and
    # replay buffer, then hand everything off to the training loop.
    args = parse_args()
    env = wrap_environment(args.environment)
    device = set_device(args.force_cpu)
    model, target_model = initialize_models(env, device)
    optimizer = Adam(model.parameters(), lr=args.learning_rate)
    replay_buffer = ReplayBuffer(args.buffer_capacity)
    train(env, model, target_model, optimizer, replay_buffer, args, device)
    env.close()
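
# A plausible sketch of the set_device helper used in both entry points.
# This assumes it simply picks CUDA when available unless --force-cpu was
# passed; the project's actual implementation may differ.
import torch


def set_device(force_cpu):
    if not force_cpu and torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')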
Example #3
def main():
    # Play back a single episode from a trained checkpoint and report the
    # total reward collected by the agent.
    args = parse_args()
    env = wrap_environment(args.environment, monitor=True)
    device = set_device(args.force_cpu)
    # The target network is only needed for training, so it is discarded.
    model, _ = initialize_models(env, device, args.checkpoint)

    done = False
    state = env.reset()
    episode_reward = 0.0

    while not done:
        # Let the trained model choose an action for the current frame stack.
        action = model.act(state, device)
        next_state, reward, done, _ = env.step(action)
        episode_reward += reward
        state = next_state

    print(f'Episode Reward: {round(episode_reward, 3)}')
    env.close()
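
# The act() method called in the playback loop above is not shown here. A
# minimal greedy version, sketched as a method of the Q-network class, might
# look like the following (an assumption; during training, an epsilon-greedy
# branch would normally be added for exploration).
import numpy as np
import torch


def act(self, state, device):
    # Evaluate the network on the current state and take the action with
    # the highest predicted Q-value.
    state_v = torch.tensor(np.array([state], copy=False)).to(device)
    with torch.no_grad():
        q_vals = self(state_v)
    return int(q_vals.argmax(dim=1).item())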