def test(environment, action_space, iteration):
    """Run a single evaluation episode with a pretrained DQN agent.

    Loads the '%s.dat' checkpoint for *environment* from PRETRAINED_MODELS,
    plays one monitored episode with a greedy (argmax-Q) policy, and prints
    the total episode reward when it ends.

    :param environment: Environment name used both to build the env and to
        locate the pretrained weights file.
    :param action_space: Action space passed through to wrap_environment.
    :param iteration: Forwarded to wrap_environment (presumably tags the
        monitor output for this run — confirm against wrap_environment).
    :return: True if the info dict reported 'flag_get' at any step
        (level completed), False otherwise.
    """
    flag = False
    env = wrap_environment(environment, action_space, monitor=True,
                           iteration=iteration)
    net = CNNDQN(env.observation_space.shape, env.action_space.n)
    net.load_state_dict(
        torch.load(join(PRETRAINED_MODELS, '%s.dat' % environment)))
    # Inference only: switch off train-mode layers (dropout/batchnorm).
    net.eval()
    total_reward = 0.0
    state = env.reset()
    try:
        # no_grad avoids building an autograd graph on every forward pass;
        # the original relied on .data to detach, which still constructs
        # the graph and wastes memory.
        with torch.no_grad():
            while True:
                state_v = torch.tensor(np.array([state], copy=False))
                q_vals = net(state_v).data.numpy()[0]
                # Greedy action selection over the predicted Q-values.
                action = np.argmax(q_vals)
                state, reward, done, info = env.step(action)
                total_reward += reward
                if info['flag_get']:
                    print('WE GOT THE FLAG!!!!!!!')
                    flag = True
                if done:
                    print(total_reward)
                    break
    finally:
        # Guarantee the monitor/env is closed even if a step raises.
        env.close()
    return flag
def main():
    """Entry point for training: build the env, models, optimizer and
    replay buffer from CLI arguments, then hand everything to train().
    """
    args = parse_args()
    environment = wrap_environment(args.environment)
    device = set_device(args.force_cpu)
    # Online network and its periodically-synced target copy.
    policy_net, target_net = initialize_models(environment, device)
    optimizer = Adam(policy_net.parameters(), lr=args.learning_rate)
    memory = ReplayBuffer(args.buffer_capacity)
    train(environment, policy_net, target_net, optimizer, memory, args,
          device)
    environment.close()
def main():
    """Entry point for evaluation: play one monitored episode with a
    checkpointed model and print the total reward collected.
    """
    args = parse_args()
    environment = wrap_environment(args.environment, monitor=True)
    device = set_device(args.force_cpu)
    model, target_model = initialize_models(environment, device,
                                            args.checkpoint)
    episode_reward = 0.0
    observation = environment.reset()
    finished = False
    # Roll the policy forward until the episode terminates.
    while not finished:
        chosen = model.act(observation, device)
        observation, step_reward, finished, _ = environment.step(chosen)
        episode_reward += step_reward
    print(f'Episode Reward: {round(episode_reward, 3)}')
    environment.close()