"""Replay a saved policy network in the LunarLander-v2 Gym environment.

Loads the weights stored in ``savedmodel`` into a ``NeuralNet`` and runs a
fixed number of rendered episodes, choosing at every step the action with the
highest network output. The network input is the current observation with the
most recent reward appended as the last element.
"""
import gym
import torch

from neural_net import NeuralNet

NUM_EPISODES = 10
MAX_STEPS_PER_EPISODE = 1000

model = NeuralNet()
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load("savedmodel"))
# Inference only: put dropout / batch-norm style layers (if any) in eval mode.
model.eval()

env = gym.make("LunarLander-v2")
try:
    for _episode in range(NUM_EPISODES):
        observation = env.reset()

        # Take one random step first so that a reward value exists to feed
        # into the network on the first policy step.
        # NOTE(review): `done` from this bootstrap step is not checked before
        # the policy loop starts; kept as in the original behaviour.
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        for t in range(MAX_STEPS_PER_EPISODE):
            # render the environment
            env.render()

            # Network input = observation values followed by the last reward.
            data = observation.tolist()
            data.append(reward)

            # No gradients are needed while replaying the policy.
            with torch.no_grad():
                scores = model(torch.tensor(data).float())

            # Greedy action: index of the largest output (first one on ties).
            # Assumes the model returns a 1-D tensor of per-action scores.
            action = int(torch.argmax(scores))

            observation, reward, done, info = env.step(action)
            print(action, observation, reward, info)

            if done:
                print("Episode finished after {} timestep".format(t + 1))
                break
finally:
    # Always release the environment (and its render window), even if a
    # step or the model forward pass raises.
    env.close()