# DQN training setup: seed RNGs, pick a device, build the environment, the online
# agent and its target network, create the optimizer, and prefill the replay buffer.
# Relies on names defined elsewhere in the file: seed, buffer_size, step, total_steps,
# init_epsilon, final_epsilon, decay_steps, make_env, DQNAgent, ReplayBuffer,
# play_and_record, linear_decay, trange.
torch.manual_seed(seed)

# Use the first CUDA GPU when available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
#device = torch.device('cpu')
print(device)

env = make_env(seed)
state_shape = env.observation_space.shape  # shape of a single observation
n_actions = env.action_space.n             # size of the discrete action space
state = env.reset()

# Online network starts highly exploratory (epsilon=0.9).
agent = DQNAgent(state_shape, n_actions, epsilon=0.9).to(device)
#agent.load_state_dict(torch.load('dqn.weights'))
# Target network is initialized as an exact copy of the online network's weights.
target_network = DQNAgent(state_shape, n_actions).to(device)
target_network.load_state_dict(agent.state_dict())

opt = torch.optim.Adam(agent.parameters(), lr=1e-4)

# Prefill the replay buffer: up to 100 rollouts of 100 steps each,
# stopping early once the buffer reaches its capacity.
exp_replay = ReplayBuffer(buffer_size)
print('test_buffer')
for i in range(100):
    play_and_record(state, agent, env, exp_replay, n_steps=10**2)
    if len(exp_replay) == buffer_size:
        break
print(len(exp_replay))

state = env.reset()

# Main training loop (its body continues beyond this chunk).
# NOTE(review): `step` is read as trange's start value before any visible assignment —
# presumably set earlier in the file (e.g. step = 1, or restored from a checkpoint); confirm.
for step in trange(step, total_steps + 1):
    # Decay exploration rate from init_epsilon toward final_epsilon over decay_steps
    # (presumably a linear schedule, per the `linear_decay` helper's name — confirm).
    agent.epsilon = linear_decay(init_epsilon, final_epsilon, step, decay_steps)