# Load the agent's saved state from the .h5 file before running episodes
# (presumably trained network weights -- confirm against agent.load).
agent.load("./save/example_dqn.h5")

done = False
batch_size = 32

for e in range(EPISODES):
    # Begin every episode from a fresh observation, reshaped into a single
    # row of state_size values.
    state = np.reshape(env.reset(), [1, state_size])
    last_reward = 0

    for time in range(1000):  # at most 1000 steps per episode
        env.render()

        # The agent chooses an index into `actions` through act_2; the
        # plain agent.act(state) call was used here before and is disabled.
        action = agent.act_2(state)
        commands = actions[action]
        # NOTE: a manual-control path used to override `commands` with
        # actions[env.key - 257] when env.key was non-empty, falling back
        # to actions[0] otherwise; it is currently disabled.

        next_state, reward, done, _ = env.step2(commands)
        # NOTE: replacing the reward with -10 on `done` is disabled; the
        # environment's reward is used unchanged.

        # On the very first step, seed last_reward one below that step's
        # reward.
        if time == 0:
            last_reward = reward - 1