def traffic():
    """Run 100 traffic-light control episodes.

    Each episode: reset the environment, spawn cars from two seeded
    generators, let the agent pick light-switch actions, store the
    transitions, and periodically train. Relies on module-level
    ``env``, ``RL``, ``rnd`` and ``cars``.
    """
    for episode in range(100):
        obs = env.reset()
        total_reward = 0
        step_count = 0
        # NOTE(review): both names bind the SAME object ``rnd`` — if ``rnd``
        # is the ``random`` module, seed(2) below overwrites seed(1). Confirm
        # whether two independent generators were intended.
        gen_a = rnd
        gen_b = rnd
        gen_a.seed(1)
        gen_b.seed(2)
        while True:
            step_count += 1
            # time.sleep(0.1)
            cars(gen_a, gen_b)
            env.render()
            action = RL.choose_action(obs)
            # Observation slot 5 presumably encodes time since the last
            # switch — TODO confirm; below 6 the light is forced to stay.
            if int(obs[5]) < 6:
                # print("can not change")
                action = "n"
            # print(action)
            next_obs, reward, done = env.switch_light(action)
            total_reward += reward
            RL.save_memory(obs, action, reward, next_obs)
            # Warm-up of 500 steps before learning, then train every 5th step.
            if step_count > 500 and step_count % 5 == 0:
                RL.learn()
            obs = next_obs
            if done:
                print(total_reward)
                break
def main():
    """Train the agent for ``MAX_EPISODES - 1`` episodes on a gym-style env.

    Per episode: step the environment up to ``MAX_STEP_EPISODES`` times,
    store each transition, learn when the episode terminates, and plot
    the episode's wall-clock duration. Saves the network on exit.
    Relies on module-level ``env``, ``RL``, ``plot_``, ``MAX_EPISODES``
    and ``MAX_STEP_EPISODES``.
    """
    # NOTE(review): starting at 1 skips one episode relative to
    # ``range(MAX_EPISODES)`` — confirm this is intentional.
    for episode in range(1, MAX_EPISODES):
        print(episode, "of episodes", end="\n")
        episode_start = time.time()
        obs = env.reset()
        for step in range(MAX_STEP_EPISODES):
            env.render()
            action = RL.choose_action(obs)
            # Force action 0 for the first five steps of every episode.
            if step < 5:
                action = 0
            next_obs, reward, done, info = env.step(action)
            RL.store_transition(obs, action, reward, False)
            if done:
                # NOTE(review): the terminal state is stored a second time
                # here with reward 0.0 and the done flag — confirm the
                # replay buffer expects this duplicate.
                RL.store_transition(obs, action, 0.0, True)
                RL.learn()
                break
            obs = next_obs
        episode_end = time.time()
        plot_.plot_graph((episode_end - episode_start), episode)
    env.close()
    RL.store_net()