def traffic():
    """Run 100 training episodes on the traffic-light environment.

    Each episode resets the environment, steps until the environment
    reports ``done``, forces a no-switch action while the current light
    phase is too young, stores every transition in RL's replay memory,
    and starts calling ``RL.learn()`` once enough experience has been
    collected. The accumulated episode reward is printed at episode end.
    """
    for episode in range(100):
        observation = env.reset()
        t_reward = 0
        step = 0
        # BUG FIX: the original did `r1 = rnd; r2 = rnd`, which aliases the
        # SAME generator object, so `r1.seed(1)` was immediately overwritten
        # by `r2.seed(2)` and both "independent" streams were identical.
        # Use two separate Random instances so each car stream gets its own
        # reproducible sequence. (Assumes `rnd` is the stdlib `random`
        # module — TODO confirm against the import at the top of the file.)
        r1 = rnd.Random(1)
        r2 = rnd.Random(2)
        while True:
            step += 1
            cars(r1, r2)  # spawn/advance cars driven by the two RNG streams
            env.render()
            action = RL.choose_action(observation)
            # Force "no change" while the phase is too young
            # (observation[5] presumably counts time since the last
            # switch — verify against the environment's state layout).
            if int(observation[5]) < 6:
                action = "n"
            observation_, reward, done = env.switch_light(action)
            t_reward += reward
            RL.save_memory(observation, action, reward, observation_)
            # Warm-up: only learn after 500 steps, then on every 5th step.
            if step > 500 and step % 5 == 0:
                RL.learn()

            observation = observation_
            if done:
                print(t_reward)
                break
def main():
    """Train RL for up to MAX_EPISODES episodes and plot each episode's wall time.

    Each episode resets the environment, holds action 0 for the first
    few steps, stores every transition, and on termination records a
    terminal transition and triggers learning. After all episodes the
    environment is closed and the trained network is persisted.
    """
    for episode in range(1, MAX_EPISODES):
        print(episode, "of episodes", end="\n")
        episode_start = time.time()
        observation = env.reset()
        for step in range(MAX_STEP_EPISODES):
            env.render()
            action = RL.choose_action(observation)
            # Warm-up: pin the action to 0 for the first few steps.
            if step < 5:
                action = 0
            next_observation, reward, done, info = env.step(action)
            RL.store_transition(observation, action, reward, False)

            if not done:
                observation = next_observation
                continue
            # Episode over: record a terminal marker transition and learn.
            RL.store_transition(observation, action, 0.0, True)
            RL.learn()
            break

        elapsed = time.time() - episode_start
        plot_.plot_graph(elapsed, episode)
    env.close()
    RL.store_net()