replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)

# Running step counter used to gate learning: the episode loop only calls
# RL.learn() once this value exceeds 1000. Presumably it counts environment
# steps across all episodes -- TODO confirm where it is incremented.
total_steps = 0

# Presumably collects each episode's accumulated reward (ep_r); nothing in the
# code visible here appends to it -- TODO confirm against the rest of the file.
ep_rhistory = []


for i_episode in range(500):

    observation = env.reset()
    ep_r = 0
    # Run a single episode: choose an action, step the environment, store the
    # transition and, once enough steps have been taken, train the agent.
    # The loop ends when the environment reports `done`.
    while True:
        # env.render()

        action = RL.choose_action(observation.flatten())

        observation_, reward, done, info = env.step(action)

        # Optional reward shaping, currently disabled:
        # the smaller theta and closer to center the better
        # print(observation_)
        # x, x_dot, theta, theta_dot = observation_
        # r1 = (env.x_threshold - abs(x))/env.x_threshold - 0.8
        # r2 = (env.theta_threshold_radians - abs(theta))/env.theta_threshold_radians - 0.5
        # reward = r1 + r2

        RL.store_transition(observation.flatten(), action, reward, observation_.flatten())

        ep_r += reward

        # Count every stored transition; without this the warm-up threshold
        # below could never be crossed and the agent would never learn.
        total_steps += 1
        if total_steps > 1000:
            RL.learn()

        # Leave the episode when the environment signals termination;
        # otherwise the loop would spin forever on a finished episode.
        if done:
            break

        # Advance the state so the next action and the next stored transition
        # are based on the latest observation rather than the reset one.
        observation = observation_
    # Initialisation: build the crossroads container and the Visual helper,
    # then assemble the starting observation vector.
    cross = crossroads_map(x, y)
    visual = Visual()

    # Visit crossroads in x-major order. Each one is looked up by the key
    # str(xx) + str(yy) and contributes its car_nums followed by its
    # light_state to the growing observation.
    obs = []
    for xx in x:
        for yy in y:
            lab = str(xx) + str(yy)
            crossroad = cross[lab]
            state_parts = (obs, crossroad.car_nums, crossroad.light_state)
            # axis=None flattens every part before joining them end to end.
            obs = np.concatenate(state_parts, axis=None)

    #Training steps
    for steps in range(200000):

        visual.visual_before(cross, x, y, times, b, bias, bias_t)

        action = RL.choose_action(obs)
        action_set = [[0 for i in range(grid_y + 1)]
                      for j in range(grid_x + 1)]
        peri_cars = [[([0] * 4) for i in range(grid_y + 1)]
                     for j in range(grid_x + 1)]
        in_cars = [[([0] * 4) for i in range(grid_y + 1)]
                   for j in range(grid_x + 1)]

        #light state changes, cars numbers change, interactions between crossroads and peripherals
        for xx in x:
            for yy in y:
                lab = str(xx) + str(yy)
                # Decimal -> binary decoding of the single action value. For example,
                # action=128 written as a 9-bit binary number is 010000000, which for a
                # 3*3 grid gives the per-crossroad actions
                # '0''1''0''0''0''0''0''0''0' (stored in action_set). Each per-crossroad
                # action is either '0' or '1', for 'change state' or 'keep on'.
                # The binary number is set to be (grid_x*grid_y) bits,