                    if xx + 1 <= grid_x and in_cars[xx + 1][yy][3] > 0:
                        cross_ = cross[lab]
                        cross_.car_nums[3] += in_cars[xx + 1][yy][3]
                        cross[lab] = cross_
        # In the same diagram as 'visual_before' above, show the incoming cars
        # arriving from the perimeter.
        visual.visual_peri(peri_cars, x, y, times, b, bias, bias_, bias_t,
                           grid_x, grid_y)

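        # Reward: negative sum of squared queue lengths over every intersection
        # and approach, so long, unbalanced queues are penalised quadratically.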
        reward = 0
        for xx in x:
            for yy in y:
                lab = str(xx) + str(yy)
                for i in range(4):
                    reward = reward - cross[lab].car_nums[i]**2
        # Show the result of the state transformation in another diagram,
        # 'visual_after', i.e. the state that follows 'visual_before'.
        visual.visual_after(cross, x, y, times, b, bias, bias_t)
        time.sleep(10)

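        # Next observation: concatenate each intersection's queue lengths and
        # current light state into one flat vector.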
        obs_ = []
        for xx in x:
            for yy in y:
                lab = str(xx) + str(yy)
                obs_ = np.concatenate(
                    (obs_, cross[lab].car_nums, cross[lab].light_state),
                    axis=None)

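        # Store the transition (s, a, r, s') for replay; the agent starts
        # learning only after 200 warm-up steps.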
        RL.store_transition(obs, action, reward, obs_)

        if steps > 200:
            RL.learn()
        if steps % 50 == 0:
            print(steps, reward)
Code Example #2
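        # Two-intersection variant of the same training loop: apply the chosen
        # actions, pass the cars leaving one intersection to the other, then
        # build the reward and observation as in the grid version above.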
        peri_cars1, in_cars1 = cross1.state_change(action1)

        peri_cars2, in_cars2 = cross2.state_change(action2)
        print(peri_cars1, in_cars1, peri_cars2, in_cars2)
        
        visual.visual_peri(peri_cars1, peri_cars2)

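        # Cars leaving one intersection join the other's queues on approaches
        # whose q_states flag is set; the reward again penalises the squared
        # queue lengths at both intersections.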
        reward = 0
        for i in range(4):
            if cross2.q_states[i] == 1:
                cross2.car_nums[i] += in_cars1
            if cross1.q_states[i] == 1:
                cross1.car_nums[i] += in_cars2
            reward = reward - cross1.car_nums[i]**2 - cross2.car_nums[i]**2
        
        visual.visual_after(cross1, cross2)

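        # Next observation: both intersections' queue lengths and light states
        # flattened into a single vector.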
        obs_ = np.concatenate((cross1.car_nums, cross1.light_state,
                               cross2.car_nums, cross2.light_state), axis=None)
        RL.store_transition(obs, action, reward, obs_)
        if steps > 200:
            RL.learn()
        if steps % 50 == 0:
            print(steps, reward)
            
        reward_set.append(reward)
        step_set.append(steps)
        #plt.scatter(steps, reward)
        obs = obs_
        
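    # After the training loop, plot the reward curve over all recorded steps.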
    # window.mainloop()
    plt.plot(step_set, reward_set)
    plt.show()