Example #1
sess.run(tf.global_variables_initializer())

if load_model:
    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(path)
    saver.restore(sess, ckpt.model_checkpoint_path)
updateTarget(targetOps, sess)  # Set the target network equal to the primary network (a common implementation is sketched after this example).

tf.summary.FileWriter("logs/", sess.graph)
# Run the training loop over all episodes
for i in range(1, num_episodes):
    episodeBuffer0 = priorized_experience_buffer()  # Set up the prioritized experience replay buffer (its size must be decided first)
    environment = sumo.reset()  # Reset the environment
    current_state = sumo.getState()  # Get the current state from the environment
    current_phases = list(init_phases)  # Copy the initial signal phases into a list
    wait_time_map = {}
    wait_time = 0  # Added here because wait_time was otherwise undefined
    done = False
    rAll = 0  # Accumulator for the total episode reward
    j = 0  # Step counter within the episode
    flag = False
    r_plot_i = []
    q_plot_i = []
    # These three parameters can be tuned to the actual scenario: train for a
    # while first, then accumulate the previous phase's statistics.
    pre_queue = 12
    pre_delay = 1000
    pre_throughput = 20
    print('step:' + str(i) + ', percent: %.3f' % (i / num_episodes * 100) + '%')
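
The updateTarget(targetOps, sess) call near the top of Example #1 copies the primary network's weights into the target network. The helper pair below is a minimal sketch of the common TensorFlow 1.x pattern those names suggest; the repo's actual definitions may differ, and tau is an assumed soft-update rate (tau=1.0 gives the hard copy the comment describes).

import tensorflow as tf

def updateTargetGraph(tfVars, tau):
    # Assumes the first half of tfVars belongs to the primary network and
    # the second half to the target network, in matching order.
    total_vars = len(tfVars)
    op_holder = []
    for idx, var in enumerate(tfVars[0:total_vars // 2]):
        target_var = tfVars[idx + total_vars // 2]
        op_holder.append(target_var.assign(
            tau * var.value() + (1.0 - tau) * target_var.value()))
    return op_holder

def updateTarget(op_holder, sess):
    # Run every precomputed assign op so the target network tracks the primary one.
    for op in op_holder:
        sess.run(op)

# Typical wiring (hypothetical):
#   targetOps = updateTargetGraph(tf.trainable_variables(), tau=1.0)
#   updateTarget(targetOps, sess)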
Example #2
        # (The original snippet is truncated here; car1 is reconstructed by
        # symmetry with car2 below — id=0 and role='leader' are assumptions.)
        car1 = car_env.car(id=0,
                           role='leader',
                           ingaged_in_platoon=False,
                           tar_interDis=car_env.DES_PLATOON_INTER_DISTANCE,
                           tar_speed=60.0 / 3.6,
                           location=[0, 50])
        car2 = car_env.car(id=1,
                           role='follower',
                           ingaged_in_platoon=False,
                           tar_interDis=car_env.DES_PLATOON_INTER_DISTANCE,
                           tar_speed=60.0 / 3.6,
                           location=[0, 25])
        # Add the newly created cars to the fleet
        if len(Carlist) == 0:
            Carlist.append(car1)
            Carlist.append(car2)
        # Set the platoon members: according to build_platoon, update each car's join-platoon flag
        car_env.CarList_update_platoon_info(Carlist,
                                            des_platoon_size=2,
                                            build_platoon=True)
        observation = car_env.reset(Carlist)
        ep_r = 0

        # Run the computation for each episode
        while True:
            # Advance the timestamp
            time_tag += car_env.AI_DT

            action = RL.choose_action(observation)

            # Update the kinematic parameters. Because the clock of the C++ 3D
            # program is not synchronized with CarAI's, the update is run for
            # several rounds to mimic that program; in practice the multi-round
            # update produces data closer to the continuous dynamics.
            observation_, done, info = car_env.step_next(
                Carlist, time_tag, action)

            # Compute the single-step reward (an illustrative sketch follows this snippet)
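
Example #2 is truncated right after this comment. As an illustration of what a single-step reward for this car-following task can look like, here is a self-contained sketch; the function name, weights, and error terms are assumptions for illustration only — the repo computes its actual reward inside car_env and the surrounding loop.

def step_reward(inter_distance, target_inter_distance, speed, target_speed,
                w_dis=1.0, w_speed=0.5):
    # Penalize deviation from the desired inter-vehicle gap and target speed;
    # a perfect follower earns a reward of 0, anything else is negative.
    dis_err = abs(inter_distance - target_inter_distance)
    speed_err = abs(speed - target_speed)
    return -(w_dis * dis_err + w_speed * speed_err)

# Example: follower is 5 m short of a 25 m gap and about 1.97 m/s below the
# 60 km/h (16.67 m/s) target speed:
print(step_reward(20.0, 25.0, 14.7, 60.0 / 3.6))  # ≈ -5.98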