sess.run(init)  # initialize all variables (init is assumed to be tf.global_variables_initializer())
if load_model:
    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(path)
    saver.restore(sess, ckpt.model_checkpoint_path)
updateTarget(targetOps, sess)  # Set the target network to be equal to the primary network.
tf.summary.FileWriter("logs/", sess.graph)

# Main loop over all episodes
for i in range(1, num_episodes):
    episodeBuffer0 = priorized_experience_buffer()  # per-episode experience buffer; settle its size first
    environment = sumo.reset()           # reset the environment
    current_state = sumo.getState()      # get the current state of the environment
    current_phases = list(init_phases)   # store the initial signal phases as a list
    wait_time_map = {}
    wait_time = 0   # added because wait_time was otherwise undefined
    done = False
    rAll = 0        # accumulated reward over the episode
    j = 0           # step counter within one episode
    flag = False
    r_plot_i = []
    q_plot_i = []
    # The three values below can be tuned for the actual setup: train for a while first,
    # then accumulate the statistics of the previous phase
    pre_queue = 12
    pre_delay = 1000
    pre_throughput = 20
    print('step:' + str(i) + ', percent: %.3f' % (i / num_episodes * 100) + '%')
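The call to updateTarget(targetOps, sess) above relies on target-network helper functions that are not shown in this excerpt. Below is a minimal sketch, assuming the common TF1-style pattern in which the primary network's variables come first in tf.trainable_variables() and the target network's variables follow in the same order; the function bodies are an illustration under that assumption, not the original implementation.

import tensorflow as tf  # assumes TensorFlow 1.x, consistent with the excerpt above

def updateTargetGraph(tfVars, tau):
    # Build assign ops that move each target-network variable toward its
    # primary-network counterpart: target <- tau * primary + (1 - tau) * target.
    # Assumes the first half of tfVars belongs to the primary network and the
    # second half to the target network, in matching order.
    total_vars = len(tfVars)
    op_holder = []
    for idx, var in enumerate(tfVars[0:total_vars // 2]):
        target_var = tfVars[idx + total_vars // 2]
        op_holder.append(
            target_var.assign(var.value() * tau + (1 - tau) * target_var.value()))
    return op_holder

def updateTarget(op_holder, sess):
    # Run all assign ops so the target network tracks the primary network.
    for op in op_holder:
        sess.run(op)

# Typical usage before the training loop (hypothetical names):
# trainables = tf.trainable_variables()
# targetOps = updateTargetGraph(trainables, tau=0.001)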
                   location=[0, 50])
car2 = car_env.car(id=1, role='follower', ingaged_in_platoon=False,
                   tar_interDis=car_env.DES_PLATOON_INTER_DISTANCE,
                   tar_speed=60.0 / 3.6, location=[0, 25])
# Add the newly created cars to the platoon list
if len(Carlist) == 0:
    Carlist.append(car1)
    Carlist.append(car2)
# Mark the cars that take part in the platoon; build_platoon updates the "joined platoon" flag
car_env.CarList_update_platoon_info(Carlist, des_platoon_size=2, build_platoon=True)
observation = car_env.reset(Carlist)
ep_r = 0
# Run the computation for one episode
while True:
    # Advance the timestamp
    time_tag += car_env.AI_DT
    action = RL.choose_action(observation)
    # Update the kinematics. Because the 3D clock of the C++ program is not synchronized
    # with the CarAI clock, imitate that program and update over several rounds;
    # the multi-round update actually yields values closer to the continuous case
    observation_, done, info = car_env.step_next(Carlist, time_tag, action)
    # Compute the per-step reward
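The agent behind RL.choose_action is not defined in this excerpt. The sketch below only illustrates one common way such a method is written, epsilon-greedy selection over estimated Q-values; the class name, the epsilon parameter, and the placeholder Q-value function are assumptions for illustration, not the original code.

import numpy as np

class DQNAgentSketch:
    """Hypothetical stand-in for the RL agent used above (illustration only)."""

    def __init__(self, n_actions, epsilon=0.9):
        self.n_actions = n_actions
        self.epsilon = epsilon  # probability of taking the greedy action

    def _q_values(self, observation):
        # Placeholder: a real agent would run the observation through its Q-network.
        return np.zeros(self.n_actions)

    def choose_action(self, observation):
        # Epsilon-greedy: exploit with probability epsilon, otherwise explore.
        if np.random.uniform() < self.epsilon:
            return int(np.argmax(self._q_values(observation)))
        return np.random.randint(0, self.n_actions)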