# Whether the model should use pre-computed returns (g); gamma is the RL
# discount factor shared by every worker's model.
use_pre_calculated_g = False
gamma = 0.99
# Build one environment plus model/worker kwargs per (ip, port, slot) triple.
# NOTE(review): assumes server_config maps ip -> iterable of ports — confirm upstream.
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i)
                             )  # worker name
            env = LaneFollowEnv_v1(
                # CARLA 0.9.5 python client (py2.7 egg) location.
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                # Fixed spawn point chosen per worker slot index.
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[
                    i],
                wait_time_after_apply_action=0.1,
                # Continuous steering/velocity/brake action space.
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                # Cost combining final drive distance, average speed and
                # average distance to lane.
                reward_replace=
                FinalStepDriveDistanceAverageSpeedAverageDistanceToLaneCost_v1(
                ),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }
            # NOTE(review): fragment is truncated here mid-dict in the source.
            worker_kwargs[name] = {
                "start_variance_for_each_action": (0, 0.0),
# Register one lane-follow environment per worker slot on every configured
# CARLA server, together with the model kwargs each worker's model needs.
for ip, ports in server_config.items():
    for port in ports:
        for i in range(n_workers_in_each_port):
            # Worker name encodes the server address and the slot index.
            name = 'W_' + "_".join([str(ip), str(port), str(i)])
            # Some road sections curve sharply (up to 90 degrees) and lane
            # fitting degrades there, wrongly flagging the car as off-lane, so
            # the lane-distance tolerance is relaxed. A relaxed bound can let
            # the car wander between lanes, but a pre-trained policy does not
            # actually change lanes in practice.
            env = LaneFollowEnv_v1(
                maximum_distance_to_lane=0.5,
                # Drawing lanes in UE is only recommended while debugging.
                plot_lane_on_UE=False,
                use_random_start_point=True,
                carla_egg_path=carla_egg_path,
                carla_pythonAPI_path=carla_pythonAPI_path,
                carla_UE_ip=ip,
                carla_UE_port=port,
                wait_time_after_apply_action=0.1,
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }
# Model options shared by all workers: pre-computed returns flag and the RL
# discount factor.
use_pre_calculated_g = False
gamma = 0.99
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i)
                             )  # worker name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                use_random_start_point=True,
                wait_time_after_apply_action=0.1,
                # This is the difference between DDPG and A3C here: a
                # continuous action space is used.
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "gamma": gamma,
                "use_pre_calculated_g": use_pre_calculated_g,
            }
            # NOTE(review): fragment is truncated here mid-dict in the source.
            worker_kwargs[name] = {
                # Training is imitation-driven, so RL performs no exploration
                # (zero starting variance on every action dimension).
                "start_variance_for_each_action": (0., 0.),
# Environment configuration for this worker, gathered first so the call site
# stays readable. A random start point helps expose overfitting to a single
# spawn location during testing.
_env_settings = dict(
    use_random_start_point=True,
    carla_egg_path=
    "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
    carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
    carla_UE_ip=ip,
    carla_UE_port=port,
    vehicle_start_point_index=spawn_index_for_each_car_in_worker[i],
    wait_time_after_apply_action=0.1,
    # Difference from A3C: DDPG acts in a continuous action space.
    action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
    # DDPG must not see positive rewards, so a cost is used instead.
    # Alternatives tried: SafeDriveDistanceCost (works but slow — safe drive
    # distance as a final-step cost propagated back by gamma) and
    # DistanceToLaneCost (distance to the lane line as the cost). The combined
    # speed/lane/total-distance cost below also works, though slowly.
    reward_replace=
    FinalStepDriveDistanceAverageSpeedAverageDistanceToLaneCost_v1(),
    # Lowered minimum velocity makes cornering easier.
    minimum_velocity=0.5,
    drive_time_after_reset=2.0,
    kwargs_for_done_condition={
        # The car is pre-driven after reset, so dropping back to low velocity
        # ends the episode immediately.
        "minimum_action_taken_on_low_velocity": 0
    })
env = LaneFollowEnv_v1(**_env_settings)
# Shared CARLA client paths (0.9.5 py2.7 egg) used by every environment below.
carla_egg_path = "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg"
carla_pythonAPI_path = "/home/wang/Desktop/carla/PythonAPI/carla"
# Model options shared by all workers.
use_pre_calculated_g = False
gamma = 0.99
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i)
                             )  # worker name
            env = LaneFollowEnv_v1(
                use_random_start_point=True,
                carla_egg_path=carla_egg_path,
                carla_pythonAPI_path=carla_pythonAPI_path,
                carla_UE_ip=ip,
                carla_UE_port=port,
                wait_time_after_apply_action=0.1,
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                reward_replace=SafeDriveDistanceCost(),
                minimum_velocity=0.5,
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                "use_pre_calculated_g": use_pre_calculated_g,
                "gamma": gamma
            }
            # NOTE(review): fragment is truncated here mid-dict in the source.
            worker_kwargs[name] = {
                # Exploration noise per action dimension.
                # NOTE(review): presumably the variance is multiplied by the
                # decay ratio every `variance_decay_step` steps — confirm in
                # the worker implementation.
                "start_variance_for_each_action": (1, 1.0),
                "variance_decay_ratio_for_each_action": (0.995, 0.995),
                "variance_decay_step": 10,
# Per-worker model kwargs, keyed by worker name.
model_kwargs = {}
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i)
                             )  # worker name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[
                    i],
                wait_time_after_apply_action=0.1,
                # This is the difference between DDPG and A3C here: a
                # continuous action space is used.
                action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
                # DDPG must not see positive rewards, so a cost is used.
                reward_replace=SafeDriveDistanceCost(),
                drive_time_after_reset=2.0,
                kwargs_for_done_condition={
                    # The car is pre-driven after reset, so dropping back to
                    # low velocity ends the episode immediately.
                    "minimum_action_taken_on_low_velocity": 0
                })
            env_dict[name] = env
            model_kwargs[name] = {
                # Pre-computed exact q values for the dataset; both the local
                # model here and the later global model need this flag.
                "use_pre_calculated_g": False,
                "gamma": 0.9
            }
            # NOTE(review): fragment is truncated here mid-dict in the source.
            worker_kwargs[name] = {
for ip in server_config:
    for port in server_config[ip]:
        for i in range(n_workers_in_each_port):
            name = 'W_%s' % (str(ip) + "_" + str(port) + "_" + str(i)
                             )  # worker name
            # Remember the last worker's name for delayed debugging.
            # NOTE(review): this in-loop assignment is overwritten by the
            # `delay_debug_worker_name = []` reset below unless the commented
            # line after it is restored.
            delay_debug_worker_name = name
            env = LaneFollowEnv_v1(
                carla_egg_path=
                "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
                carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
                carla_UE_ip=ip,
                carla_UE_port=port,
                vehicle_start_point_index=spawn_index_for_each_car_in_worker[
                    i],
                wait_time_after_apply_action=0.1,
                # Use the high-precision control action space.
                action_replace=HighPrecisionControl_v1(),
                # For A3C the lane constraint must not be too strict,
                # otherwise training becomes very hard!
                maximum_distance_to_lane=10,
            )
            env_dict[name] = env
# Worker names to run in delay-debug mode; keep the next line commented out to
# disable delay-debug on the last worker.
delay_debug_worker_name = []
#delay_debug_worker_name = [delay_debug_worker_name]
# Finally pass the name->env dict to the agent; each env trains on its own
# dedicated worker.
# NOTE(review): fragment is truncated here mid-call in the source.
A3C_GAL_Train_Agent_v1(
'''
# NOTE(review): the triple-quote above appears to close a header docstring
# opened before this chunk — confirm against the full file.

from ReinforcementLearning.Modules.Agents.DDPG_Agent import DDPG_Agent_v1
from ReinforcementLearning.Modules.Environments.Environments_laneFollow import LaneFollowEnv_v1
from ReinforcementLearning.Modules.Environments.Actions import ContinuousSteeringVelocityBrakeAction_v1
from ReinforcementLearning.Modules.Environments.Rewards import DistanceToLaneCost, SafeDriveDistanceCost

# Single-environment DDPG training example against one CARLA server.
env = LaneFollowEnv_v1(
    carla_egg_path=
    "/home/wang/Desktop/carla/PythonAPI/carla/dist/carla-0.9.5-py2.7-linux-x86_64.egg",
    carla_pythonAPI_path="/home/wang/Desktop/carla/PythonAPI/carla",
    carla_UE_ip="10.10.9.128",
    carla_UE_port=2000,
    vehicle_start_point_index=0,
    wait_time_after_apply_action=0.1,
    action_replace=ContinuousSteeringVelocityBrakeAction_v1(),
    reward_replace=SafeDriveDistanceCost(),
    # Pre-drive the car for 2.0 seconds after each reset.
    drive_time_after_reset=2.0,
    kwargs_for_done_condition={
        # The car is pre-driven, so dropping back to low velocity ends the
        # episode immediately.
        "minimum_action_taken_on_low_velocity": 0
    }
    #reward_replace=SafeDriveDistanceReward_v1(),
)

# Note: the q values used here are computed manually in advance.
DDPG_Agent_v1(env=env,
              kwargs_for_model={
                  "gamma": 0.9
              },
              use_model_with_pre_calculated_g=True).train()