# Pin the keyboard-controlled vehicle to a fixed spawn point and disable
# random start points, so every run begins from the same location.
env_dict[keyboard_agent_name].vehicle_start_point_index = 0
env_dict[keyboard_agent_name].use_random_start_point = False
# Enable RL learning on the imitation-target worker.
worker_kwargs[keyboard_agent_name]["do_RL_learn"] = True
# Debug flag for the keyboard-controlled worker, to observe whether the
# model actually trains.
# worker_kwargs[keyboard_agent_name]["debug"] = True
DDPG_Agent_GAL_v1(
    env_prototype_dict_for_workers=env_dict,
    save_dir="./ddpg_IAJR_ckpt/",
    model_hook_dict=hook_dict,
    kwargs_for_model_dict=model_kwargs,
    kwargs_for_worker_dict=worker_kwargs,
    kwargs_for_global_model={
        # Pre-compute the exact q value for the dataset.
        "use_pre_calculated_g": use_pre_calculated_g,
        "gamma": gamma
    }).start()
# NOTE: if the code below never runs, start() above blocked on an internal
# join().
# Start the keyboard hook; from here on, the keyboard-controlled vehicle's
# throttle/brake/steering are continuously driven by key presses.
kh.start_keyboard_control()
# Keep the main thread alive while the workers run.
# Fix: the original `while 1: pass` busy-waits and pins a CPU core at 100%;
# sleeping inside the loop blocks forever just the same without the spin.
import time
while 1:
    time.sleep(1)
}
# Per-worker exploration schedule: (steering, throttle/brake)-style pairs —
# presumably one entry per action dimension; TODO confirm against
# DDPG_Agent_GAL_v1's worker implementation.
worker_kwargs[name] = {
    "start_variance_for_each_action": (0, 0.0),
    "variance_decay_ratio_for_each_action": (0.995, 0.995),
    "variance_decay_step": 10,
    "start_offset_for_each_action": (0.0, 0.0),
    "offset_decay_value_for_each_action": (0.00, 0.00),
    "offset_decay_step": 2000
}
agent = DDPG_Agent_GAL_v1(
    env_prototype_dict_for_workers=env_dict,
    save_dir="./ddpg_ckpt/",
    # These two parameters are nested dictionaries (keyed per worker).
    kwargs_for_model_dict=model_kwargs,
    kwargs_for_worker_dict=worker_kwargs,
    kwargs_for_global_model={
        # Pre-compute the exact q value for the dataset.
        "use_pre_calculated_g": use_pre_calculated_g,
        "gamma": gamma
    })
model_trainer = FloatActionTrainer(
    # Input: the state placeholder of the global model.
    input_placeholder=agent.global_model.S,
    # Output: the action graph of the global model (original comment read
    # "the frequency of the output state").
    output_graph=agent.global_model.a,
    action_space_size=(agent.action_space, ),
    tf_sess=agent.sess)
# Start the agent first.
agent.start()
carla_UE_ip=ip, carla_UE_port=port, n_waypoint=100, # DDPG没有IL相对难训练,所以间隔小一些! waypoint_spacing=3, vehicle_start_point_index=spawn_index_for_each_car_in_worker[ i], wait_time_after_apply_action=0.1, ratio_of_reaching=0.3, add_center_lane_state=True, # 这里是DDPG和A3C算法的区别,使用连续空间的action action_replace=ContinuousSteeringVelocityBrakeAction_v1(), # 实测DDPG和A3C表现差异很大,因此单独设计它的reward试试? #reward_replace= ) env_dict[name] = env worker_kwargs[name] = { "start_variance": 0.0, # debug时方差小一些,便于观察走势 "variance_decay": 0.99, "debug": True } # model_kwargs[name] = { # } DDPG_Agent_GAL_v1( env_prototype_dict_for_workers=env_dict, save_dir="./a3c_gal_ckpt/", kwargs_for_worker_dict=worker_kwargs, ).start()