# NOTE(review): Python lexes the line below as ONE comment. The leading "#"
# swallows the print(...) call, the whole `trainer = DDPGTrainer(...)`
# construction and the trailing `elif(train_algo == "A3C"):` header, so none of
# it is executed. It looks like the line breaks of this branch were lost.
# The `if` chain this branch belongs to (and its indentation) is not visible
# from here, so the text is left untouched -- TODO: restore the original line
# structure (comment / print / trainer = ... / elif ...) from version control.
# Deep Deterministic Policy Gradient (DDPG) print("Training algorithm: Deep Deterministic Policy Gradient (DDPG)") trainer = DDPGTrainer( env=env_title, config={ "num_workers": num_workers, "num_cpus_per_worker": num_cpus_per_worker, "num_gpus": num_gpus, "num_gpus_per_worker": num_gpus_per_worker, "model": nw_model, "lr": lr, "gamma": gamma, "actor_hiddens": [hidden_layer_size, hidden_layer_size], "critic_hiddens": [hidden_layer_size, hidden_layer_size], "multiagent": { "policy_graphs": policy_graphs, "policy_mapping_fn": policy_mapping_fn, "policies_to_train": ["agent_policy{}".format(i) for i in range(n_agents)], }, "callbacks": { "on_episode_start": tune.function(on_episode_start), "on_episode_step": tune.function(on_episode_step), "on_episode_end": tune.function(on_episode_end), }, "log_level": "ERROR", }) elif(train_algo == "A3C"):
net_file= '/home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/nets/Research/case04/intersection.net.xml', route_file= '/home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/nets/Research/case04/intersection.rou.xml', out_csv_path='outputs/case04/', out_csv_name='DDPG', use_gui=False, num_seconds=12240612, time_to_load_vehicles=612, max_depart_delay=0)) trainer = DDPGTrainer( env="2TLS", config={ "multiagent": { "policy_graphs": { 'offset_agent': (DDPGTFPolicy, spaces.Box(low=np.zeros(2), high=np.array(['inf'] * 2)), spaces.Box(low=np.array([0, 0]), high=np.array([+1, +1])), {}) }, "policy_mapping_fn": policy_mapping # Traffic lights are always controlled by this policy }, "lr": 0.0001, }) while True: result = trainer.train() # /home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/