# Deep Deterministic Policy Gradient (DDPG)
  print("Training algorithm: Deep Deterministic Policy Gradient (DDPG)")

  # Multi-agent section: the policy table, the agent->policy mapping function,
  # and one trainable policy id ("agent_policy<i>") per agent index.
  ddpg_multiagent = {
    "policy_graphs": policy_graphs,
    "policy_mapping_fn": policy_mapping_fn,
    "policies_to_train": [
      "agent_policy{}".format(agent_idx) for agent_idx in range(n_agents)
    ],
  }

  # Episode hooks; each one is wrapped with tune.function before being passed on.
  ddpg_callbacks = {
    "on_episode_start": tune.function(on_episode_start),
    "on_episode_step": tune.function(on_episode_step),
    "on_episode_end": tune.function(on_episode_end),
  }

  # Full trainer configuration: resource allocation, model, optimisation
  # hyper-parameters, and two equally sized hidden layers for actor and critic.
  ddpg_config = {
    "num_workers": num_workers,
    "num_cpus_per_worker": num_cpus_per_worker,
    "num_gpus": num_gpus,
    "num_gpus_per_worker": num_gpus_per_worker,
    "model": nw_model,
    "lr": lr,
    "gamma": gamma,
    "actor_hiddens": [hidden_layer_size] * 2,
    "critic_hiddens": [hidden_layer_size] * 2,
    "multiagent": ddpg_multiagent,
    "callbacks": ddpg_callbacks,
    "log_level": "ERROR",
  }

  trainer = DDPGTrainer(env=env_title, config=ddpg_config)
             

elif(train_algo == "A3C"):
  
コード例 #2
0
            net_file=
            '/home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/nets/Research/case04/intersection.net.xml',
            route_file=
            '/home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/nets/Research/case04/intersection.rou.xml',
            out_csv_path='outputs/case04/',
            out_csv_name='DDPG',
            use_gui=False,
            num_seconds=12240612,
            time_to_load_vehicles=612,
            max_depart_delay=0))

    # Build a DDPG trainer for the "2TLS" environment with a single policy,
    # 'offset_agent', selected through policy_mapping.
    trainer = DDPGTrainer(
        env="2TLS",
        config={
            "multiagent": {
                "policy_graphs": {
                    'offset_agent': (
                        DDPGTFPolicy,
                        # Observation space: 2 values, each in [0, +inf).
                        # The upper bound uses real float infinities; a string
                        # array such as np.array(['inf'] * 2) only works when
                        # Box happens to cast it to a float dtype.
                        spaces.Box(low=np.zeros(2),
                                   high=np.full(2, np.inf)),
                        # Action space: 2 values, each in [0, 1].
                        spaces.Box(low=np.array([0, 0]),
                                   high=np.array([+1, +1])),
                        {},
                    )
                },
                "policy_mapping_fn":
                policy_mapping  # Traffic lights are always controlled by this policy
            },
            "lr": 0.0001,
        })

    # Train indefinitely: the loop has no exit condition, so it only ends when
    # trainer.train() raises or the process is stopped externally.
    while True:
        result = trainer.train()
# /home/sonic/Desktop/sumo-rl-research-offset/sumo-rl-research/experiments/