Example #1
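This first script builds the simple_spread scenario from OpenAI's multi-agent particle environments (the multiagent package) and hands the resulting environment to a MADDPG controller defined in maddpg.py.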
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios
import torch
import numpy as np

from agent import DDPGAgent
from maddpg import MADDPG
from utils import MultiAgentReplayBuffer


def make_env(scenario_name, benchmark=False):
    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment
    if benchmark:
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation, scenario.benchmark_data)
    else:
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation)
    return env


env = make_env(scenario_name="simple_spread")

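# The two calls below are positional; judging from the imports, 1000000 is
# most likely the replay-buffer capacity, and run's arguments look like
# (n_episodes, max_steps_per_episode, batch_size).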
ma_controller = MADDPG(env, 1000000)
ma_controller.run(500, 300, 32)
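Since maddpg.py itself is not shown, here is a minimal sketch of a controller that would satisfy the interface used above, assuming MADDPG(env, buffer_maxlen) and run(n_episodes, max_steps, batch_size). The DDPGAgent and MultiAgentReplayBuffer signatures, and the buffer's push/__len__ methods, are assumptions rather than the repo's actual code; the per-agent update step is left as a stub.

import numpy as np

from agent import DDPGAgent
from utils import MultiAgentReplayBuffer


class MADDPG:
    def __init__(self, env, buffer_maxlen):
        self.env = env
        self.num_agents = env.n  # MultiAgentEnv exposes the agent count as .n
        # DDPGAgent(env, agent_index) is an assumed constructor signature
        self.agents = [DDPGAgent(env, i) for i in range(self.num_agents)]
        # MultiAgentReplayBuffer(num_agents, maxlen) is likewise assumed
        self.replay_buffer = MultiAgentReplayBuffer(self.num_agents, buffer_maxlen)

    def run(self, n_episodes, max_steps, batch_size):
        for episode in range(n_episodes):
            states = self.env.reset()  # one observation per agent
            episode_reward = 0.0
            for _ in range(max_steps):
                actions = [agent.get_action(obs)
                           for agent, obs in zip(self.agents, states)]
                next_states, rewards, dones, _ = self.env.step(actions)
                episode_reward += float(np.sum(rewards))
                self.replay_buffer.push(states, actions, rewards,
                                        next_states, dones)
                states = next_states
                if len(self.replay_buffer) > batch_size:
                    pass  # sample a batch and update each agent's actor/critic
                if all(dones):
                    break
            print(f"episode {episode} | total reward {episode_reward:.2f}")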
Example #2
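The second snippet is an evaluation loop that renders a trained policy. It is only a fragment: the class header and everything in __init__ except its final self.reset() call were cut off, but the remaining methods show the complete rollout logic.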
        self.reset()

    def reset(self):
        self.states = self.env.reset()
        self.step = 0

    def get_actions(self, states):
        actions = []
        for i in range(self.num_agents):
            action = self.agents[i].get_action(states[i])
            actions.append(action)
        return actions

    def run(self):
        for _ in range(200):
            actions = self.get_actions(self.states)
            next_states, rewards, dones, _ = self.env.step(actions)
            self.env.render()

            # stop when every agent is done or the episode hits the
            # step limit (cfg.max_episode_len - 1 in the original config)
            if all(dones) or self.step == 199:
                self.reset()
                break
            self.states = next_states
            self.step += 1

if __name__ == '__main__':
    maddpg = MADDPG()
    maddpg.run()
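Since the fragment's constructor was cut off, here is a hypothetical reconstruction of the missing piece, assuming the make_env helper and DDPGAgent class from Example #1; restoring trained weights is repo-specific and omitted. Combined with the reset/get_actions/run methods above, this is enough to make the snippet runnable.

from agent import DDPGAgent  # same module as in Example #1

# make_env is assumed to be the helper defined in Example #1

class MADDPG:
    def __init__(self):
        self.env = make_env(scenario_name="simple_spread")
        self.num_agents = self.env.n  # agent count exposed by MultiAgentEnv
        # assumed constructor signature; loading trained weights would go here
        self.agents = [DDPGAgent(self.env, i) for i in range(self.num_agents)]
        self.reset()  # the call visible at the top of the fragment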