    def __init__(self,
                 seed=0,
                 noise_start=1.0,
                 gamma=0.99,
                 t_stop_noise=30000,
                 action_size=2,
                 n_agents=2,
                 buffer_size=100000,
                 batch_size=256,
                 update_every=2,
                 noise_decay=1.0):
        """
        Params
        ======
            action_size (int): dimension of each action
            seed (int): Random seed
            n_agents (int): number of distinct agents
            buffer_size (int): replay buffer size
            batch_size (int): minibatch size
            gamma (float): discount factor
            noise_start (float): initial noise weighting factor
            noise_decay (float): noise decay rate
            update_every (int): how often to update the network
            t_stop_noise (int): max number of timesteps with noise applied in training
        """

        self.seed = seed
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.update_every = update_every
        self.gamma = gamma
        self.n_agents = n_agents
        self.noise_weight = noise_start
        self.noise_decay = noise_decay
        self.t_step = 0
        self.noise_on = True
        self.t_stop_noise = t_stop_noise

        # create n_agents DDPG agents (two by default), each with its own actor and critic
        models = [
            model.Actor_Critic_Models(n_agents=n_agents)
            for _ in range(n_agents)
        ]
        self.agents = [DDPG(i, models[i], self.seed) for i in range(n_agents)]

        # create shared replay buffer
        self.memory = ReplayBuffer(action_size, self.buffer_size,
                                   self.batch_size, self.seed)
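
A note on the fields above: this constructor only stores the noise schedule, and the methods that consume noise_weight, noise_decay, and noise_on are not part of this snippet (noise_on would typically be switched off once t_step exceeds t_stop_noise, e.g. inside a step() method). Below is a minimal sketch of an act() method that could use them; the body and the per-agent agent.act(...) signature are assumptions for illustration, not code from this project.

    def act(self, all_states, add_noise=True):
        """Collect one action per agent, scaling exploration noise by the
        current noise weight (sketch; the agent.act signature is assumed)."""
        all_actions = []
        for agent, state in zip(self.agents, all_states):
            action = agent.act(state, noise_weight=self.noise_weight,
                               add_noise=add_noise and self.noise_on)
            all_actions.append(action)
        self.noise_weight *= self.noise_decay  # anneal exploration over time
        return all_actions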
Example #2
File: maddpg.py  Project: qchaldemer/maddp
    def __init__(self, action_size=2, n_agents=2, seed=0):
        """
        Params
        ======
            action_size (int): dimension of each action
            seed (int): Random seed
            n_agents (int): number of agents
        """

        self.n_agents = n_agents
        self.t_step = 0
        self.noise_on = True

        # create n_agents DDPG agents (two by default), each with its own actor and critic
        models = [
            model.Actor_Critic_Models(n_agents=n_agents)
            for _ in range(n_agents)
        ]
        self.agents = [DDPG(i, models[i]) for i in range(n_agents)]

        # create shared replay buffer; BUFFER_SIZE and BATCH_SIZE are
        # module-level constants defined outside this snippet
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
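
This variant reads BUFFER_SIZE and BATCH_SIZE from module scope rather than taking them as constructor parameters, and their definitions are not part of the snippet. The constants below are an assumed reconstruction; the values mirror the defaults of the first example, not the actual qchaldemer/maddp source.

BUFFER_SIZE = int(1e5)  # replay buffer capacity (assumed value)
BATCH_SIZE = 256        # minibatch size (assumed value)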
Example #3

    def __init__(self,
                 action_size=2,
                 seed=0,
                 n_agents=2,
                 buffer_size=10000,
                 batch_size=256,
                 gamma=0.99,
                 update_every=2):
        """
        Params
        ======
            action_size (int): dimension of each action
            seed (int): Random seed
            n_agents (int): number of distinct agents
            buffer_size (int): replay buffer size
            batch_size (int): minibatch size
            gamma (float): discount factor
            update_every (int): how often to update the network
        """

        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.update_every = update_every
        self.gamma = gamma
        self.n_agents = n_agents
        self.t_step = 0
        self.noise_on = True

        # create n_agents DDPG agents (two by default), each with its own actor and critic
        models = [
            model.Actor_Critic_Models(n_agents=n_agents)
            for _ in range(n_agents)
        ]
        self.agents = [DDPG(i, models[i]) for i in range(n_agents)]

        # create shared replay buffer
        self.memory = ReplayBuffer(action_size, self.buffer_size,
                                   self.batch_size, seed)
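
As in the first example, gamma, update_every, and t_step are only initialized here, and the method that consumes them is not shown. A hedged sketch of the step() routine such a constructor typically pairs with follows; the body and the agent.learn(...) signature are illustrative assumptions, not code from this file.

    def step(self, all_states, all_actions, all_rewards, all_next_states, all_dones):
        """Store one joint experience in the shared buffer and learn on a
        fixed schedule (sketch; the agent.learn signature is assumed)."""
        self.memory.add(all_states, all_actions, all_rewards,
                        all_next_states, all_dones)
        self.t_step = (self.t_step + 1) % self.update_every
        # learn only every update_every steps, once a full minibatch is buffered
        if self.t_step == 0 and len(self.memory) > self.batch_size:
            for agent in self.agents:
                # each agent draws its own sample from the shared buffer
                experiences = self.memory.sample()
                agent.learn(experiences, self.gamma)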