Example #1
    def __init__(self, state_size, action_size, agent_id):

        self.state_size  = state_size
        self.action_size = action_size
        self.seed        = args['seed']
        self.device      = args['device']
        #self.args        = args
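        # NOTE: `args` is assumed to be a module-level dict of hyperparameters
        # (seed, device, learning rates, buffer/batch sizes, checkpoint paths)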

        # Actor Network (w/ Target Network)
        self.actor_network    = ActorNetwork(state_size, action_size).to(self.device)
        self.actor_target     = ActorNetwork(state_size, action_size).to(self.device)
        self.actor_optimizer  = optim.Adam(self.actor_network.parameters(), lr=args['LR_ACTOR'])
        
        # Model takes too long to train --> load model weights from a previous run (took > 24 hours on my machine)
        #if not agent_id:
        #    self.actor_network.load_state_dict(torch.load(args['agent_p0_path']), strict=False)
        #    self.actor_target.load_state_dict(torch.load(args['agent_p0_path']), strict=False)
        #else:
        #    self.actor_network.load_state_dict(torch.load(args['agent_p1_path']), strict=False)
        #    self.actor_target.load_state_dict(torch.load(args['agent_p1_path']), strict=False)
        
        # Replay memory
        self.memory      = ReplayBuffer(action_size, args['BUFFER_SIZE'], args['BATCH_SIZE'], self.device, self.seed)
        
        # Noise process
        self.noise       = OUNoise(action_size, self.seed)
        
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step      = 0
        
        self.mCriticLoss = 0
        
        self.actorLoss   = 0
Example #2
    def __init__(self, state_size, action_size, state_size_full,
                 action_size_full, random_seed):

        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic = Critic(state_size_full, action_size_full,
                             random_seed).to(device)
        self.critic_target = Critic(state_size_full, action_size_full,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # initialize targets same as original networks
        self.hard_update(self.actor_target, self.actor)
        self.hard_update(self.critic_target, self.critic)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)
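
Example #2 copies the local weights into the targets with `self.hard_update(...)`, but the helper itself is not shown. A minimal sketch of what such a method typically looks like, with the argument order taken from the calls above (an assumed implementation, not the original repo's code):

    def hard_update(self, target, source):
        """Copy every parameter of `source` into `target` (a soft update with tau = 1)."""
        for target_param, source_param in zip(target.parameters(),
                                              source.parameters()):
            target_param.data.copy_(source_param.data)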
Example #3
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size

        # Actor Networks both Local and Target.
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Networks both Local and Target.
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)
        self.noise_modulation = 1
        self.noise_decay = NOISE_DECAY

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

        # Count number of steps
        self.n_steps = 0
        self.update_every = UPDATE_EVERY
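
Almost every example stores transitions in a `ReplayBuffer` that is not shown. A compact sketch matching the `(action_size, buffer_size, batch_size, seed)` constructor used in Example #3; this is an assumed, common layout rather than the original repo's implementation:

import random
from collections import deque, namedtuple

import numpy as np
import torch

class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples random minibatches."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Append a single transition to the buffer."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a random minibatch and convert it to torch tensors."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float()
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).float()
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float()
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float()
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float()
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)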
Example #4
    def __init__(self, state_size, action_size, num_agents, noise,
                 learning_rate, memory, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.noise = noise
        self.learning_rate = learning_rate
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.learning_rate)

        # Noise process
        self.noise = OUNoise(action_size, seed=random_seed)

        # Replay memory
        #self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
        self.memory = memory
Example #5
    def __init__(self, state_space, action_space, buffer_size, batch_size,
                 learning_rate_actor, learning_rate_critic, update_rate, gamma,
                 tau, device, seed, num_agents, epsilon, epsilon_decay,
                 epsilon_min):
        self.num_agents = num_agents
        self.action_space = action_space
        self.state_space = state_space
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.step_count = 0
        self.update_rate = update_rate
        self.tau = tau
        self.seed = seed
        self.device = device
        self.gamma = gamma
        self.actor_local_network = ActorNetwork(state_space, action_space, device, seed).to(device)
        self.actor_target_network = ActorNetwork(state_space, action_space, device, seed).to(device)
        self.critic_local_network = CriticNetwork(state_space, action_space, device, seed).to(device)
        self.critic_target_network = CriticNetwork(state_space, action_space, device, seed).to(device)
        
        
        self.actor_optimizer = torch.optim.Adam(self.actor_local_network.parameters(), lr=learning_rate_actor)
        self.critic_optimizer = torch.optim.Adam(self.critic_local_network.parameters(), lr=learning_rate_critic)
 
        self.noise = OUNoise(action_space, seed)
        self.memory = ReplayBuffer(buffer_size = self.buffer_size, batch_size=self.batch_size, 
                                   device=device, seed=seed)
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
Example #6
    def __init__(self, state_size, action_size):

        # Constants
        self.buffer_size = int(1e6)
        self.batch_size = 128
        self.learning_rate = 1e-4
        self.learn_every = 2
        self.learning_rounds = 4

        self.gamma = 0.99
        self.tau = 1e-3

        self.t = 0
        self.state_size = state_size
        self.action_size = action_size
        self.eps = 5.0
        self.eps_decay = 1 / (300 * self.learning_rounds)

        self.actor_local = Actor(state_size, action_size).to(device)
        self.actor_target = Actor(state_size, action_size).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.learning_rate)

        self.critic_local = Critic(state_size, action_size).to(device)
        self.critic_target = Critic(state_size, action_size).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=self.learning_rate)

        self.noise = OUNoise((1, action_size))
        self.memory = ReplayBuffer(action_size, self.buffer_size,
                                   self.batch_size)
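
Example #6 stores `eps` and `eps_decay`, which are typically used to scale the exploration noise when acting. The snippet stops at the constructor; a hedged sketch of how such an `act` method is commonly written (modelled on the `act` shown later in this listing; the `eps` scaling is an assumption about how this particular agent uses it):

    def act(self, state, add_noise=True):
        """Return a clipped action for the given state, adding eps-scaled OU noise for exploration."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.eps * self.noise.sample()
        return np.clip(action, -1, 1)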
Example #7
    def __init__(self, config):
        self.config = config
        self.state_size = config.state_size
        self.action_size = config.action_size

        self.actor_local = Actor(self.state_size, self.action_size,
                                 2).to(device)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  2).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=config.LR_ACTOR)

        self.critic_local = Critic(self.state_size, self.action_size,
                                   2).to(device)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    2).to(device)
        self.critic_optimizer = optim.Adam(
            self.critic_local.parameters(),
            lr=config.LR_CRITIC,
        )

        self.memory = ReplayBuffer(config.random_seed, config.BUFFER_SIZE)
        self.noise = OUNoise(self.action_size, config.random_seed)

        self.t_step = 0

        self.soft_update(self.critic_local, self.critic_target, 1)
        self.soft_update(self.actor_local, self.actor_target, 1)
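
Example #7 initializes its targets by calling `self.soft_update(local, target, 1)`, but the helper is not included. A typical implementation consistent with that argument order (a sketch, not the original repo's code); with `tau=1` it reduces to a hard copy, which is exactly what the constructor needs:

    def soft_update(self, local_model, target_model, tau):
        """Polyak-average the weights: theta_target = tau*theta_local + (1 - tau)*theta_target."""
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)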
Example #8
    def __init__(self, state_size, action_size, seed):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.t_step = 0  # counter for activating learning every few steps
        self.running_c_loss = 0
        self.running_a_loss = 0
        self.training_cnt = 0

        # Actor network (w/ target network)
        self.actor_local = Actor(state_size, action_size, seed).to(device)
        self.actor_target = Actor(state_size, action_size, seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic network (w/ target network)
        self.critic_local = Critic(state_size, action_size, seed).to(device)
        self.critic_target = Critic(state_size, action_size, seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, seed)

        # Prioritized replay memory
        self.prioritized_memory = PrioritizedMemory(BATCH_SIZE, BUFFER_SIZE,
                                                    seed)
Example #9
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 actor_network_units,
                 critic_network_units,
                 optimizer_learning_rate_actor=1e-3,
                 optimizer_learning_rate_critic=1e-3,
                 actor_weight_decay=0,
                 critic_weight_decay=0,
                 noise_scale=0.1,
                 noise_theta=0.2,
                 noise_sigma=0.2,
                 device=None):
        """ Initializes the training instance for a single agent.

        :param state_size:  (int) Space size for state observations per agent
        :param action_size:  (int) Space size for actions per agent
        :param num_agents: (int) Number of agents used in problem
        :param actor_network_units:  (list of ints) Network topology for actor networks
        :param critic_network_units:  (list of ints) Network topology for critic networks
        :param optimizer_learning_rate_actor:  (float)  Learning rate for actor loss optimizer
        :param optimizer_learning_rate_critic:  (float)  Learning rate for critic loss optimizer
        :param actor_weight_decay:  (float)  Weight decay for actor loss optimizer
        :param critic_weight_decay:  (float)  Weight decay for critic loss optimizer
        :param noise_scale:  (float)  Scale for noise process
        :param noise_theta:  (float)  Theta parameter for noise process
        :param noise_sigma:  (float)  Sigma parameter for noise process
        :param device:  (torch.device)  Object representing the device where to allocate tensors
        """
        if device is None:
            device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")

        self.actor = Actor(state_size, action_size,
                           actor_network_units).to(device)
        self.target_actor = Actor(state_size, action_size,
                                  actor_network_units).to(device)

        self.critic = Critic(state_size * num_agents, action_size * num_agents,
                             critic_network_units).to(device)
        self.target_critic = Critic(state_size * num_agents,
                                    action_size * num_agents,
                                    critic_network_units).to(device)

        self.noise = OUNoise(device,
                             action_size,
                             scale=noise_scale,
                             mu=0,
                             theta=noise_theta,
                             sigma=noise_sigma)

        self.actor_optimizer = Adam(self.actor.parameters(),
                                    lr=optimizer_learning_rate_actor,
                                    weight_decay=actor_weight_decay)
        self.critic_optimizer = Adam(self.critic.parameters(),
                                     lr=optimizer_learning_rate_critic,
                                     weight_decay=critic_weight_decay)

        self.hard_update()
Example #10
    def __init__(self, engine):
        self.task = engine
        self.width = engine.width
        self.height = engine.height
        self.state_size = engine.state_size
        self.action_size = engine.action_size
        self.action_low = engine.action_low
        self.action_high = engine.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                                 self.action_high, self.width, self.height)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                                  self.action_high, self.width, self.height)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size, self.width, self.height)
        self.critic_target = Critic(self.state_size, self.action_size, self.width, self.height)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)
        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
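
Nearly every example instantiates `OUNoise` without showing the class. A minimal Ornstein-Uhlenbeck process consistent with the `(size, mu, theta, sigma)` constructor used in Example #10 (an assumed, common implementation):

import copy

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process producing temporally correlated exploration noise."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new state as a noise sample."""
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.standard_normal(self.state.shape)
        self.state = self.state + dx
        return self.state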
Example #11
    def __init__(self, action_size, action_type, state_size, hidden_in_size,
                 hidden_out_size, num_atoms, lr_actor, lr_critic, l2_decay,
                 noise_type, OU_mu, OU_theta, OU_sigma):
        super(DDPGAgent, self).__init__()

        # creating actors, critics and targets using the specified layer sizes. Note for the critics we assume 2 agents
        self.actor = Actor(action_size, state_size, hidden_in_size,
                           hidden_out_size, action_type).to(device)
        self.critic = Critic(2 * action_size, 2 * state_size, hidden_in_size,
                             hidden_out_size, num_atoms).to(device)
        self.target_actor = Actor(action_size, state_size, hidden_in_size,
                                  hidden_out_size, action_type).to(device)
        self.target_critic = Critic(2 * action_size, 2 * state_size,
                                    hidden_in_size, hidden_out_size,
                                    num_atoms).to(device)
        self.noise_type = noise_type
        self.action_type = action_type

        if noise_type == 'OUNoise':  # if we're using OUNoise it needs to be initialised as it is an autocorrelated process
            self.noise = OUNoise(action_size, OU_mu, OU_theta, OU_sigma)

        # initialize targets same as original networks
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        # initialize optimisers using the specified learning rates
        self.actor_optimizer = Adam(self.actor.parameters(),
                                    lr=lr_actor,
                                    weight_decay=l2_decay)
        self.critic_optimizer = Adam(self.critic.parameters(),
                                     lr=lr_critic,
                                     weight_decay=l2_decay)
Example #12
    def __init__(self, state_size, action_size):
        """
        Initializes Agent object.
        @Param:
        1. state_size: dimension of each state.
        2. action_size: number of actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        
        # Actor network
        self.actor_local = Actor(self.state_size, self.action_size).to(device)  # local model
        self.actor_target = Actor(self.state_size, self.action_size).to(device)  # target model (used for the TD target)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)  # Adam optimizer for the Actor network

        # Critic network
        self.critic_local = Critic(self.state_size, self.action_size).to(device)  # local model
        self.critic_target = Critic(self.state_size, self.action_size).to(device)  # target model (used for the TD target)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)  # Adam optimizer for the Critic network

        # Noise process
        self.noise = OUNoise(action_size)  # Ornstein-Uhlenbeck process

        # Replay memory
        self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, MINI_BATCH)  # experience replay buffer
Example #13
    def __init__(self, name, state_size, action_size, joint_state_size,
                 joint_action_size, actor_lr, critic_lr, device):
        self.name = name
        self.device = device
        self.noise = OUNoise(action_size, sigma=0.1)

        self.actor_local = Actor(state_size, action_size, fc1=64,
                                 fc2=64).to(device)
        self.actor_target = Actor(state_size, action_size, fc1=64,
                                  fc2=64).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=actor_lr)

        self.critic_local = Critic(joint_state_size,
                                   joint_action_size,
                                   fc1=64,
                                   fc2=64).to(device)
        self.critic_target = Critic(joint_state_size,
                                    joint_action_size,
                                    fc1=64,
                                    fc2=64).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=critic_lr)

        self.hard_copy_weights(self.actor_target, self.actor_local)
        self.hard_copy_weights(self.critic_target, self.critic_local)
Example #14
    def __init__(self, state_size, action_size, random_seed, num_agents,
                 device, hps):
        self.noise = OUNoise(action_size, random_seed)
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.count = 0
        # setting the hyperparameters
        self.batch_size = hps.batch_size
        self.tau = hps.tau
        self.lr_actor = hps.lr_actor
        self.lr_critic = hps.lr_critic
        self.update_every = hps.update_every
        # shared replay buffer
        self.memory = ReplayBuffer(BUFFER_SIZE, self.batch_size, random_seed)

        # Critic networks - 1 network (local + target) per agent
        self.critics = [
            Critic(state_size, action_size, random_seed, self.lr_critic,
                   WEIGHT_DECAY, device) for i in range(num_agents)
        ]
        # Actor networks - 1 network (local + target) per agent
        self.actors = [
            Actor(state_size, action_size, random_seed, self.lr_actor,
                  self.noise, device) for i in range(num_agents)
        ]
Example #15
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Count number of steps
        self.n_steps = 0  ###
Example #16
    def __init__(self,
                 in_actor,
                 out_actor,
                 in_critic,  # e.g. = n_agent * (state_size + action_size)
                 lr_actor=1e-4,
                 lr_critic=1e-3,  # the critic is typically given a higher learning rate than the actor
                 random_seed=2):
        self.state_size = in_actor
        self.action_size = out_actor
        self.seed = random.seed(random_seed)

        self.params = {"lr_actor": lr_actor,
                       "lr_critic": lr_critic,
                       "optimizer": "adam"}

        self.local_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.target_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.actor_optimizer = optim.Adam(self.local_actor.parameters(), lr=lr_actor)

        # for a single agent, the critic takes the global observations as input and outputs the action-value Q
        # e.g. global_states = all_states + all_actions
        self.local_critic = Critic(in_shape=in_critic).to(device)
        self.target_critic = Critic(in_shape=in_critic).to(device)
        self.critic_optimizer = optim.Adam(self.local_critic.parameters(), lr=lr_critic)

        # Q: should the local and target networks start with the same weights?
        # A: yes, it is better to hard-copy the local weights into the targets at initialization
        hard_update_A_from_B(self.target_actor, self.local_actor)
        hard_update_A_from_B(self.target_critic, self.local_critic)

        # Noise process
        self.noise = OUNoise(out_actor, scale=1.0)
Example #17
    def __init__(self, action_size, state_size, buffer_size, batch_size,
                 actor_lr, critic_lr, device, weight_decay, tau, shared_memory,
                 noise, share_memory_flag, seed=0):
        self.state_size  = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size  = batch_size
        self.actor_lr = actor_lr
        self.weight_decay = weight_decay
        self.device = device
        self.seed = seed
        self.actor_loss = []
        #self.critic_loss =[]
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.tau = tau
        self.noise = OUNoise(self.action_size, self.seed)
        #self.noise = noise
        self.share_memory_flag = share_memory_flag
        if self.share_memory_flag:
            self.memory = shared_memory
        else:
            self.memory = ReplayBuffer(action_size, buffer_size, batch_size, self.device)

        ## Actor
        self.actor_local = ActorNN(self.state_size, self.action_size).to(self.device)
        self.actor_target = ActorNN(self.state_size, self.action_size).to(self.device)
        self.actor_optimizer = Adam(self.actor_local.parameters(), lr=self.actor_lr)
        ## Critic
        #self.critic_local = Critic(self.state_size,self.action_size).to(self.device)
        #self.critic_target = Critic(self.state_size,self.action_size).to(self.device)
        #self.critic_optimizer = Adam(self.critic_local.parameters(), lr = self.critic_lr,  weight_decay=self.weight_decay)
        # initialize targets same as original networks
        self.hard_update(self.actor_target, self.actor_local)
Example #18
    def __init__(self, config):
        """Initialize an Agent object.

        Args:
            param1: (config)
        """

        self.state_size = config.state_dim
        self.action_size = config.action_dim
        self.seed = np.random.seed(config.seed)
        self.n_agents = config.n_agents
        self.batch_size = config.batch_size
        self.tau = config.tau
        self.gamma = config.gamma
        self.device = config.device
        # Actor Network (w/ Target Network)
        self.actor_local = Actor(config).to(config.device)
        self.actor_target = Actor(config).to(config.device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=config.lr_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(config).to(config.device)
        self.critic_target = Critic(config).to(config.device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=config.lr_critic)

        # Noise process
        self.noise = OUNoise(config)

        # Replay memory
        self.memory = ReplayBuffer(config)
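
Example #18 reads every hyperparameter from a single `config` object. A hedged sketch of how such a config might be assembled with `types.SimpleNamespace`; the field names come from the attributes accessed above, the values are placeholders, and the real config presumably also carries whatever `Actor`, `Critic`, `OUNoise` and `ReplayBuffer` read from it:

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    state_dim=24, action_dim=2, seed=0, n_agents=2,
    batch_size=128, tau=1e-3, gamma=0.99,
    lr_actor=1e-4, lr_critic=1e-3,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)
agent = Agent(config)  # assumes the constructor above belongs to a class named Agent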
Example #19
    def __init__(self, state_size, action_size, agent_id, random_seed):
        """Initialize a ddpg_agent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            agent_id (int): identifier for this agent
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.agent_id = agent_id

        self.actor_local = Actor(state_size, action_size).to(device)
        self.actor_target = Actor(state_size, action_size).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        self.critic_local = Critic(state_size, action_size).to(device)
        self.critic_target = Critic(state_size, action_size).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Make sure that the target-local model pairs are initialized to the
        # same weights
        self.hard_update(self.actor_local, self.actor_target)
        self.hard_update(self.critic_local, self.critic_target)

        self.noise = OUNoise(action_size, random_seed)

        self.noise_amplification = NOISE_AMPLIFICATION
        self.noise_amplification_decay = NOISE_AMPLIFICATION_DECAY
Example #20
    def __init__(self,
                 state_size,
                 action_size,
                 memory,
                 device='cpu',
                 params=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            memory (obj): Memory buffer to sample
            device (str): device string between cuda:0 and cpu
            params (dict): hyper-parameters
        """
        self.state_size = state_size
        self.action_size = action_size
        self.device = device
        self.step_t = 0
        self.update_every = params['update_every']

        # Set parameters
        self.gamma = params['gamma']
        self.tau = params['tau']
        self.seed = random.seed(params['seed'])

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, params['seed'],
                                 params['actor_units'][0],
                                 params['actor_units'][1]).to(device)
        self.actor_target = Actor(state_size, action_size, params['seed'],
                                  params['actor_units'][0],
                                  params['actor_units'][1]).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=params['lr_actor'])

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, params['seed'],
                                   params['critic_units'][0],
                                   params['critic_units'][1]).to(device)
        self.critic_target = Critic(state_size, action_size, params['seed'],
                                    params['critic_units'][0],
                                    params['critic_units'][1]).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=params['lr_critic'],
                                           weight_decay=params['weight_decay'])

        # Noise process
        self.noise = OUNoise(action_size,
                             params['seed'],
                             theta=params['noise_theta'],
                             sigma=params['noise_sigma'])

        # Replay memory
        self.memory = memory
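
Example #20 pulls its hyperparameters out of a `params` dict. A sketch of the dictionary this constructor expects; the keys come from the lookups above, the values are only illustrative:

params = {
    'seed': 0,
    'gamma': 0.99,
    'tau': 1e-3,
    'update_every': 4,
    'actor_units': (256, 128),
    'critic_units': (256, 128),
    'lr_actor': 1e-4,
    'lr_critic': 1e-3,
    'weight_decay': 0.0,
    'noise_theta': 0.15,
    'noise_sigma': 0.2,
}
agent = Agent(state_size, action_size, memory, device='cpu', params=params)  # `Agent` and `memory` assumed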
Example #21
    def __init__(self,
                 num_agents,
                 state_size,
                 action_size,
                 gamma,
                 tau,
                 learning_rate_actor,
                 learning_rate_critic,
                 weight_decay,
                 device,
                 random_seed=42):
        """Initialize an Agent object (used my MultiAgent for MADDPG).

        Params
        ======
            num_agents (int): number of agents acting in the environment
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            gamma (float): discount factor
            tau (float): used for soft update of target parameters
            learning_rate_actor (float): learning rate for the actor
            learning_rate_critic (float): learning rate for the critic
            weight_decay (float): weight decay for the optimizers
            device (torch.Device): pytorch device
            random_seed (int): random seed
        """

        self.gamma = gamma
        self.tau = tau
        self.device = device
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=learning_rate_actor,
                                          weight_decay=weight_decay)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(num_agents, state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(num_agents, state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=learning_rate_critic,
                                           weight_decay=weight_decay)  #0.0001

        # Noise process
        self.noise = OUNoise(size=action_size, seed=random_seed)

        self.timestep = 0
Example #22
    def __init__(self, state_size, action_size, random_seed, num_agents):
        self.num_agents = num_agents
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
        self.noise = OUNoise(action_size, random_seed)
        self.actors = [
            ActorAgent(i, state_size, action_size, random_seed, LR_ACTOR,
                       self.noise, self.memory) for i in range(num_agents)
        ]
        self.critic = CriticAgent(state_size, action_size, random_seed,
                                  LR_CRITIC, WEIGHT_DECAY, TAU)
        self.count = 0
Example #23
class ActorAgent():
    """Interacts with and learns from the environment."""
    def __init__(self, state_size, action_size, num_agents, noise,
                 learning_rate, memory, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.noise = noise
        self.learning_rate = learning_rate
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.learning_rate)

        # Noise process
        self.noise = OUNoise(action_size, seed=random_seed)

        # Replay memory
        #self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
        self.memory = memory

    def step(self, state, action, reward, next_state, done):
        """Save experience in replay memory, and use random sample from buffer to learn."""
        # Save experience / reward
        self.memory.add(state, action, reward, next_state, done)

    def act(self, state, add_noise=True):
        """Returns actions for given state as per current policy."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.noise.sample()
        return np.clip(action, -1, 1)

    def reset(self):
        self.noise.reset()
Example #24
    def __init__(self,
                 state_size,
                 action_size,
                 device,
                 actor_args={},
                 critic_args={}):
        """Initializes the DQN agent.

        Args:
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            device (torch.device): Device to use for calculations
            actor_args (dict): Arguments describing the actor network
            critic_args (dict): Arguments describing the critic network
        """
        self.state_size = state_size
        """Dimension of each state"""

        self.action_size = action_size
        """Dimension of each action"""

        self.device = device
        """Device to use for calculations"""

        self.t_step = 0
        """Timestep between training updates"""

        # Parameters

        # Actor network
        self.actor_local = Actor(state_size, action_size,
                                 **actor_args).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  **actor_args).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic network

        self.critic_local = Critic(state_size, action_size,
                                   **critic_args).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    **critic_args).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process for exploration
        self.noise = OUNoise(action_size, sigma=NOISE_SD)

        # Replay memory
        self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, self.device)
Example #25
class DDPG:
    def __init__(self,
                 in_actor,
                 out_actor,
                 in_critic,  # e.g. = n_agent * (state_size + action_size)
                 lr_actor=1e-4,
                 lr_critic=1e-3,  # the critic is typically given a higher learning rate than the actor
                 random_seed=2):
        self.state_size = in_actor
        self.action_size = out_actor
        self.seed = random.seed(random_seed)

        self.params = {"lr_actor": lr_actor,
                       "lr_critic": lr_critic,
                       "optimizer": "adam"}

        self.local_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.target_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.actor_optimizer = optim.Adam(self.local_actor.parameters(), lr=lr_actor)

        # for a single agent, the critic takes the global observations as input and outputs the action-value Q
        # e.g. global_states = all_states + all_actions
        self.local_critic = Critic(in_shape=in_critic).to(device)
        self.target_critic = Critic(in_shape=in_critic).to(device)
        self.critic_optimizer = optim.Adam(self.local_critic.parameters(), lr=lr_critic)

        # Q: should the local and target networks start with the same weights?
        # A: yes, it is better to hard-copy the local weights into the targets at initialization
        hard_update_A_from_B(self.target_actor, self.local_actor)
        hard_update_A_from_B(self.target_critic, self.local_critic)

        # Noise process
        self.noise = OUNoise(out_actor, scale=1.0)

    def act(self, obs, noise_scale=0.0):
        obs = obs.to(device)
        # debug noise
        # noise = torch.from_numpy(noise_scale*0.5*np.random.randn(1, self.action_size)).float().to(device)
        # action = self.local_actor(obs) + noise
        action = self.local_actor(obs) + noise_scale * self.noise.noise().to(device)
        return action

    def target_act(self, obs, noise_scale=0.0):
        obs = obs.to(device)
        # noise = torch.from_numpy(noise_scale*0.5 * np.random.randn(1, self.action_size)).float().to(device)
        # action = self.target_actor(obs) + noise_scale * noise
        action = self.target_actor(obs) + noise_scale * self.noise.noise().to(device)
        return action

    def reset(self):
        self.noise.reset()
Example #26
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 mnoise=True,
                 split_state=True):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.mnoise = mnoise
        self.split_state = split_state

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # initialize targets same as original networks
        self.hard_update(self.actor_target, self.actor_local)
        self.hard_update(self.critic_target, self.critic_local)

        # Noise process
        if self.mnoise:
            self.noise = OUNoise((2, action_size), random_seed)
        else:
            self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
Example #27
    def __init__(self, env, gamma, tau, buffer_maxlen, batch_size,
                 critic_learning_rate, actor_learning_rate, update_per_step,
                 seed):
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # hyperparameters
        self.num_replay_updates_per_step = update_per_step
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

        # initialize actor and critic networks
        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0], seed).to(self.device)
        self.critic_target = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0],
                                    seed).to(self.device)

        self.actor = Actor(env.observation_space.shape[0],
                           env.action_space.shape[0], seed).to(self.device)
        self.actor_target = Actor(env.observation_space.shape[0],
                                  env.action_space.shape[0],
                                  seed).to(self.device)

        # optimizers
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_learning_rate)
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_learning_rate)

        self.buffer = ReplayBuffer(buffer_maxlen, batch_size, seed)
        self.noise = OUNoise(env.action_space.shape[0])
Example #28
    def __init__(self, state_size, action_size, n_agents, lr_actor=0.01, lr_critic=0.01):
        super(DDPGAgent, self).__init__()

        self.actor = Actor(state_size, action_size, seed=0).to(device)
        self.critic = Critic(state_size, action_size, n_agents).to(device)
        self.target_actor = Actor(state_size, action_size, seed=0).to(device)
        self.target_critic = Critic(state_size, action_size, n_agents).to(device)

        self.noise = OUNoise(action_size, scale=1.0)

        # initialize targets same as original networks
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1.e-5)
Example #29
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = [OUNoise(action_size, random_seed) for i in range(NUM_AGENTS)]

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #30
    def __init__(self, env):
        self.env = env
        #self.stateDim = obs2state(env.reset().observation).size()[1]
        #self.actionDim = env.action_spec().shape[0]
        self.stateDim = env.observation_space.shape[0]
        self.actionDim = env.action_space.shape[0]
        self.actor = Actor(self.env)
        self.critic = Critic(self.env)
        self.targetActor = deepcopy(Actor(self.env))
        self.targetCritic = deepcopy(Critic(self.env))
        self.actorOptim = optim.Adam(self.actor.parameters(), lr=ACTOR_LR)
        self.criticOptim = optim.Adam(self.critic.parameters(), lr=CRITIC_LR)
        self.criticLoss = nn.MSELoss()
        self.noise = OUNoise(mu=np.zeros(self.actionDim), sigma=SIGMA)
        self.replayBuffer = Buffer(BUFFER_SIZE)
        self.batchSize = MINIBATCH_SIZE
        self.checkpoint_dir = CHECKPOINT_DIR
        self.discount = DISCOUNT
        self.warmup = WARMUP
        self.epsilon = EPSILON
        self.epsilon_decay = EPSILON_DECAY
        self.rewardgraph = []
        self.stepgraph = []
        self.start = 0
        self.end = NUM_EPISODES