Example #1
    def __init__(self, state_size=OBS_DIM, action_size=ACT_DIM, random_seed=0):
        """Initialize an Agent object.
        Params
        ======
            state_size (int): dimension of each observation
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.epsilon = EPSILON

        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC)

        self.noise = OUNoise(action_size, random_seed)
        self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random_seed)
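Most of the agents on this page construct an `OUNoise` exploration helper, but its definition is not shown. A minimal sketch of a typical Ornstein-Uhlenbeck noise process, assuming the `(size, seed, mu, theta, sigma)` constructor used in these examples (the defaults below are assumptions):

import copy
import random
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process (sketch; hyper-parameter defaults are assumptions)."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Update the internal state and return it as a noise sample."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.standard_normal(self.mu.shape)
        self.state = x + dx
        return self.state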
Example #2
    def __init__(self, state_size, action_size, seed):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.t_step = 0  # counter for activating learning every few steps
        self.running_c_loss = 0
        self.running_a_loss = 0
        self.training_cnt = 0

        # Actor network (w/ target network)
        self.actor_local = Actor(state_size, action_size, seed).to(device)
        self.actor_target = Actor(state_size, action_size, seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic network (w/ target network)
        self.critic_local = Critic(state_size, action_size, seed).to(device)
        self.critic_target = Critic(state_size, action_size, seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, seed)

        # Prioritized replay memory
        self.prioritized_memory = PrioritizedMemory(BATCH_SIZE, BUFFER_SIZE,
                                                    seed)
Example #3
    def __init__(self,
                 action_size=4,
                 state_size=33,
                 num_agents=20,
                 max_steps=1000,
                 seed=0,
                 train_mode=True):
        self.train_mode = train_mode
        self.action_size = action_size
        self.state_size = state_size
        self.num_agents = num_agents
        self.max_steps = max_steps

        self.step_count = 0
        self.scores = np.zeros(self.num_agents)
        self.states, self.actions, self.rewards, self.next_states, self.dones = None, None, None, None, None

        self.noise = OUNoise(self.action_size, seed)
        self.memory = AgentMemory(batch_size=BATCH_SIZE,
                                  buffer_size=MEMORY_BUFFER,
                                  seed=seed)

        self.actor = Actor(self.state_size, self.action_size, seed)
        self.critic = Critic(self.state_size, self.action_size, seed)

        self.target_actor = Actor(self.state_size, self.action_size, seed)
        self.target_critic = Critic(self.state_size, self.action_size, seed)

        self.actor_opt = optim.Adam(self.actor.parameters(), lr=LR_ACTOR)
        self.critic_opt = optim.Adam(self.critic.parameters(),
                                     lr=LR_CRITIC,
                                     weight_decay=WEIGHT_DECAY)

        hard_update(self.actor, self.target_actor)
        hard_update(self.critic, self.target_critic)
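Example #3 copies the freshly initialised online weights into the targets with a `hard_update` helper. Note that the argument order is not consistent across this page (here the online network is passed first, while Examples #5 and #15 pass the target first). A minimal sketch, assuming a `hard_update(target, source)` signature:

def hard_update(target, source):
    """Copy every parameter from source into target (sketch)."""
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(source_param.data)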
Example #4
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        # self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        # self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        # self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # initialize Class level Actor Network
        if Agent.actor_local is None:
            Agent.actor_local = Actor(state_size, action_size,
                                      random_seed).to(device)
        if Agent.actor_target is None:
            Agent.actor_target = Actor(state_size, action_size,
                                       random_seed).to(device)
        if Agent.actor_optimizer is None:
            Agent.actor_optimizer = optim.Adam(Agent.actor_local.parameters(),
                                               lr=LR_ACTOR)
        self.actor_local = Agent.actor_local
        self.actor_target = Agent.actor_target
        self.actor_optimizer = Agent.actor_optimizer

        # Critic Network (w/ Target Network)
        #         self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        #         self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        #         self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Initialise class-level Critic Network
        if Agent.critic_local is None:
            Agent.critic_local = Critic(state_size, action_size,
                                        random_seed).to(device)
        if Agent.critic_target is None:
            Agent.critic_target = Critic(state_size, action_size,
                                         random_seed).to(device)
        if Agent.critic_optimizer is None:
            Agent.critic_optimizer = optim.Adam(
                Agent.critic_local.parameters(),
                lr=LR_CRITIC,
                weight_decay=WEIGHT_DECAY)
        self.critic_local = Agent.critic_local
        self.critic_target = Agent.critic_target
        self.critic_optimizer = Agent.critic_optimizer

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory - only initialise once per class
        if Agent.memory is None:
            print("Initialising ReplayBuffer")
            Agent.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                        random_seed)
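Example #4 shares one set of networks, optimizers, and the replay buffer across every `Agent` instance by storing them on the class. For the `if Agent.actor_local is None:` checks to work, the class has to declare these attributes up front; a minimal sketch of the assumed declarations:

class Agent:
    """Shared, class-level networks, optimizers and replay buffer assumed by the example above."""
    actor_local = None
    actor_target = None
    actor_optimizer = None
    critic_local = None
    critic_target = None
    critic_optimizer = None
    memory = None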
Example #5
    def __init__(self, nb_state, nb_action):
        self.nb_state = nb_state
        self.nb_action = nb_action

        self.actor = Actor(self.nb_state, self.nb_action)
        self.actor_target = Actor(self.nb_state, self.nb_action)
        self.actor_optim = Adam(self.actor.parameters(), lr=LEARNING_RATE)

        self.critic = Critic(self.nb_state, self.nb_action)
        self.critic_target = Critic(self.nb_state, self.nb_action)
        self.critic_optim = Adam(self.critic.parameters(), lr=LEARNING_RATE)

        hard_update(self.actor_target,
                    self.actor)  # Make sure target starts with the same weights
        hard_update(self.critic_target, self.critic)

        #Create replay buffer
        self.memory = SequentialMemory(limit=MEMORY_SIZE, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=nb_action,
                                                       theta=OU_THETA,
                                                       mu=OU_MU,
                                                       sigma=OU_SIGMA)

        self.is_training = True
        self.epsilon = 1.0
        self.a_t = None
        self.s_t = None

        if USE_CUDA: self.cuda()
Example #6
    def __init__(self, state_size, action_size, seed):
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed  # keep the seed value (random.seed() returns None)
        self.t_step = 0  # counter for activating learning every few steps
        self.TAU = 1e-2
        self.gamma = 0.99
        self.BUFFER_SIZE = int(1e6)
        self.BATCH_SIZE = 1024
        self.LR_CRITIC = 1e-3
        self.LR_ACTOR = 1e-3
        self.WEIGHT_DECAY = 0.0
        self.EPSILON = 1.0
        self.EPSILON_DECAY = 0.99

        # Actor network (w/ target network)
        self.actor_local = Actor(self.state_size, self.action_size,
                                 seed).to(device)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.LR_ACTOR)

        # Critic network (w/ target network)
        self.critic_local = Critic(self.state_size, self.action_size,
                                   seed).to(device)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=self.LR_CRITIC,
                                           weight_decay=self.WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(self.action_size, self.seed)
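Example #6 keeps `self.TAU` for the Polyak (soft) updates that blend the local weights into the targets, theta_target = tau * theta_local + (1 - tau) * theta_target, which would be applied inside `learn()` (not shown). A minimal sketch of the usual helper, assuming it is called as `self.soft_update(self.critic_local, self.critic_target, self.TAU)`:

    def soft_update(self, local_model, target_model, tau):
        """Blend local parameters into the target parameters (sketch)."""
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)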
Example #7
    def __init__(self, state_size, action_size, random_seed):
        """
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        if Agent.critic_local is None:
            Agent.critic_local = Critic(state_size, action_size, random_seed).to(device)
        if Agent.critic_target is None:
            Agent.critic_target = Critic(state_size, action_size, random_seed).to(device)
        if Agent.critic_optimizer is None:
            Agent.critic_optimizer = optim.Adam(Agent.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)
        self.critic_local = Agent.critic_local
        self.critic_target = Agent.critic_target
        self.critic_optimizer = Agent.critic_optimizer

        self.noise = OUNoise(action_size, random_seed)

        # Replay memory - one per class
        if Agent.memory is None:
            print("Initialising ReplayBuffer")
            Agent.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

        self.agent_num = len(Agent.instances)
        Agent.instances.append(self)
        print("Appended to Agent.instances agent {}".format(self.agent_num))
Example #8
    def __init__(self, state_size, action_size, random_seed):
        """
        Initialize the agent object
        
        Params:
        ======
            state_size (int): dimension of each state 
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.t_step = 0 # Time steps for updating the local actor
        
        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)
        
        
        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)
        
        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
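Most examples here build a `ReplayBuffer(action_size, buffer_size, batch_size, seed)` whose definition is not included. A minimal sketch, under the assumption that `sample()` returns batched tensors of (states, actions, rewards, next_states, dones):

import random
from collections import deque, namedtuple

import numpy as np
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    """Fixed-size buffer of experience tuples (sketch)."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Store a new experience tuple."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Randomly sample a minibatch and return it as device tensors."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).float().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(device)
        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        return len(self.memory)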
Example #9
    def __init__(self,
                 state_size,
                 action_size,
                 memory,
                 device='cpu',
                 params=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            memory (obj): Memory buffer to sample
            device (str): device string between cuda:0 and cpu
            params (dict): hyper-parameters
        """
        self.state_size = state_size
        self.action_size = action_size
        self.device = device
        self.step_t = 0
        self.update_every = params['update_every']

        # Set parameters
        self.gamma = params['gamma']
        self.tau = params['tau']
        self.seed = random.seed(params['seed'])

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, params['seed'],
                                 params['actor_units'][0],
                                 params['actor_units'][1]).to(device)
        self.actor_target = Actor(state_size, action_size, params['seed'],
                                  params['actor_units'][0],
                                  params['actor_units'][1]).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=params['lr_actor'])

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, params['seed'],
                                   params['critic_units'][0],
                                   params['critic_units'][1]).to(device)
        self.critic_target = Critic(state_size, action_size, params['seed'],
                                    params['critic_units'][0],
                                    params['critic_units'][1]).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=params['lr_critic'],
                                           weight_decay=params['weight_decay'])

        # Noise process
        self.noise = OUNoise(action_size,
                             params['seed'],
                             theta=params['noise_theta'],
                             sigma=params['noise_sigma'])

        # Replay memory
        self.memory = memory
Example #10
    def __init__(self, state_size, action_size, random_seed, device="cpu"):
        """Initialize an Agent object.
        
        Params
        ------
            state_size : int
                dimension of each state
            action_size : int
                dimension of each action
            random_seed : int
                random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.device = device

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(self.device)
        self.actor_local.apply(initialize_weights)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(self.device)
        self.actor_target.apply(initialize_weights)
        self.actor_target.eval()
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(self.device)
        self.critic_local.apply(initialize_weights)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(self.device)
        self.critic_target.apply(initialize_weights)
        self.critic_target.eval()
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size,
                             random_seed + 1,
                             mu=0.,
                             theta=THETA,
                             sigma=SIGMA)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed + 2, self.device)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
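Example #10 applies an `initialize_weights` function to every layer via `Module.apply`, but the function itself is not shown. A minimal sketch, assuming uniform fan-in initialisation of the linear layers (a common choice in DDPG implementations):

import numpy as np
import torch.nn as nn

def initialize_weights(module):
    """Uniform fan-in initialisation for linear layers (sketch of an assumed helper)."""
    if isinstance(module, nn.Linear):
        fan_in = module.weight.data.size(1)
        bound = 1.0 / np.sqrt(fan_in)
        module.weight.data.uniform_(-bound, bound)
        if module.bias is not None:
            module.bias.data.uniform_(-bound, bound)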
Example #11
	def __init__(self, env_name, policy_config, device='cpu'):
		self.device = device
		self.env = gym.make(env_name)  # only used to read the observation/action space dimensions
		self.obs_dim = self.env.observation_space.shape[0]
		if isinstance(self.env.action_space, gym.spaces.Box):
			self.action_dim = self.env.action_space.shape[0]
		elif isinstance(self.env.action_space, gym.spaces.Discrete):
			raise TypeError('Unsupported action type')
		else:
			raise ValueError('Unsupported action space type: ', type(self.env.action_space))

		self.action_limit = self.env.action_space.high[0]
		self.lr = policy_config['lr']
		self.actor = Actor(self.obs_dim, self.action_dim).to(device)
		self.critic = Critic(self.obs_dim, self.action_dim).to(device)
		self.actor_target = deepcopy(self.actor)
		self.critic_target = deepcopy(self.critic)

		hard_update(self.actor_target, self.actor)  # Make sure target starts with the same weights
		hard_update(self.critic_target, self.critic)

		self.actor_optim = torch.optim.Adam(params=self.actor.parameters(), lr=self.lr)
		self.critic_optim = torch.optim.Adam(params=self.critic.parameters(), lr=self.lr)
		self.discount_factor = policy_config['discount_factor']
		self.tau = 0.005
Example #12
    def __init__(self, state_size, action_size, random_seed):

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
Example #13
    def __init__(self, state_size, action_size, random_seed, memory,
                 update_every, device):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = memory

        # Update weights parameters
        self.update_every = update_every
        self.step_count = 0

        self.device = device
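Example #13 (like Examples #2 and #6) keeps a step counter so that learning is only triggered every `update_every` environment steps; the corresponding `step()` method is not shown here. A minimal sketch, assuming the injected `memory` exposes `add()`, `sample()` and `__len__`, and that a `learn(experiences, gamma)` method plus module-level `BATCH_SIZE` and `GAMMA` constants exist as in the other examples:

    def step(self, state, action, reward, next_state, done):
        """Store the transition, then learn once every `update_every` steps (sketch)."""
        self.memory.add(state, action, reward, next_state, done)
        self.step_count = (self.step_count + 1) % self.update_every
        if self.step_count == 0 and len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)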
Example #14
    def __init__(self, state_size, action_size, num_agents, random_seed):
        """Initialize an Agent object.

        Arguments:
            state_size (int) -- dimension of each state
            action_size (int) -- dimension of each action
            num_agents (int) -- number of agents (brains)
            random_seed (int) -- random seed
        """

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(random_seed)
        self.epsilon = EPSILON

        ### Make neural networks (local and target) for both actor and critic, and set optimizers
        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise((num_agents, action_size), random_seed)

        # Initialize replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed)
Example #15
    def __init__(self, nb_states, nb_actions, args):
        # self.cuda = USE_CUDA #args.cuda
        self.cuda = args.cuda

        self.nb_states = nb_states
        self.nb_actions = nb_actions

        #Init models
        #actor_kwargs = {'n_inp':self.nb_states, 'n_feature_list':[args.hidden1,args.hidden2], 'n_class':self.nb_actions}
        #self.actor = MLP(**actor_kwargs)
        #self.actor_target = MLP(**actor_kwargs)
        #self.critic = MLP(**actor_kwargs)  #TODO: actor and critic has same structure for now.
        #self.critic_target = MLP(**actor_kwargs)

        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'init_w': args.init_w
        }
        self.actor = Actor(self.nb_states, self.nb_actions, **net_cfg)
        self.actor_target = Actor(self.nb_states, self.nb_actions, **net_cfg)

        self.critic = Critic(self.nb_states, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_states, self.nb_actions, **net_cfg)

        self.criterion = nn.MSELoss()
        if self.cuda:
            self.actor = self.actor.cuda(
            )  # torch.nn.DataParallel(self.model).cuda()  #TODO dataparallel not working
            self.critic = self.critic.cuda()
            self.actor_target = self.actor_target.cuda()
            self.critic_target = self.critic_target.cuda()
            self.criterion = self.criterion.cuda()

        # Set optimizer
        self.actor_optim = torch.optim.Adam(self.actor.parameters(),
                                            lr=args.prate)
        self.critic_optim = torch.optim.Adam(self.critic.parameters(),
                                             lr=args.rate)
        # Loss function
        self.loss_fn = torch.nn.MSELoss(reduction='sum')

        hard_update(self.actor_target,
                    self.actor)  # Make sure target starts with the same weights
        hard_update(self.critic_target, self.critic)

        self.memory = SequentialMemory(limit=args.rmsize,
                                       window_length=args.window_length)
        self.random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                       theta=args.ou_theta,
                                                       mu=args.ou_mu,
                                                       sigma=args.ou_sigma)

        # Hyper-parameters
        self.batch_size = args.bsize
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon

        self.epsilon = 1.0
        self.s_t = None  # Most recent state
        self.a_t = None  # Most recent action
        self.is_training = True
Example #16
    def __init__(self,
                 state_size,
                 action_size,
                 brain_name,
                 seed,
                 params=default_params,
                 device=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        params = self._fill_params(params)

        # implementation and identity
        self.device = device if device is not None else torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.name = params['name']
        self.brain_name = brain_name

        # set environment information
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size,
                                 action_size,
                                 seed,
                                 fc1_units=params['layers_actor'][0],
                                 fc2_units=params['layers_actor'][1]).to(
                                     self.device)

        self.actor_target = Actor(state_size,
                                  action_size,
                                  seed,
                                  fc1_units=params['layers_actor'][0],
                                  fc2_units=params['layers_actor'][1]).to(
                                      self.device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=params['lr_actor'])

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size,
                                   action_size,
                                   seed,
                                   fcs1_units=params['layers_critic'][0],
                                   fc2_units=params['layers_critic'][1]).to(
                                       self.device)
        self.critic_target = Critic(state_size,
                                    action_size,
                                    seed,
                                    fcs1_units=params['layers_critic'][0],
                                    fc2_units=params['layers_critic'][1]).to(
                                        self.device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=params['lr_critic'],
                                           weight_decay=params['weight_decay'])

        # Noise process
        self.noise = OUNoise(action_size, seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size,
                                   params['buffer_size'],
                                   params['batch_size'],
                                   seed,
                                   device=self.device)

        # save params
        self.params = params
Example #17
    def __init__(self,
                 state_size=None, 
                 action_size=None, 
                 random_seed=0,
                 buffer_size=int(1e6),  # replay buffer size
                 batch_size=128,        # minibatch size
                 gamma=0.99,            # discount factor
                 tau=1e-3,              # for soft update of target parameters
                 lr_actor=1e-4,         # learning rate of the actor 
                 lr_critic=1e-3,        # learning rate of the critic
                 weight_decay=0.0001    # L2 weight decay
                ):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.buffer_size = buffer_size       # replay buffer size
        self.batch_size = batch_size         # minibatch size
        self.gamma = gamma                   # discount factor
        self.tau = tau                       # for soft update of target parameters
        self.lr_actor = lr_actor             # learning rate of the actor 
        self.lr_critic = lr_critic           # learning rate of the critic
        self.weight_decay = weight_decay     # L2 weight decay

        # initialization for first class instance
        if Agent.actor_local is None:
            Agent.actor_local = Actor(state_size, action_size, random_seed).to(device)
        if Agent.actor_target is None:
            Agent.actor_target = Actor(state_size, action_size, random_seed).to(device)
        if Agent.actor_optimizer is None:
            Agent.actor_optimizer = optim.Adam(Agent.actor_local.parameters(), lr=lr_actor)
        
        # Actor Network (w/ Target Network)
        self.actor_local = Agent.actor_local
        self.actor_target = Agent.actor_target
        self.actor_optimizer = Agent.actor_optimizer

        # initialization for first class instance
        if Agent.critic_local is None:
            Agent.critic_local = Critic(state_size, action_size, random_seed).to(device)
        if Agent.critic_target is None:
            Agent.critic_target = Critic(state_size, action_size, random_seed).to(device)
        if Agent.critic_optimizer is None:
            Agent.critic_optimizer = optim.Adam(Agent.critic_local.parameters(), lr=lr_critic, weight_decay=weight_decay)
        
        # Critic Network (w/ Target Network)
        self.critic_local = Agent.critic_local
        self.critic_target = Agent.critic_target
        self.critic_optimizer = Agent.critic_optimizer

        # Noise process
        self.noise = OUNoise(action_size, random_seed, mu=MU, theta=THETA, sigma=SIGMA)
        #TODO: Maybe self.noise = OUNoise((20, action_size), random_seed)

        # Replay memory
        if Agent.memory is None:
            Agent.memory = ReplayBuffer(action_size, buffer_size, batch_size, random_seed)
Example #18
    def __init__(self, num_agents, state_size, action_size, random_seed,
                 buffer_size, batch_size, gamma, TAU, lr_actor, lr_critic,
                 weight_decay, a_hidden_sizes, c_hidden_sizes):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Hyperparameters
        self.BUFFER_SIZE = buffer_size
        self.BATCH_SIZE = batch_size
        self.GAMMA = gamma
        self.TAU = TAU
        self.LR_ACTOR = lr_actor
        self.LR_CRITIC = lr_critic
        self.WEIGHT_DECAY = weight_decay
        self.ACTOR_HL_SIZE = a_hidden_sizes
        self.CRITIC_HL_SIZE = c_hidden_sizes
        self.num_agents = num_agents

        # Actor Network (w/ Target Network)
        self.actor_local_1 = Actor(state_size, action_size, random_seed,
                                   self.ACTOR_HL_SIZE).to(device)
        self.actor_target_1 = Actor(state_size, action_size, random_seed,
                                    self.ACTOR_HL_SIZE).to(device)
        self.actor_optimizer_1 = optim.Adam(self.actor_local_1.parameters(),
                                            lr=self.LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local_1 = Critic(state_size, action_size, random_seed,
                                     self.CRITIC_HL_SIZE).to(device)
        self.critic_target_1 = Critic(state_size, action_size, random_seed,
                                      self.CRITIC_HL_SIZE).to(device)
        self.critic_optimizer_1 = optim.Adam(self.critic_local_1.parameters(),
                                             lr=self.LR_CRITIC,
                                             weight_decay=self.WEIGHT_DECAY)

        # Actor Network (w/ Target Network)
        self.actor_local_2 = Actor(state_size, action_size, random_seed,
                                   self.ACTOR_HL_SIZE).to(device)
        self.actor_target_2 = Actor(state_size, action_size, random_seed,
                                    self.ACTOR_HL_SIZE).to(device)
        self.actor_optimizer_2 = optim.Adam(self.actor_local_2.parameters(),
                                            lr=self.LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local_2 = Critic(state_size, action_size, random_seed,
                                     self.CRITIC_HL_SIZE).to(device)
        self.critic_target_2 = Critic(state_size, action_size, random_seed,
                                      self.CRITIC_HL_SIZE).to(device)
        self.critic_optimizer_2 = optim.Adam(self.critic_local_2.parameters(),
                                             lr=self.LR_CRITIC,
                                             weight_decay=self.WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, self.BUFFER_SIZE,
                                   self.BATCH_SIZE, random_seed)
Example #19
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # initialize Class level Actor Network
        #         if Agent.actor_local is None:
        #             Agent.actor_local = Actor(state_size, action_size, random_seed).to(device)
        #         if Agent.actor_target is None:
        #             Agent.actor_target = Actor(state_size, action_size, random_seed).to(device)
        #         if Agent.actor_optimizer is None:
        #             Agent.actor_optimizer = optim.Adam(Agent.actor_local.parameters(), lr=LR_ACTOR)
        #         self.actor_local = Agent.actor_local
        #         self.actor_target = Agent.actor_target
        #         self.actor_optimizer = Agent.actor_optimizer

        # Critic Network (w/ Target Network)
        #         self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        #         self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        #         self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Initialise class-level Critic Network
        if Agent.critic_local is None:
            Agent.critic_local = Critic(state_size, action_size,
                                        random_seed).to(device)
        if Agent.critic_target is None:
            Agent.critic_target = Critic(state_size, action_size,
                                         random_seed).to(device)
        if Agent.critic_optimizer is None:
            Agent.critic_optimizer = optim.Adam(
                Agent.critic_local.parameters(),
                lr=LR_CRITIC,
                weight_decay=WEIGHT_DECAY)
        self.critic_local = Agent.critic_local
        self.critic_target = Agent.critic_target
        self.critic_optimizer = Agent.critic_optimizer

        # Noise process
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory - only initialise once per class
        if Agent.memory is None:
            print("Initialising ReplayBuffer")
            Agent.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                        random_seed)
        #         else:
        #             print("Sharing ReplayBuffer %s", Agent.memory)

        # Add this instance - we need to access all agent states whilst learning
        self.agent_num = len(Agent.instances)
        Agent.instances.append(self)
        print("Appended to Agent.instances agent {}".format(self.agent_num))
Example #20
    def __init__(self, n_state, n_action, n_agents, random_seed, device="cpu"):
        """Initialize an Agent object.
        
        Params
        ------
            n_state : int
                dimension of each state
            n_action : int
                dimension of each action
            random_seed : int
                random seed
            device :
                which device is used, cpu or cuda.
        """
        self.n_state = n_state
        self.n_action = n_action
        self.n_agents = n_agents
        np.random.seed(random_seed)
        self.random_seed = random_seed  # keep the seed value (np.random.seed() returns None)
        self.device = device

        # Networks for the first agent
        # Local Actor, Local Critic, Target Actor, Target Critic
        self.actor_local1 = Actor(self.n_state, self.n_action,
                                  self.random_seed).to(self.device)
        self.actor_local1.apply(initialize_weights)
        self.critic_local1 = Critic(self.n_state * self.n_agents,
                                    self.n_action * self.n_agents,
                                    self.random_seed).to(self.device)
        self.critic_local1.apply(initialize_weights)
        self.actor_target1 = Actor(self.n_state, self.n_action,
                                   self.random_seed).to(self.device)
        self.actor_target1.apply(initialize_weights)
        self.actor_target1.eval()
        self.critic_target1 = Critic(self.n_state * self.n_agents,
                                     self.n_action * self.n_agents,
                                     self.random_seed).to(self.device)
        self.critic_target1.apply(initialize_weights)
        self.critic_target1.eval()

        # Networks for the second agent
        # Local Actor, Local Critic, Target Actor, Target Critic
        self.actor_local2 = Actor(self.n_state, self.n_action,
                                  self.random_seed).to(self.device)
        self.actor_local2.apply(initialize_weights)
        self.critic_local2 = Critic(self.n_state * self.n_agents,
                                    self.n_action * self.n_agents,
                                    self.random_seed).to(self.device)
        self.critic_local2.apply(initialize_weights)
        self.actor_target2 = Actor(self.n_state, self.n_action,
                                   self.random_seed).to(self.device)
        self.actor_target2.apply(initialize_weights)
        self.actor_target2.eval()
        self.critic_target2 = Critic(self.n_state * self.n_agents,
                                     self.n_action * self.n_agents,
                                     self.random_seed).to(self.device)
        self.critic_target2.apply(initialize_weights)
        self.critic_target2.eval()

        # optimizers
        self.actor_optimizer1 = optim.Adam(self.actor_local1.parameters(),
                                           lr=LR_ACTOR)
        self.actor_optimizer2 = optim.Adam(self.actor_local2.parameters(),
                                           lr=LR_ACTOR)
        self.critic_optimizer1 = optim.Adam(self.critic_local1.parameters(),
                                            lr=LR_CRITIC,
                                            weight_decay=WEIGHT_DECAY)
        self.critic_optimizer2 = optim.Adam(self.critic_local2.parameters(),
                                            lr=LR_CRITIC,
                                            weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OUNoise(n_action * 2,
                             random_seed + 1,
                             mu=0.,
                             theta=THETA,
                             sigma=SIGMA)

        # Replay Buffer
        self.memory = ReplayBuffer(n_action, BUFFER_SIZE, BATCH_SIZE,
                                   random_seed + 2, self.device)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
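Example #20 sizes each critic for the joint observation and action (`n_state * n_agents` and `n_action * n_agents`), in the MADDPG style. A sketch of how such a centralised critic would typically be queried during learning, assuming a `critic(states, actions)` forward signature and hypothetical `states_all` / `actions_all` batch tensors from the replay buffer:

# states_all:  tensor of shape (batch, n_agents, n_state)   [hypothetical]
# actions_all: tensor of shape (batch, n_agents, n_action)  [hypothetical]
joint_states = states_all.reshape(states_all.shape[0], -1)     # (batch, n_agents * n_state)
joint_actions = actions_all.reshape(actions_all.shape[0], -1)  # (batch, n_agents * n_action)
q_expected = agent.critic_local1(joint_states, joint_actions)  # Q(s_joint, a_joint) for agent 1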