# Example #1
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        # random.seed() returns None, so the previous
        # `self.seed = random.seed(random_seed)` always stored None.
        # Seed the RNG, then keep the actual seed value for reference.
        random.seed(random_seed)
        self.seed = random_seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Noise process (Ornstein-Uhlenbeck) for action exploration
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
    def __init__(self, state_size, action_size, random_seed, memory=None):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            memory: optional pre-built replay buffer to share between agents;
                a new ReplayBuffer is created when None
        """
        self.state_size = state_size
        self.action_size = action_size
        # random.seed() returns None; seed the RNG and store the seed itself
        # (the previous `self.seed = random.seed(...)` always stored None).
        random.seed(random_seed)
        self.seed = random_seed

        # Actor Network
        self.actor_local = Actor(state_size, action_size,
                                 random_seed).to(device)
        self.actor_target = Actor(state_size, action_size,
                                  random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise (Ornstein-Uhlenbeck process) for action exploration
        self.noise = OUNoise(action_size, random_seed)

        # Replay memory: reuse the caller-supplied buffer when given,
        # otherwise build a fresh one.
        if memory is None:
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       random_seed)
        else:
            self.memory = memory
# Example #3
    def __init__(self, args, state_size, action_size):
        """Initialize an Agent object.

        Params
        ======
            args: configuration namespace; fields read here are seed,
                hidden_1_size, hidden_2_size, hidden_3_size, lr_Actor,
                lr_Critic, adam_eps, weight_decay, noise_std,
                memory_capacity, learning_starts_ratio, learning_frequency
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.args = args
        self.state_size = state_size
        self.action_size = action_size
        # random.seed() returns None; seed the RNG and keep the seed value
        # (the previous `self.seed = random.seed(...)` always stored None).
        random.seed(self.args.seed)
        self.seed = self.args.seed

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size,
                                 action_size,
                                 self.args.seed,
                                 fc1_units=self.args.hidden_1_size,
                                 fc2_units=self.args.hidden_2_size,
                                 fc3_units=self.args.hidden_3_size).to(device)
        self.actor_target = Actor(state_size,
                                  action_size,
                                  self.args.seed,
                                  fc1_units=self.args.hidden_1_size,
                                  fc2_units=self.args.hidden_2_size,
                                  fc3_units=self.args.hidden_3_size).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.args.lr_Actor,
                                          eps=self.args.adam_eps)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(
            state_size,
            action_size,
            self.args.seed,
            fc1_units=self.args.hidden_1_size,
            fc2_units=self.args.hidden_2_size,
            fc3_units=self.args.hidden_3_size).to(device)
        self.critic_target = Critic(
            state_size,
            action_size,
            self.args.seed,
            fc1_units=self.args.hidden_1_size,
            fc2_units=self.args.hidden_2_size,
            fc3_units=self.args.hidden_3_size).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=self.args.lr_Critic,
                                           eps=self.args.adam_eps,
                                           weight_decay=self.args.weight_decay)

        # Noise process (Ornstein-Uhlenbeck) for action exploration
        self.noise = OUNoise(action_size,
                             self.args.seed,
                             sigma=self.args.noise_std)

        # Warm-up: number of transitions to collect before learning starts,
        # expressed as a fraction of the replay-memory capacity.
        self.learning_starts = int(args.memory_capacity *
                                   args.learning_starts_ratio)
        self.learning_frequency = args.learning_frequency
# Example #4
    def __init__(self, state_size, action_size, n_agents=1, seed=0):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            n_agents (int): number of agents it will control in the environment
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        # np.random.seed() returns None, so the previous
        # `self.seed = np.random.seed(seed)` always stored None.
        # Seed both RNGs, then keep the actual seed value.
        np.random.seed(seed)
        random.seed(seed)
        self.seed = seed
        self.n_agents = n_agents

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size,
                                 action_size,
                                 leak=LEAKINESS,
                                 seed=seed).to(device)
        self.actor_target = Actor(state_size,
                                  action_size,
                                  leak=LEAKINESS,
                                  seed=seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network).
        # NOTE(review): weight_decay is deliberately omitted here (the original
        # had it disabled); re-enable via optim.Adam(..., weight_decay=WEIGHT_DECAY)
        # if L2 regularization of the critic is wanted.
        self.critic_local = Critic(state_size,
                                   action_size,
                                   leak=LEAKINESS,
                                   seed=seed).to(device)
        self.critic_target = Critic(state_size,
                                    action_size,
                                    leak=LEAKINESS,
                                    seed=seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC)

        # Noise process (Ornstein-Uhlenbeck) for action exploration
        self.noise = OUNoise(action_size, seed)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Step counter used to gate learning frequency
        self.timesteps = 0