# (tail of the UnityEnvWrapper class definition)
        print('Size of observations: {}'.format(self.n_states()))
        print('Example state:', self.states()[0])

    def close(self):
        self.env.close()


env = UnityEnvWrapper(no_graphics=False)

# Both agents share one actor (local, target, and optimizer); only the critics differ.
agent1 = Agent(state_size=env.n_states() + 1, action_size=env.n_actions(), random_seed=2)
agent2 = Agent(state_size=env.n_states() + 1, action_size=env.n_actions(), random_seed=2)
agent2.actor_local = agent1.actor_local
agent2.actor_target = agent1.actor_target
agent2.actor_optimizer = agent1.actor_optimizer

print(env.n_agents(), env.n_states(), env.n_actions())


def play():
    agent1.actor_local.load_state_dict(torch.load('checkpoint_actor_1.pth'))
    agent1.actor_local.eval()
    # agent2.actor_local.load_state_dict(torch.load('checkpoint_actor_2.pth'))
    # agent2.actor_local.eval()
    state = env.reset(train_mode=False)
    while True:
        state1 = np.concatenate([state[0], [1]])
        # The original cell is truncated here; the rest of the loop is a
        # reconstruction, assuming the wrapper's step() returns
        # (next_states, rewards, dones) and that agent 2's observation is
        # tagged with index 2, mirroring agent 1.
        state2 = np.concatenate([state[1], [2]])
        actions = [agent1.act(state1, add_noise=False),
                   agent2.act(state2, add_noise=False)]
        state, rewards, dones = env.step(actions)
        if np.any(dones):
            break
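# The "+ 1" on state_size and the np.concatenate([state[0], [1]]) call above
# tag each observation with a scalar agent index, so the single shared actor
# can tell the two agents apart. A minimal sketch of that augmentation (the
# augment helper and the observation size are hypothetical, for illustration):
import numpy as np

def augment(observation, agent_index):
    """Append a scalar agent index so one shared policy can distinguish agents."""
    return np.concatenate([observation, [agent_index]])

obs = np.zeros(8)            # illustrative observation size
state1 = augment(obs, 1)     # agent 1 is tagged with 1; agent 2 would get 2
assert state1.shape == (9,)  # matches state_size = env.n_states() + 1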
class MADDPG():
    def __init__(self, state_size, action_size, random_seed):
        """Initialize two Agent objects.

        Params
        ======
            state_size (int): dimension of one agent's observation
            action_size (int): dimension of each action
            random_seed (int): seed for the shared replay buffer
        """
        self.state_size = state_size
        self.action_size = action_size

        # Initialize the agents
        self.ddpg_agent0 = Agent(state_size, action_size, random_seed=0)
        self.ddpg_agent1 = Agent(state_size, action_size, random_seed=1)

        # Replay memory, shared by both agents
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    def act(self, states, rand=False):
        """Return each agent's actor_local action, or uniform random actions if rand=True."""
        if rand:
            return np.clip(np.random.randn(2, 2), -1, 1)
        action0 = self.ddpg_agent0.act(states[0])
        action1 = self.ddpg_agent1.act(states[1])
        return [action0, action1]

    def step(self, states, actions, rewards, next_states, dones, learn=True):
        """Save the joint experience in replay memory, then learn from a random sample."""
        self.memory.add(states[0], states[1], actions[0], actions[1],
                        rewards[0], rewards[1], next_states[0], next_states[1],
                        dones[0], dones[1])
        if learn and len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)

    def learn(self, experiences, gamma):
        s0, s1, a0, a1, r0, r1, next_s0, next_s1, d0, d1 = experiences

        # Next actions (for the CRITIC networks)
        a_next0 = self.ddpg_agent0.actor_target(next_s0)
        a_next1 = self.ddpg_agent1.actor_target(next_s1)

        # Action predictions (for the ACTOR networks)
        a_pred0 = self.ddpg_agent0.actor_local(s0)
        a_pred1 = self.ddpg_agent1.actor_local(s1)

        # Each DDPG agent learns from its own perspective, which is why the
        # states, actions, rewards, etc. are swapped for the second call.
        self.ddpg_agent0.learn(s0, s1, a0, a1, r0, r1, next_s0, next_s1, d0, d1,
                               a_next0, a_next1, a_pred0, a_pred1)
        self.ddpg_agent1.learn(s1, s0, a1, a0, r1, r0, next_s1, next_s0, d1, d0,
                               a_next1, a_next0, a_pred1, a_pred0)
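# MADDPG above relies on a ReplayBuffer whose add() takes the ten joint
# fields and whose sample() returns them as ten batched tensors, in the same
# order learn() unpacks them. That class is not shown in this cell; the
# following is a minimal sketch of what it could look like -- everything
# beyond the constructor signature and the field order is an assumption.
import random
from collections import deque, namedtuple

import numpy as np
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    """Fixed-size buffer storing joint two-agent experiences (a sketch)."""

    FIELDS = ('state0', 'state1', 'action0', 'action1', 'reward0', 'reward1',
              'next_state0', 'next_state1', 'done0', 'done1')

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.batch_size = batch_size
        self.memory = deque(maxlen=buffer_size)
        self.experience = namedtuple('Experience', self.FIELDS)
        random.seed(seed)

    def add(self, *fields):
        """Store one joint experience (ten fields, in FIELDS order)."""
        self.memory.append(self.experience(*fields))

    def sample(self):
        """Return the ten fields as batched float tensors, in FIELDS order."""
        batch = random.sample(self.memory, k=self.batch_size)
        return tuple(
            torch.from_numpy(
                np.vstack([getattr(e, f) for e in batch]).astype(np.float32)
            ).to(device)
            for f in self.FIELDS
        )

    def __len__(self):
        return len(self.memory)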
# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(
    states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

agent_1 = Agent(state_size=state_size, action_size=action_size, random_seed=2)
agent_2 = Agent(state_size=state_size, action_size=action_size, random_seed=3)

# The two agents share the replay memory and all four networks.
agent_2.memory = agent_1.memory
agent_2.actor_local = agent_1.actor_local
agent_2.actor_target = agent_1.actor_target
agent_2.critic_local = agent_1.critic_local
agent_2.critic_target = agent_1.critic_target

t_max = 1000
print_every = 100
maxlen = 100
n_episodes = 1000  # episode budget (assumed; the original read env.n_episodes)
score = []
ev_score = []
scores_deque = deque(maxlen=maxlen)

for i_episode in range(1, n_episodes + 1):
    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
    states = env_info.vector_observations              # get the current state (for each agent)
    scores = np.zeros(num_agents)                      # initialize the score (for each agent)
    agent_1.reset()
    # The original cell is truncated here; the remainder of the episode loop
    # is a reconstruction, assuming the usual per-agent DDPG Agent API
    # (act, and step(state, action, reward, next_state, done)).
    agent_2.reset()
    for t in range(t_max):
        actions = [agent_1.act(states[0]), agent_2.act(states[1])]
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        agent_1.step(states[0], actions[0], rewards[0], next_states[0], dones[0])
        agent_2.step(states[1], actions[1], rewards[1], next_states[1], dones[1])
        scores += rewards
        states = next_states
        if np.any(dones):
            break
    scores_deque.append(np.max(scores))
    score.append(np.max(scores))
    if i_episode % print_every == 0:
        print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
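# Nothing in the cell above persists the trained weights, while the
# evaluation cell below loads 'checkpoint_actor.pth'. A plausible bridge is
# a save step like this (the filenames are assumptions; saving only
# agent_1's networks suffices because the two agents share them):
torch.save(agent_1.actor_local.state_dict(), 'checkpoint_actor.pth')
torch.save(agent_1.critic_local.state_dict(), 'checkpoint_critic.pth')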
# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(
    states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

random_seed = 1
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
scores = np.zeros(num_agents)

# Load the trained actor weights, remapping them to the CPU when no GPU is available.
if torch.cuda.is_available():
    trained_model = torch.load('checkpoint_actor.pth')
else:
    trained_model = torch.load('checkpoint_actor.pth', map_location={'cuda:0': 'cpu'})

agent = Agent(state_size=state_size, action_size=action_size, random_seed=random_seed)
agent.actor_local = Actor(state_size, action_size, random_seed).to(device)
agent.actor_local.load_state_dict(trained_model)

env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
states = env_info.vector_observations               # get the current state (for each agent)
while True:
    action = agent.act(states, add_noise=False)
    env_info = env.step(action)[brain_name]
    states = env_info.vector_observations           # get next state (for each agent)
    # The original cell is truncated here; accumulating the rewards and
    # stopping at episode end is a reconstruction.
    scores += env_info.rewards
    if np.any(env_info.local_done):
        break
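# Assuming the loop above breaks on local_done, reporting the episode score
# and releasing the environment rounds off the evaluation:
print('Evaluation score (max over agents): {:.2f}'.format(np.max(scores)))
env.close()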