import matplotlib.pyplot as plt
import torch


def main():
    ou = OrnsteinUhlenbeck(mu=torch.zeros(1), sigma=0.05 * torch.ones(1))
    xs = list(range(100000))
    ys = []
    for x in xs:
        y = ou()            # draw the next temporally correlated noise sample
        ys.append(y.data)
    plt.plot(xs, ys)
    plt.show()
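None of the snippets here define the OrnsteinUhlenbeck class itself. Below is a minimal sketch consistent with how it is constructed and called above (tensor-valued mu and sigma, samples drawn via ou()); the theta and dt defaults are assumptions, not values taken from the original code. Note that the Agent classes further down build their noise from a shape and a seed and call .sample(), so their version of the class has a slightly different interface.

class OrnsteinUhlenbeck:
    """Temporally correlated exploration noise:
    dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1).
    """

    def __init__(self, mu, sigma, theta=0.15, dt=1e-2):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta      # assumed mean-reversion rate
        self.dt = dt            # assumed integration time step
        self.reset()

    def reset(self):
        # Restart the process at its long-run mean.
        self.x = self.mu.clone()

    def __call__(self):
        # One Euler-Maruyama step of the OU stochastic differential equation.
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * (self.dt ** 0.5) * torch.randn_like(self.mu))
        return self.x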
def __init__(self, env, log_dir, gamma=0.99, batch_size=64, sigma=0.2, batch_norm=True,
             merge_layer=2, buffer_size=int(1e6), buffer_min=int(1e4), tau=1e-3,
             Q_wd=1e-2, num_episodes=1000):
    # Infer state and action dimensions from the environment
    self.s_dim = env.reset().shape[0]
    # self.a_dim = env.action_space.shape[0]
    self.a_dim = env.action_space2.shape[0]
    # self.a_dim = 1
    self.env = env

    # Actor (policy) and critic networks, plus target copies kept in eval mode
    # self.mu = Actor(self.s_dim, self.a_dim, env.action_space, batch_norm=batch_norm)
    self.mu = Actor(self.s_dim, self.a_dim, env.action_space2, batch_norm=batch_norm)
    self.Q = Critic(self.s_dim, self.a_dim, batch_norm=batch_norm, merge_layer=merge_layer)
    self.targ_mu = copy.deepcopy(self.mu).eval()
    self.targ_Q = copy.deepcopy(self.Q).eval()

    # Exploration noise and replay buffer
    self.noise = OrnsteinUhlenbeck(mu=torch.zeros(self.a_dim),
                                   sigma=sigma * torch.ones(self.a_dim))
    self.buffer = Buffer(buffer_size, self.s_dim, self.a_dim)
    self.buffer_min = buffer_min

    # Optimizers and training hyperparameters
    self.mse_fn = torch.nn.MSELoss()
    self.mu_optimizer = torch.optim.Adam(self.mu.parameters(), lr=1e-4)
    self.Q_optimizer = torch.optim.Adam(self.Q.parameters(), lr=1e-3, weight_decay=Q_wd)
    self.gamma = gamma
    self.batch_size = batch_size
    self.num_episodes = num_episodes
    self.tau = tau
    self.log_dir = log_dir

    # Pre-fill the replay buffer before training starts
    self.fill_buffer()
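fill_buffer(), Actor, Critic and Buffer are defined elsewhere in that project. For orientation, a rough sketch of what fill_buffer() could look like follows; the uniform random-action warm-up, the Buffer.add signature, Buffer supporting len(), and numpy being imported as np are all assumptions.

def fill_buffer(self):
    # Warm up the replay buffer with random-policy transitions until it holds
    # at least buffer_min samples (sketch; the real Buffer API may differ).
    s = self.env.reset()
    while len(self.buffer) < self.buffer_min:
        a = np.random.uniform(self.env.action_space2.low,
                              self.env.action_space2.high,
                              size=self.a_dim)
        s2, r, done, _ = self.env.step(a)
        self.buffer.add(s, a, r, s2, done)
        s = self.env.reset() if done else s2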
class Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, fc1_units, fc2_units, num_agents):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            fc1_units (int): size of the first hidden layer
            fc2_units (int): size of the second hidden layer
            num_agents (int): number of agents in the environment
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = torch.manual_seed(SEED)
        self.num_agents = num_agents

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, fc1_units, fc2_units).to(device)
        self.actor_target = Actor(state_size, action_size, fc1_units, fc2_units).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, fc1_units, fc2_units).to(device)
        self.critic_target = Critic(state_size, action_size, fc1_units, fc2_units).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OrnsteinUhlenbeck((num_agents, action_size), SEED)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, SEED, device)

    def step(self, step, state, action, reward, next_state, done, agent_number):
        """Save experience in replay memory, and use random sample from buffer to learn."""
        # Save experience / reward
        self.memory.add(state, action, reward, next_state, done)

        # Learn, if enough samples are available in memory and this is an update step
        if (len(self.memory) > BATCH_SIZE) and (step % N_TIME_STEPS == 0):
            for n in range(N_LEARN_UPDATES):
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA, agent_number)

    def act(self, states, add_noise=True):
        """Returns actions for both agents as per current policy, given their respective states."""
        states = torch.from_numpy(states).float().to(device)
        actions = np.zeros((self.num_agents, self.action_size))
        self.actor_local.eval()
        with torch.no_grad():
            # get the action for each agent and concatenate them
            for agent_num, state in enumerate(states):
                action = self.actor_local(state).cpu().data.numpy()
                actions[agent_num, :] = action
        self.actor_local.train()
        if add_noise:
            actions += self.noise.sample()
        return np.clip(actions, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma, agent_number):
        """Update policy and value parameters using given batch of experience tuples.

        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
            agent_number (int): index of the agent whose networks are being updated
        """
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        # Get predicted next-state actions and Q values from target models
        actions_next = self.actor_target(next_states)
        # Construct the joint next-action vector relative to the agent
        if agent_number == 0:
            actions_next = torch.cat((actions_next, actions[:, 2:]), dim=1)
        else:
            actions_next = torch.cat((actions[:, :2], actions_next), dim=1)
        Q_targets_next = self.critic_target(next_states, actions_next)
        # Compute Q targets for current states (y_i)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # ---------------------------- update actor ---------------------------- #
        # Compute actor loss
        actions_pred = self.actor_local(states)
        if agent_number == 0:
            actions_pred = torch.cat((actions_pred, actions[:, 2:]), dim=1)
        else:
            actions_pred = torch.cat((actions[:, :2], actions_pred), dim=1)
        actor_loss = -self.critic_local(states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.

        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            local_model: PyTorch model (weights will be copied from)
            target_model: PyTorch model (weights will be copied to)
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)
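The slicing in learn() hard-codes two agents with 2-dimensional actions, so the joint action stored in the replay buffer has four columns. A small standalone check of how the joint next-action tensor is assembled for each agent (the tensor values here are arbitrary, only the shapes matter) looks like this:

import torch

batch = 3
joint_actions = torch.randn(batch, 4)   # columns 0-1: agent 0, columns 2-3: agent 1
own_next = torch.randn(batch, 2)        # actor_target output for the agent being updated

# agent 0: its predicted next action replaces the first two columns
agent0_next = torch.cat((own_next, joint_actions[:, 2:]), dim=1)
# agent 1: its predicted next action replaces the last two columns
agent1_next = torch.cat((joint_actions[:, :2], own_next), dim=1)

assert agent0_next.shape == (batch, 4) and agent1_next.shape == (batch, 4)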
class Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, fc1_units, fc2_units):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            fc1_units (int): size of the first hidden layer
            fc2_units (int): size of the second hidden layer
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = torch.manual_seed(SEED)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, fc1_units, fc2_units).to(device)
        self.actor_target = Actor(state_size, action_size, fc1_units, fc2_units).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size, fc1_units, fc2_units).to(device)
        self.critic_target = Critic(state_size, action_size, fc1_units, fc2_units).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process
        self.noise = OrnsteinUhlenbeck(action_size, SEED)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, SEED, device)

    def step(self, time_step, state, action, reward, next_state, done):
        """Save experience in replay memory, and use random sample from buffer to learn."""
        self.memory.add(state, action, reward, next_state, done)

        # Learn only every N_TIME_STEPS
        if time_step % N_TIME_STEPS != 0:
            return

        # Learn if enough samples are available in the replay buffer
        if len(self.memory) > BATCH_SIZE:
            for i in range(N_LEARN_UPDATES):
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, state, add_noise=True):
        """Returns actions for the given state as per current policy."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.noise.sample()
        return np.clip(action, -1, 1)

    def reset(self):
        self.noise.reset()

    def learn(self, experiences, gamma):
        """Update policy and value parameters using given batch of experience tuples.

        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        # Get predicted next-state actions and Q values from target models
        actions_next = self.actor_target(next_states)
        Q_targets_next = self.critic_target(next_states, actions_next)
        # Compute Q targets from current states (y_i)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # ---------------------------- update actor ---------------------------- #
        # Compute actor loss
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # ----------------------- update target networks ----------------------- #
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        """Soft update model parameters.

        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            local_model: PyTorch model (weights will be copied from)
            target_model: PyTorch model (weights will be copied to)
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

    def store(self):
        torch.save(self.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(self.critic_local.state_dict(), 'checkpoint_critic.pth')

    def load(self):
        if os.path.isfile('checkpoint_actor.pth') and os.path.isfile('checkpoint_critic.pth'):
            print("=> loading checkpoints for Actor and Critic... ")
            # load_state_dict expects a state dict, not a file name
            self.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
            self.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))
            print("done!")
        else:
            print("no checkpoints found for Actor and Critic...")
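Hyperparameters such as SEED, LR_ACTOR and BUFFER_SIZE, as well as the Actor, Critic, ReplayBuffer and OrnsteinUhlenbeck classes, live elsewhere in that project. For orientation, a rough training-loop sketch for this single-agent variant, assuming a classic Gym-style continuous-control environment (the environment name, layer sizes and episode counts below are placeholders; act() clips actions to [-1, 1], so the environment's action range should match), might look like:

import gym
import numpy as np

env = gym.make("Pendulum-v1")                     # placeholder environment
agent = Agent(state_size=env.observation_space.shape[0],
              action_size=env.action_space.shape[0],
              fc1_units=400, fc2_units=300)       # placeholder layer sizes
agent.load()                                      # resume from checkpoints if present

for episode in range(1, 501):
    state = env.reset()
    agent.reset()                                 # reset the OU noise each episode
    score = 0.0
    for t in range(1000):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.step(t, state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            break
    print(f"episode {episode:4d}  score {score:8.2f}")

agent.store()                                     # save actor/critic checkpoints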