def __init__(self, observation_space, action_space, lr, gamma, discrete=False, project_dim=4, device='cpu'):
    """
    Parameters
    ----------
    observation_space: int
        Number of flattened entries of the state
    action_space: int
        Number of (discrete) possible actions to take
    """
    self.gamma = gamma
    self.lr = lr
    self.n_actions = action_space
    self.discrete = discrete
    if self.discrete:
        self.net = Actor(observation_space, action_space, discrete, project_dim)
    else:
        self.net = Actor(observation_space, action_space, discrete)
    self.optim = torch.optim.Adam(self.net.parameters(), lr=self.lr)
    self.device = device
    self.net.to(self.device)  # move network to device
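# The gamma and lr stored above are presumably consumed by an update step not
# shown in this snippet. A minimal sketch of the discounted-return computation
# such a policy-gradient agent typically relies on (the helper name and its
# standalone form are illustrative, not from the source):
def discounted_returns(rewards, gamma):
    """Return G_t = sum_k gamma**k * r_{t+k} for each step of one episode."""
    returns = []
    g = 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    returns.reverse()
    return returns

# Example: discounted_returns([1.0, 1.0, 1.0], 0.9) -> [2.71, 1.9, 1.0]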
def __init__(self, observation_space, action_space, lr_actor, lr_critic, gamma, device='cpu', discrete=False, project_dim=8):
    """
    Parameters
    ----------
    observation_space: int
        Number of flattened entries of the state
    action_space: int
        Number of (discrete) possible actions to take
    """
    self.gamma = gamma
    self.n_actions = action_space
    self.discrete = discrete
    if self.discrete:
        self.actor = DiscreteActor(observation_space, action_space, project_dim)
        self.critic = DiscreteCritic(observation_space, project_dim)
    else:
        self.actor = Actor(observation_space, action_space)
        self.critic = Critic(observation_space)
    self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=lr_critic)
    self.device = device
class LunarLander(Problem):
    def __init__(self, continuous=True, seed=0):
        super(LunarLander, self).__init__()
        if continuous:
            self.env = gym.make('LunarLanderContinuous-v2')
        else:
            self.env = gym.make('LunarLander-v2')
        self.env.seed(seed)
        self.mu = Actor(s_dim=8, a_dim=4, h_dim=20)

    def score(self, x):
        self.mu.fill_weights(x)
        r_tot = 0.
        done = False
        s = self.env.reset()
        while not done:
            a = self.mu(s)
            s, r, done, _ = self.env.step(a)
            r_tot += r
        return -r_tot  # negated so that minimizing score maximizes episode return

    def score_vec(self, X):
        # The original returned before closing the pool, leaving close()/join()
        # unreachable; collect the results first, then clean up.
        p = Pool(4)
        scores = p.map(self.score, [x for x in X])
        p.close()
        p.join()
        return scores

    def save(self, x, g):
        self.mu.fill_weights(x)
        pickle.dump(self.mu, open('log/lander/models/model_' + str(g), 'wb'))
def __init__(self, state_size, action_size):
    super().__init__()
    gpu = torch.cuda.is_available()
    if gpu:
        print('GPU/CUDA works! Happy fast training :)')
        torch.cuda.current_device()
        torch.cuda.empty_cache()
        self.device = torch.device("cuda")
    else:
        print('training on cpu...')
        self.device = torch.device("cpu")
    self.actor = Actor(state_size, action_size).to(self.device)
    self.actor_target = Actor(state_size, action_size).to(self.device)
    self.actor_optim = optim.Adam(self.actor.parameters(), lr=0.0001)
    self.critic = Critic(state_size, action_size).to(self.device)
    self.critic_target = Critic(state_size, action_size).to(self.device)
    self.critic_optim = optim.Adam(self.critic.parameters(), lr=0.001, weight_decay=0)
    self.replay_buffer = deque(maxlen=1000000)  # 1M transitions
    self.gamma = 0.95  # 0.99
    self.batch_size = 128
    self.tau = 0.001
    self.seed = random.seed(2)
    self.noise = OUNoise((20, action_size), 2)
    self.target_network_update(self.actor_target, self.actor, 1.0)
    self.target_network_update(self.critic_target, self.critic, 1.0)
def __init__(self, in_actor, out_actor,
             in_critic,  # e.g. = n_agent * (state_size + action_size)
             lr_actor=1e-4,
             lr_critic=1e-3,  # the critic typically learns faster than the actor
             random_seed=2):
    self.state_size = in_actor
    self.action_size = out_actor
    self.seed = random.seed(random_seed)
    self.params = {"lr_actor": lr_actor, "lr_critic": lr_critic, "optimizer": "adam"}

    self.local_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
    self.target_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
    self.actor_optimizer = optim.Adam(self.local_actor.parameters(), lr=lr_actor)

    # For a single agent, the critic takes global observations as input and
    # outputs the action-value Q, e.g. global_states = all_states + all_actions.
    self.local_critic = Critic(in_shape=in_critic).to(device)
    self.target_critic = Critic(in_shape=in_critic).to(device)
    self.critic_optimizer = optim.Adam(self.local_critic.parameters(), lr=lr_critic)

    # Local and target networks should start from identical weights, so
    # hard-copy the local parameters into the targets at initialization.
    hard_update_A_from_B(self.target_actor, self.local_actor)
    hard_update_A_from_B(self.target_critic, self.local_critic)

    # Noise process
    self.noise = OUNoise(out_actor, scale=1.0)
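# hard_update_A_from_B is referenced above but not defined in this snippet. A
# minimal sketch of what it presumably does, i.e. copy B's parameters into A
# (an assumption based on the surrounding comments, not the source definition):
def hard_update_A_from_B(A, B):
    """Hard copy: overwrite every parameter of network A with network B's."""
    for a_param, b_param in zip(A.parameters(), B.parameters()):
        a_param.data.copy_(b_param.data)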
def __init__(self, state_dim, action_dim, num_shared, device):
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.device = device
    self.actor = Actor(state_dim, action_dim, num_shared).to(device)
    self.critic = Critic(state_dim, num_shared).to(device)
def __init__(self, in_actor, in_critic, action_size, num_agents, random_seed):
    """Init the agent."""
    super(DDPG_agent, self).__init__()
    self.action_size = action_size
    self.seed = random_seed

    # Fully connected actor network
    self.actor_local = Actor(in_actor, self.action_size, self.seed).to(device)
    self.actor_target = Actor(in_actor, self.action_size, self.seed).to(device)
    self.actor_optimizer = Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Fully connected critic network
    self.critic_local = Critic(in_critic, num_agents * self.action_size, self.seed).to(device)
    self.critic_target = Critic(in_critic, num_agents * self.action_size, self.seed).to(device)
    self.critic_optimizer = Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Ornstein-Uhlenbeck noise process for exploration
    self.noise = OUNoise(action_size, random_seed)
def __init__(self, actor_size, action_size, critic_size):
    super().__init__()
    gpu = torch.cuda.is_available()
    if gpu:
        print('GPU/CUDA works! Happy fast training :)')
        torch.cuda.current_device()
        torch.cuda.empty_cache()
        self.device = torch.device("cuda")
    else:
        print('training on cpu...')
        self.device = torch.device("cpu")
    self.actor = Actor(actor_size, action_size).to(self.device)
    self.actor_target = Actor(actor_size, action_size).to(self.device)
    self.actor_optim = optim.Adam(self.actor.parameters(), lr=0.0001)
    self.critic = Critic(critic_size).to(self.device)
    self.critic_target = Critic(critic_size).to(self.device)
    self.critic_optim = optim.Adam(self.critic.parameters(), lr=0.001, weight_decay=0)
    self.gamma = 0.95  # 0.99
    self.tau = 0.001
    self.noise = OUNoise(action_size, 2)
    self.target_network_update(self.actor_target, self.actor, 1.0)
    self.target_network_update(self.critic_target, self.critic, 1.0)
def run_agent(env: Environment, actor: Actor, render: bool, n_eval_episode: int, **kwargs):
    actor.eval()
    scores = []
    for _ in range(n_eval_episode):
        score = 0.
        states = env.reset(render=render)
        for step in count():
            actions = actor.act(states)
            actions = actions.detach().numpy()
            __log.debug("Actions: %s." % str(actions))
            states, rewards, dones, _ = env.step(actions)
            score += np.mean(rewards)
            if any(dones):
                __log.info("Done.")
                break
            if 'max_step' in kwargs and step >= kwargs['max_step']:
                __log.info("Break due to hitting max_step.")
                break
        __log.info("Score: {}".format(score))
        scores.append(score)
    __log.info("Average: {}".format(np.mean(scores)))
def __init__(self, num_agents, state_size, action_size, random_seed=2018):
    self.num_agents = num_agents
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.device = torch.device('cuda' if cuda else 'cpu')
    self.update = UPDATE_EVERY
    self.updates = NUMBER_OF_UPDATES

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed, device)
def __init__(self, continuous=True, seed=0):
    super(LunarLander, self).__init__()
    if continuous:
        self.env = gym.make('LunarLanderContinuous-v2')
    else:
        self.env = gym.make('LunarLander-v2')
    self.env.seed(seed)
    self.mu = Actor(s_dim=8, a_dim=4, h_dim=20)
def __init__(self, state_size, action_size, n_agents, random_seed=1):
    self.actor_local = Actor(state_size, action_size, random_seed)
    self.actor_target = Actor(state_size, action_size, random_seed)
    self.ddpg_agents = [
        DDPGAgent(state_size, action_size, self.actor_local, self.actor_target, random_seed)
        for _ in range(n_agents)
    ]
def __init__(self, sess, dimo, dimu, u_bound, critic_lr, actor_lr, critic_l2, clip_norm,
             tau, layer_norm, noisy_layer, gamma, memory_size, exploration, batch_size, env_dt):
    self._sess = sess
    self._dimo = dimo
    self._dimu = dimu
    self._critic_l2 = critic_l2
    self._actor_lr = actor_lr
    self._critic_lr = critic_lr
    self._clip_norm = clip_norm
    self._noisy = noisy_layer
    self._gamma = gamma
    self._tau = tau
    self._batch_size = batch_size
    self._u_bound = u_bound
    self._global_step = tf.train.get_or_create_global_step()
    self.ou_noise = OUNoise(dim=dimu, n_step_annealing=exploration, dt=env_dt)
    self._memory = ReplayMemory(memory_size)

    with tf.variable_scope('inputs'):
        self._obs = tf.placeholder(tf.float32, [None, self._dimo], name='state')
        self._u = tf.placeholder(tf.float32, [None, self._dimu], name='action')
        self._t_obs = tf.placeholder(tf.float32, [None, self._dimo], name='target_state')

    with tf.variable_scope('actor'):
        self._actor = Actor('main', self._obs, dimu, layer_norm, noisy_layer)
        self._target_actor = Actor('target', self._t_obs, dimu, layer_norm, noisy_layer)

    with tf.variable_scope('critic'):
        self._critic = Critic('main', self._obs, self._u, layer_norm, noisy_layer)
        self._critic_pi = Critic('main', self._obs,
                                 U.scaling(self._actor.pi, -1.0, 1.0,
                                           self._u_bound['low'], self._u_bound['high']),
                                 layer_norm, noisy_layer, reuse=True)
        self._target_critic = Critic('target', self._t_obs,
                                     U.scaling(self._target_actor.pi, -1.0, 1.0,
                                               self._u_bound['low'], self._u_bound['high']),
                                     layer_norm, noisy_layer)

    self._build_train_method()
    self._update_target_op = self._update_target_networks()
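# _update_target_networks is called above but its body is not shown. A plausible
# TF1-style construction of the soft-update op (an assumption throughout: the
# scope names follow the 'actor'/'critic' and 'main'/'target' scopes above):
def _update_target_networks_sketch(tau):
    ops = []
    for scope in ('actor', 'critic'):
        main_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/main')
        target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/target')
        for m, t in zip(main_vars, target_vars):
            # target <- tau * main + (1 - tau) * target
            ops.append(tf.assign(t, tau * m + (1.0 - tau) * t))
    return tf.group(*ops)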
def __init__(self, state_size: int, action_size: int, num_agents: int, epsilon, random_seed: int):
    """
    Initialize a DDPG Agent Object

    :param state_size: dimension of state (input)
    :param action_size: dimension of action (output)
    :param num_agents: number of concurrent agents in the environment
    :param epsilon: initial value of epsilon for exploration
    :param random_seed: random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    self.seed = random.seed(random_seed)
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.t_step = 0

    # Hyperparameters
    self.buffer_size = 1000000
    self.batch_size = 128
    self.update_every = 10
    self.num_updates = 10
    self.gamma = 0.99
    self.tau = 0.001
    self.lr_actor = 0.0001
    self.lr_critic = 0.001
    self.weight_decay = 0
    self.epsilon = epsilon
    self.epsilon_decay = 0.97
    self.epsilon_min = 0.005

    # Networks (Actor: State -> Action, Critic: (State, Action) -> Value)
    self.actor_local = Actor(self.state_size, self.action_size, random_seed).to(self.device)
    self.actor_target = Actor(self.state_size, self.action_size, random_seed).to(self.device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.lr_actor)
    self.critic_local = Critic(self.state_size, self.action_size, random_seed).to(self.device)
    self.critic_target = Critic(self.state_size, self.action_size, random_seed).to(self.device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.lr_critic,
                                       weight_decay=self.weight_decay)

    # Initialize actor and critic networks to start with same parameters
    self.soft_update(self.actor_local, self.actor_target, tau=1)
    self.soft_update(self.critic_local, self.critic_target, tau=1)

    # Noise Setup
    self.noise = OUNoise(self.action_size, random_seed)

    # Replay Buffer Setup
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
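# soft_update is called with tau=1 above so the targets start identical to the
# local networks. A minimal sketch of the usual Polyak-averaging helper, with
# the (local, target, tau) signature assumed from the calls above:
def soft_update(local_model, target_model, tau):
    """theta_target <- tau * theta_local + (1 - tau) * theta_target."""
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

# With tau=1 this reduces to a hard copy, which is what the initialization
# above relies on.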
def __init__(self, state_dim, action_dim, action_lim, update_type='soft',
             lr_actor=1e-4, lr_critic=1e-3, tau=1e-3, mem_size=1e6,
             batch_size=256, gamma=0.99, other_cars=False, ego_dim=None):
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    self.joint_model = False
    if len(state_dim) == 3:
        self.model = ActorCriticCNN(state_dim, action_dim, action_lim)
        self.model_optim = optim.Adam(self.model.parameters(), lr=lr_actor)
        self.target_model = ActorCriticCNN(state_dim, action_dim, action_lim)
        self.target_model.load_state_dict(self.model.state_dict())
        self.model.to(self.device)
        self.target_model.to(self.device)
        self.joint_model = True
    else:
        self.actor = Actor(state_dim, action_dim, action_lim, other_cars=other_cars, ego_dim=ego_dim)
        self.actor_optim = optim.Adam(self.actor.parameters(), lr=lr_actor)
        self.target_actor = Actor(state_dim, action_dim, action_lim, other_cars=other_cars, ego_dim=ego_dim)
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.target_actor.eval()

        self.critic = Critic(state_dim, action_dim, other_cars=other_cars, ego_dim=ego_dim)
        self.critic_optim = optim.Adam(self.critic.parameters(), lr=lr_critic, weight_decay=1e-2)
        self.target_critic = Critic(state_dim, action_dim, other_cars=other_cars, ego_dim=ego_dim)
        self.target_critic.load_state_dict(self.critic.state_dict())
        self.target_critic.eval()

        self.actor.to(self.device)
        self.target_actor.to(self.device)
        self.critic.to(self.device)
        self.target_critic.to(self.device)

    self.action_lim = action_lim
    self.tau = tau  # hard update if tau is None
    self.update_type = update_type
    self.batch_size = batch_size
    self.gamma = gamma

    if self.joint_model:
        mem_size = mem_size // 100
    self.memory = Memory(int(mem_size), action_dim, state_dim)

    mu = np.zeros(action_dim)
    sigma = np.array([0.5, 0.05])
    self.noise = OrnsteinUhlenbeckActionNoise(mu, sigma)
    self.target_noise = OrnsteinUhlenbeckActionNoise(mu, sigma)

    self.initialised = True
    self.training = False
def __init__(self, device, memory, state_size, action_size, low_bound, high_bound, folder, config):
    self.folder = folder
    self.config = config
    self.device = device
    self.memory = memory
    self.state_size = state_size
    self.action_size = action_size
    self.low_bound = low_bound
    self.high_bound = high_bound
    self.critic = Critic(state_size, action_size, device, self.config)
    self.actor = Actor(state_size, action_size, low_bound, high_bound, device, self.config)
def __init__(self,
             state_size,
             action_size,
             sample_batch_size,
             memory_size=int(1e5),  # replay buffer size
             batch_size=128,        # minibatch size
             gamma=0.99,            # discount factor
             tau=1e-3,              # for soft update of target parameters
             update_every=10,
             lr_actor=1e-4,
             lr_critic=1e-3,
             random_seed=2):
    self.sample_batch_size = sample_batch_size
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.params = {
        "lr_actor": lr_actor,
        "lr_critic": lr_critic,
        "gamma": gamma,
        "tau": tau,
        "memory_size": memory_size,
        "batch_size": batch_size,
        "optimizer": "adam"
    }

    self.actor_local = Actor(state_size, action_size, seed=random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, seed=random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=lr_actor)

    self.critic_local = Critic(state_size, action_size, seed=random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, seed=random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=lr_critic)

    self.memory = ReplayBuffer(action_size, memory_size, batch_size, random_seed)

    # Noise process
    self.noise = OUNoise([sample_batch_size, action_size], random_seed)

    self.learn_steps = 0
    self.update_every = update_every
def __init__(self, device, state_size, action_size, folder, config):
    self.folder = folder
    self.config = config
    self.device = device
    self.memory = ReplayMemory(self.config["MEMORY_CAPACITY"])
    self.state_size = state_size
    self.action_size = action_size
    self.critic = Critic(self.state_size, self.action_size, self.device, self.config)
    self.actor = Actor(self.state_size, self.action_size, self.device, self.config)
def __init__(self, env, log_dir, gamma=0.99, batch_size=64, sigma=0.2, batch_norm=True,
             merge_layer=2, buffer_size=int(1e6), buffer_min=int(1e4), tau=1e-3,
             Q_wd=1e-2, num_episodes=1000):
    self.s_dim = env.reset().shape[0]
    # self.a_dim = env.action_space.shape[0]
    self.a_dim = env.action_space2.shape[0]
    # self.a_dim = 1
    self.env = env
    # self.mu = Actor(self.s_dim, self.a_dim, env.action_space, batch_norm=batch_norm)
    self.mu = Actor(self.s_dim, self.a_dim, env.action_space2, batch_norm=batch_norm)
    self.Q = Critic(self.s_dim, self.a_dim, batch_norm=batch_norm, merge_layer=merge_layer)
    self.targ_mu = copy.deepcopy(self.mu).eval()
    self.targ_Q = copy.deepcopy(self.Q).eval()
    self.noise = OrnsteinUhlenbeck(mu=torch.zeros(self.a_dim), sigma=sigma * torch.ones(self.a_dim))
    self.buffer = Buffer(buffer_size, self.s_dim, self.a_dim)
    self.buffer_min = buffer_min
    self.mse_fn = torch.nn.MSELoss()
    self.mu_optimizer = torch.optim.Adam(self.mu.parameters(), lr=1e-4)
    self.Q_optimizer = torch.optim.Adam(self.Q.parameters(), lr=1e-3, weight_decay=Q_wd)
    self.gamma = gamma
    self.batch_size = batch_size
    self.num_episodes = num_episodes
    self.tau = tau
    self.log_dir = log_dir
    self.fill_buffer()
class DDPG_agent(nn.Module):
    def __init__(self, in_actor, in_critic, action_size, num_agents, random_seed):
        """Init the agent."""
        super(DDPG_agent, self).__init__()
        self.action_size = action_size
        self.seed = random_seed

        # Fully connected actor network
        self.actor_local = Actor(in_actor, self.action_size, self.seed).to(device)
        self.actor_target = Actor(in_actor, self.action_size, self.seed).to(device)
        self.actor_optimizer = Adam(self.actor_local.parameters(), lr=LR_ACTOR)

        # Fully connected critic network
        self.critic_local = Critic(in_critic, num_agents * self.action_size, self.seed).to(device)
        self.critic_target = Critic(in_critic, num_agents * self.action_size, self.seed).to(device)
        self.critic_optimizer = Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

        # Ornstein-Uhlenbeck noise process for exploration
        self.noise = OUNoise(action_size, random_seed)

    def act(self, state, add_noise=True):
        """Returns actions for the given state as per the current policy."""
        state = torch.from_numpy(state).float().to(device)
        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()
        if add_noise:
            action += self.noise.sample()
        return np.clip(action, -1, 1)

    def target_act(self, state, add_noise=True):
        """Returns actions for the given state as per the current target policy."""
        action = self.actor_target(state)
        return action

    def reset(self):
        """Resets the noise process."""
        self.noise.reset()
def __init__(self, policy: str, action_dim: int, max_action: float, lr: float, discount: float,
             noise_clip: float, policy_noise: float, policy_freq: int, actor_rng: jnp.ndarray,
             critic_rng: jnp.ndarray, sample_state: np.ndarray):
    self.discount = discount
    self.noise_clip = noise_clip
    self.policy_noise = policy_noise
    self.policy_freq = policy_freq
    self.max_action = max_action
    self.td3_update = policy == 'TD3'

    self.actor = hk.transform(lambda x: Actor(action_dim, max_action)(x))
    actor_opt_init, self.actor_opt_update = optix.adam(lr)
    self.critic = hk.transform(lambda x: Critic()(x))
    critic_opt_init, self.critic_opt_update = optix.adam(lr)

    self.actor_params = self.target_actor_params = self.actor.init(actor_rng, sample_state)
    self.actor_opt_state = actor_opt_init(self.actor_params)

    action = self.actor.apply(self.actor_params, sample_state)
    self.critic_params = self.target_critic_params = self.critic.init(
        critic_rng, jnp.concatenate((sample_state, action), 0))
    self.critic_opt_state = critic_opt_init(self.critic_params)

    self.updates = 0
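# The target parameter trees initialized above are presumably refreshed during
# training with Polyak averaging. A minimal JAX sketch (the function name and
# default tau are illustrative, not from the source):
import jax

def polyak_update(params, target_params, tau=0.005):
    """Blend online params into target params, leaf by leaf."""
    return jax.tree_map(lambda p, tp: tau * p + (1.0 - tau) * tp, params, target_params)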
def __init__(self, env_name: str, threads: int, episodes: int, entropy_weight: float,
             learning_rate: Union[float, tf.keras.optimizers.schedules.LearningRateSchedule],
             discount_factor: float):
    self.env_name = env_name
    env = gym.make(env_name)
    self.save_dir = os.path.expanduser('~/keras-a3c/models/')
    self.threads = threads
    self.EPISODES = episodes
    self.entropy_weight = entropy_weight
    self.learning_rate = learning_rate
    self.discount_factor = discount_factor

    actor = Actor(action_space_size=env.action_space.n)
    critic = Critic()
    self.global_model = ActorCriticModel(actor, critic)
    self.actor_loss = ActorLoss(entropy_weight)
    self.optimizer = tf.keras.optimizers.RMSprop(lr=learning_rate)
    # Build the global model's weights by running a dummy forward pass.
    self.global_model(tf.convert_to_tensor(np.random.random((1, env.observation_space.shape[0]))))
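# ActorLoss is instantiated above but not defined in this snippet. A common
# A3C-style formulation with an entropy bonus (an assumption about the class,
# including the y_true packing; not necessarily the author's exact code):
class ActorLoss(tf.keras.losses.Loss):
    def __init__(self, entropy_weight):
        super().__init__()
        self.entropy_weight = entropy_weight

    def call(self, y_true, y_pred):
        # y_true packs [action_taken, advantage]; y_pred holds policy logits.
        actions = tf.cast(y_true[:, 0], tf.int32)
        advantages = y_true[:, 1]
        log_policy = tf.nn.log_softmax(y_pred)
        policy = tf.nn.softmax(y_pred)
        log_prob_taken = tf.gather(log_policy, actions, axis=1, batch_dims=1)
        policy_loss = -tf.reduce_mean(log_prob_taken * advantages)
        # Entropy bonus discourages premature convergence to a deterministic policy.
        entropy = -tf.reduce_sum(policy * log_policy, axis=1)
        return policy_loss - self.entropy_weight * tf.reduce_mean(entropy)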
def __init__(self, state_size, action_size, num_agents, random_seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of agents
        random_seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    self.seed = random.seed(random_seed)

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Noise process (one per agent)
    self.noise = [OUNoise(action_size, random_seed, sigma=0.1) for i in range(self.num_agents)]

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    # Make sure target is with the same weight as the source
    self.hard_update(self.actor_target, self.actor_local)
    self.hard_update(self.critic_target, self.critic_local)

    self.t_step = 0
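# OUNoise is used throughout these snippets but defined in none of them. A
# minimal sketch of the Ornstein-Uhlenbeck process, matching the constructor
# signature used above (the mu/theta defaults are conventional, not from the source):
import copy
import random
import numpy as np

class OUNoise:
    """Temporally correlated exploration noise for continuous actions."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """Drift toward the mean, plus Gaussian diffusion."""
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(self.state.shape)
        self.state = self.state + dx
        return self.state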
class Agent():
    def __init__(self, actor_size, action_size, critic_size):
        super().__init__()
        gpu = torch.cuda.is_available()
        if gpu:
            print('GPU/CUDA works! Happy fast training :)')
            torch.cuda.current_device()
            torch.cuda.empty_cache()
            self.device = torch.device("cuda")
        else:
            print('training on cpu...')
            self.device = torch.device("cpu")
        self.actor = Actor(actor_size, action_size).to(self.device)
        self.actor_target = Actor(actor_size, action_size).to(self.device)
        self.actor_optim = optim.Adam(self.actor.parameters(), lr=0.0001)
        self.critic = Critic(critic_size).to(self.device)
        self.critic_target = Critic(critic_size).to(self.device)
        self.critic_optim = optim.Adam(self.critic.parameters(), lr=0.001, weight_decay=0)
        self.gamma = 0.95  # 0.99
        self.tau = 0.001
        self.noise = OUNoise(action_size, 2)
        self.target_network_update(self.actor_target, self.actor, 1.0)
        self.target_network_update(self.critic_target, self.critic, 1.0)

    def select_actions(self, state):
        state = torch.from_numpy(state).float().to(self.device).view(1, -1)
        # print(state.shape)
        self.actor.eval()
        with torch.no_grad():
            actions = self.actor(state).cpu().data.squeeze(0)
        self.actor.train()
        actions += self.noise.sample()
        return np.clip(actions, -1, 1)

    def reset(self):
        self.noise.reset()

    def target_network_update(self, target_network, network, tau):
        # tau = 1.0 performs a hard copy; smaller tau gives a Polyak soft update.
        for network_param, target_param in zip(network.parameters(), target_network.parameters()):
            target_param.data.copy_(tau * network_param.data + (1.0 - tau) * target_param.data)
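# None of these snippets show the learning step itself. A generic DDPG update
# sketch for the Agent above (an assumption: the Critic here takes the
# concatenated state-action vector, since it is built with a single size):
import torch
import torch.nn.functional as F

def ddpg_learn_step(agent, states, actions, rewards, next_states, dones):
    # Critic: regress Q(s, a) toward r + gamma * Q'(s', mu'(s')) * (1 - done).
    with torch.no_grad():
        next_actions = agent.actor_target(next_states)
        q_next = agent.critic_target(torch.cat([next_states, next_actions], dim=1))
        q_targets = rewards + agent.gamma * q_next * (1 - dones)
    q_expected = agent.critic(torch.cat([states, actions], dim=1))
    critic_loss = F.mse_loss(q_expected, q_targets)
    agent.critic_optim.zero_grad()
    critic_loss.backward()
    agent.critic_optim.step()

    # Actor: maximize Q(s, mu(s)) by minimizing its negation.
    actor_loss = -agent.critic(torch.cat([states, agent.actor(states)], dim=1)).mean()
    agent.actor_optim.zero_grad()
    actor_loss.backward()
    agent.actor_optim.step()

    # Soft-update the targets with the tau stored on the agent.
    agent.target_network_update(agent.actor_target, agent.actor, agent.tau)
    agent.target_network_update(agent.critic_target, agent.critic, agent.tau)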
class Agent():
    def __init__(self, args):
        self.args = args
        self.critic = Critic(args.dim_s, args.dim_a, args.dim_h, args.device)
        self.actor = Actor(args.dim_s, args.dim_a, args.dim_h, args.device)

    def choose_action(self, s):
        # print("agent state:", s)
        return self.actor.choose_action(s)

    def learn(self, trans):
        td = self.critic.cal_td_loss(trans['s'], trans['r'], trans['s_'])
        self.critic.learn(trans['s'], trans['r'], trans['s_'])
        self.actor.learn(td, trans['a'])

    def save(self, path):
        self.critic.save(path)
        self.actor.save(path)

    def load(self, path):
        self.critic.load(path)
        self.actor.load(path)
def __init__(self, state_dim, action_dim, device):
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.device = device

    self.actor = Actor(state_dim, action_dim).to(device)
    self.critic = Critic(state_dim).to(device)
    self.optimizer = torch.optim.Adam(
        itertools.chain(self.actor.parameters(), self.critic.parameters()), LR)

    self.philosophers = list()
    for i in range(P_COUNT):
        self.philosophers.append(Critic(state_dim).to(device))
    self.p_optimizers = [torch.optim.Adam(p.parameters(), lr=P_LR) for p in self.philosophers]

    self.update_cnt = 0
def __init__(self, args, env):
    self.learning_rate = args.learning_rate
    self.gamma = args.gamma
    self.lamb = args.lamb
    self.batch_size = args.batch_size
    self.step = 0
    self.epochs = args.epochs

    self.actor = Actor()
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=self.learning_rate)
    self.critic = Critic()
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=self.learning_rate)

    self.env = env
    self.num_actions = env.num_actions
    self.num_states = env.num_states
    self.data = {'step': [], 'reward': [], 'losses': []}
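# The lamb hyperparameter above is presumably the lambda of Generalized
# Advantage Estimation. A minimal GAE sketch for a finished episode (the
# standalone helper is illustrative, not the author's method):
def gae_advantages(rewards, values, gamma, lamb):
    """A_t = sum_k (gamma * lamb)**k * delta_{t+k}, with delta the TD error."""
    advantages = []
    gae = 0.0
    next_value = 0.0  # terminal state has zero value
    for r, v in zip(reversed(rewards), reversed(values)):
        delta = r + gamma * next_value - v
        gae = delta + gamma * lamb * gae
        advantages.append(gae)
        next_value = v
    advantages.reverse()
    return advantages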
class DDPG:
    def __init__(self, in_actor, out_actor,
                 in_critic,  # e.g. = n_agent * (state_size + action_size)
                 lr_actor=1e-4,
                 lr_critic=1e-3,  # the critic typically learns faster than the actor
                 random_seed=2):
        self.state_size = in_actor
        self.action_size = out_actor
        self.seed = random.seed(random_seed)
        self.params = {"lr_actor": lr_actor, "lr_critic": lr_critic, "optimizer": "adam"}

        self.local_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.target_actor = Actor(in_shape=in_actor, out_shape=out_actor).to(device)
        self.actor_optimizer = optim.Adam(self.local_actor.parameters(), lr=lr_actor)

        # For a single agent, the critic takes global observations as input and
        # outputs the action-value Q, e.g. global_states = all_states + all_actions.
        self.local_critic = Critic(in_shape=in_critic).to(device)
        self.target_critic = Critic(in_shape=in_critic).to(device)
        self.critic_optimizer = optim.Adam(self.local_critic.parameters(), lr=lr_critic)

        # Local and target networks should start from identical weights, so
        # hard-copy the local parameters into the targets at initialization.
        hard_update_A_from_B(self.target_actor, self.local_actor)
        hard_update_A_from_B(self.target_critic, self.local_critic)

        # Noise process
        self.noise = OUNoise(out_actor, scale=1.0)

    def act(self, obs, noise_scale=0.0):
        obs = obs.to(device)
        # debug noise
        # noise = torch.from_numpy(noise_scale*0.5*np.random.randn(1, self.action_size)).float().to(device)
        # action = self.local_actor(obs) + noise
        action = self.local_actor(obs) + noise_scale * self.noise.noise().to(device)
        return action

    def target_act(self, obs, noise_scale=0.0):
        obs = obs.to(device)
        # noise = torch.from_numpy(noise_scale*0.5 * np.random.randn(1, self.action_size)).float().to(device)
        # action = self.target_actor(obs) + noise_scale * noise
        action = self.target_actor(obs) + noise_scale * self.noise.noise().to(device)
        return action

    def reset(self):
        self.noise.reset()
def __init__(self):
    self.max_action = 1
    self.policy_freq = 2
    self.policy_freq_it = 0
    self.batch_size = 512
    self.discount = 0.99
    self.device = 'cuda'
    self.state_dim = 24
    self.action_dim = 2
    self.policy_noise = 0.1
    self.agents = 1
    self.random_period = 1e4
    self.tau = 5e-3
    self.replay_buffer = ReplayBuffer(int(1e5))

    self.actor = Actor(self.state_dim, self.action_dim, self.max_action).to(self.device)
    self.actor_target = Actor(self.state_dim, self.action_dim, self.max_action).to(self.device)
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=1e-4)
    # self.actor.load_state_dict(torch.load('actor2.pth'))
    # self.actor_target.load_state_dict(torch.load('actor2.pth'))

    self.noise = OUNoise(2, 32)

    # The critic sees 48 inputs, presumably both agents' 24-dim observations concatenated.
    self.critic = Critic(48, self.action_dim).to(self.device)
    self.critic_target = Critic(48, self.action_dim).to(self.device)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=3e-4)
def __init__(self, num_agents, state_size, action_size, random_seed=2018):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        random_seed (int): random seed
    """
    self.num_agents = num_agents
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.device = torch.device('cuda' if cuda else 'cpu')

    # Actor Network (w/ Target Network)
    self.actor_local = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic Network (w/ Target Network)
    self.critic_local = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed, device)
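# ReplayBuffer is constructed above with (action_size, BUFFER_SIZE, BATCH_SIZE,
# seed, device) but never defined in these snippets. A minimal sketch consistent
# with that signature (the field layout is conventional, not from the source):
import random
from collections import deque, namedtuple

import numpy as np
import torch

class ReplayBuffer:
    """Fixed-size buffer of experience tuples for off-policy learning."""

    def __init__(self, action_size, buffer_size, batch_size, seed, device):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.device = device
        random.seed(seed)
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        batch = random.sample(self.memory, k=self.batch_size)
        as_tensor = lambda xs: torch.from_numpy(np.vstack(xs)).float().to(self.device)
        states = as_tensor([e.state for e in batch])
        actions = as_tensor([e.action for e in batch])
        rewards = as_tensor([e.reward for e in batch])
        next_states = as_tensor([e.next_state for e in batch])
        dones = torch.from_numpy(
            np.vstack([e.done for e in batch]).astype(np.uint8)).float().to(self.device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)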