Code Example #1
File: trainer.py | Project: onewarmheart/HER
    def __init__(self, args, state_dim, action_dim, action_lim, ram):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
        self.args = args

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.args.learning_rate)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.args.learning_rate)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
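Every example on this page initialises its target networks with utils.hard_update, whose definition is not shown here. A minimal sketch consistent with the call sites, assuming hard_update(target, source) copies the source parameters into the target and both arguments are torch.nn.Module instances:

def hard_update(target, source):
    # copy every parameter of `source` into `target` in place
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)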
Code Example #2
    def __init__(self, state_num, action_num, buffer_size=2000, batch_size=64,
                 gamma=0.99, device=torch.device('cpu'), hidden_unit=16, lr=1e-3):

        self.device = device

        # create A-C Networks
        self.actor = Actor(state_num, action_num, hidden_unit=hidden_unit).to(device)
        self.target_actor = Actor(state_num, action_num, hidden_unit=hidden_unit).to(device)
        self.critic = Critic(state_num, action_num, hidden_unit=hidden_unit).to(device)
        self.target_critic = Critic(state_num, action_num, hidden_unit=hidden_unit).to(device)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)

        # create the optimizers
        self.actor_optimizer = torch.optim.Adam(
            self.actor.parameters(), lr=lr)
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(), lr=lr)

        # remaining hyperparameters and buffers
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.memory_buffer = deque(maxlen=buffer_size)
        self.noise = utils.OrnsteinUhlenbeckActionNoise(action_num)
#         self.action_range = action_range
        self.critic_loss_F = nn.MSELoss()
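All of these constructors draw exploration noise from utils.OrnsteinUhlenbeckActionNoise, which is also not defined on this page. The sketch below follows the standard Ornstein-Uhlenbeck formulation dx = theta * (mu - x) + sigma * N(0, 1); the four-argument call in example #10 suggests an (action_dim, mu, theta, sigma) signature, and the default values here are assumptions:

import numpy as np

class OrnsteinUhlenbeckActionNoise:
    # temporally correlated noise commonly used for DDPG exploration
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.X = np.ones(self.action_dim) * self.mu

    def reset(self):
        self.X = np.ones(self.action_dim) * self.mu

    def sample(self):
        # drift toward the mean, plus Gaussian diffusion
        dx = self.theta * (self.mu - self.X) + self.sigma * np.random.randn(self.action_dim)
        self.X = self.X + dx
        return self.X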
Code Example #3
File: DDPG.py | Project: pamzerbhu/AirSim
    def __init__(self, settings):

        # overlay the caller's settings on the module-level defaults
        self.settings = init_settings.copy()
        self.settings.update(settings)

        self.state_dim = self.settings["state_dim"]
        self.action_dim = self.settings["action_dim"]
        self.action_lim = self.settings["action_lim"]

        self.noise = utils.OrnsteinUhlenbeckActionNoise(
            self.settings["action_dim"])
        self.buffer = MemoryBuffer(self.settings["buffer_length"])
        self.batch_size = self.settings["batch_size"]
        self.gamma = self.settings["reward_decay"]
        self.tau = self.settings["TAU"]
        self.lr = self.settings["learning_rate"]

        self.actor = self.settings["Actor"]
        self.critic = self.settings["Critic"]
        self.target_actor = copy.deepcopy(self.actor)
        self.target_critic = copy.deepcopy(self.critic)

        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.lr)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.lr)

        # redundant after the deepcopy above, but harmless
        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Code Example #4
File: sgn_rl.py | Project: MultiPath/SGN-RL
    def __init__(self, game):
        super(Agent, self).__init__()
        self.env_name = game.env_name
        self.id = -1

        # main network
        self.net = nn.ModuleList()
        input_size = game.input_size
        for size in game.layers:
            self.net.append(nn.Linear(input_size, size))
            input_size = size
        self.output = nn.Linear(input_size, game.output_size)

        # action noise
        self.noise = utils.OrnsteinUhlenbeckActionNoise(game.output_size)
        self.env = None
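Example #4 only builds the layer list; its forward pass is not shown. Assuming the usual pattern for an nn.ModuleList MLP, it would look roughly like this (the ReLU nonlinearity is an assumption):

    def forward(self, x):  # F is torch.nn.functional
        # hidden layers with a nonlinearity, output layer without one
        for layer in self.net:
            x = F.relu(layer(x))
        return self.output(x)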
Code Example #5
File: train.py | Project: steph1793/Leave-No-Trace
    def __init__(self, agent, ram):

        self.ram = ram
        self.iter = 0
        self.agent = agent
        self.noise = utils.OrnsteinUhlenbeckActionNoise(
            self.agent["action_dim"])

        self.actor = self.agent["actor"]
        self.target_actor = self.agent["target_actor"]
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = self.agent["critic"]
        self.target_critic = self.agent["target_critic"]
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)
Code Example #6
    def __init__(self, state_dim, action_dim, action_lim, ram):
        """Special method for object initialisation.

		:param state_dim: Dimensions of state.
		:type state_dim: int.
		:param action_dim: Dimension of action.
		:type action_dim: int.
		:param action_lim: Used to limit action in [-action_lim, action_lim].
		:type action_lim: float.
		:param ram: replay memory buffer object.
		:type ram: buffer.
		"""

        # Set the parameters.
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0

        # Set the noise function.
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        # Set the actor.
        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        # Set the critic.
        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # Update the actor and critic networks
        self.hard_update(self.target_actor, self.actor)
        self.hard_update(self.target_critic, self.critic)

Code Example #7
File: ddpg.py | Project: zfw1226/craves_control
    def __init__(self, obs_space, action_space, ram, writer, device, args):
        """
        :param obs_space: Dimensions of state (int)
        :param action_space: Dimension of action (int)
        :param ram: replay memory buffer object
        :return:
        """
        self.state_dim = obs_space.shape[0]
        self.action_dim = action_space.shape[0]
        self.action_high = action_space.high
        self.action_low = action_space.low
        self.ram = ram
        self.iter = 1
        self.steps = 0
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.decay_rate = args.decay_rate
        self.eps_start = args.eps_start
        self.eps_end = args.eps_end
        self.eps_decay = args.eps_decay
        self.start_step = args.start_learning
        self.device = device
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
        self.writer = writer
        self.args = args

        # init network
        target_net = DDPG(obs_space.shape, self.action_dim, args).to(device)
        learn_net = DDPG(obs_space.shape, self.action_dim, args).to(device)
        utils.hard_update(target_net, learn_net)
        self.AC = learn_net
        self.AC_T = target_net
        self.actor_optimizer = torch.optim.Adam(
            self.AC.actor.policyNet.parameters(), args.lr_a)
        self.critic_optimizer = torch.optim.Adam(self.AC.critic.parameters(),
                                                 args.lr_c)
        self.actor = self.AC.actor
        self.target_actor = self.AC_T.actor
        self.critic = self.AC.critic
        self.target_critic = self.AC_T.critic
Code Example #8
    def __init__(self, state_dim, action_dim, ram):
        """
		Initialize actor and critic networks
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # copy parameters to target networks
        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
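Several of the examples (#3, #7, #10) store a tau coefficient. In DDPG that drives the soft target update applied at every training step, as opposed to the one-off hard copy done in these constructors. A minimal sketch of the utils.soft_update counterpart, under the same assumptions as the hard_update sketch above:

def soft_update(target, source, tau):
    # Polyak averaging: target = tau * source + (1 - tau) * target
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)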
Code Example #9
File: train.py | Project: yangcyself/Hexpod_locomotion
    def __init__(self, state_dim, action_dim, action_lim, ram):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE,
                                                weight_decay=1e-5)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE * 10,
                                                 weight_decay=1e-5)

        if USEGPU:
            self.target_actor = self.target_actor.cuda()
            self.actor = self.actor.cuda()
            self.target_critic = self.target_critic.cuda()
            self.critic = self.critic.cuda()

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Code Example #10
    def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic, gamma,
                 tau, batchsize, expl_rate, version):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.LR_actor = LR_actor
        self.LR_critic = LR_critic
        self.gamma = gamma
        self.tau = tau
        self.ram = ram
        self.batchsize = batchsize
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(
            self.action_dim, 0, 0.15, expl_rate)
        self.action_lim = 1.0

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.LR_actor)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.LR_critic)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
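For context, trainers like the ones above are usually driven by a loop of the following shape. The method names get_exploration_action and optimize are assumptions based on the common PyTorch DDPG template these excerpts resemble; they are not shown on this page:

# hypothetical driver loop; env, trainer, MAX_EPISODES and MAX_STEPS are placeholders
for episode in range(MAX_EPISODES):
    state = env.reset()
    for step in range(MAX_STEPS):
        action = trainer.get_exploration_action(state)   # actor output plus OU noise
        next_state, reward, done, _ = env.step(action)
        trainer.ram.add(state, action, reward, next_state)   # store the transition
        state = next_state
        trainer.optimize()   # sample a batch, update critic/actor, soft-update targets
        if done:
            break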