Example #1
    def __init__(self, args, state_dim, action_dim, action_lim, ram):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
        self.args = args

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.args.learning_rate)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.args.learning_rate)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
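utils.hard_update is not shown in this snippet; as a minimal sketch (the actual utils module may differ), such a helper is commonly written as:

import torch

def hard_update(target, source):
    # Copy every parameter of `source` into `target`, so the target networks
    # start from exactly the same weights as the trained networks.
    with torch.no_grad():
        for target_param, param in zip(target.parameters(), source.parameters()):
            target_param.copy_(param)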
Example #2
    def __init__(self, hp):
        """Initialize an Agent object.
        
        Params
        ======
            hp: hyperparameters object (state_size, action_size, random_seed, lr_actor, lr_critic, weight_decay)
        """
        self.hp = hp

        # Actor Network (w/ Target Network)
        self.actor_local = model.Actor(self.hp.state_size, self.hp.action_size,
                                       self.hp.random_seed).to(device)
        self.actor_target = model.Actor(self.hp.state_size,
                                        self.hp.action_size,
                                        self.hp.random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.hp.lr_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = model.Critic(self.hp.state_size,
                                         self.hp.action_size,
                                         self.hp.random_seed).to(device)
        self.critic_target = model.Critic(self.hp.state_size,
                                          self.hp.action_size,
                                          self.hp.random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=self.hp.lr_critic,
                                           weight_decay=self.hp.weight_decay)
        self.soft_update(self.critic_local, self.critic_target, 1)
        self.soft_update(self.actor_local, self.actor_target, 1)

        # Noise process
        self.noise = ounoise.OUNoise(self.hp.action_size, self.hp.random_seed)
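soft_update is called with tau=1 above to make the initial hard copy; the method body is not shown, but a minimal sketch of the usual Polyak form it presumably implements is:

    def soft_update(self, local_model, target_model, tau):
        # theta_target = tau * theta_local + (1 - tau) * theta_target
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)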
Example #3
    def __init__(self, state_dim, action_dim, action_lim, ram, device='cpu'):
        """
        :param state_dim: Dimensions of state (int)
        :param action_dim: Dimension of action (int)
        :param action_lim: Used to limit action in [-action_lim,action_lim]
        :param ram: replay memory buffer object
        :return:
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.device = device
        # self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim).to(device)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim).to(device)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim).to(device)
        self.target_critic = model.Critic(self.state_dim,
                                          self.action_dim).to(device)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Example #4
    def __init__(self, config, state_size, action_size, num_agents, seed):
        """Initialize an Agent object.
        
        Params
        ======
            config: configuration object (device, LR_actor, LR_critic, weight_decay, BUFFER_SIZE, BATCH_SIZE)
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents acting in parallel
            seed (int): random seed
        """

        self.config = config

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(seed)

        # Initialize the Actor and Critic Networks
        self.actor = model.Actor(state_size, action_size,
                                 seed).to(self.config.device)
        self.actor_target = model.Actor(state_size, action_size,
                                        seed).to(self.config.device)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.config.LR_actor)

        self.critic = model.Critic(state_size, action_size,
                                   seed).to(self.config.device)
        self.critic_target = model.Critic(state_size, action_size,
                                          seed).to(self.config.device)
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(),
            self.config.LR_critic,
            weight_decay=self.config.weight_decay)

        # Initialize the random-noise-process for action-noise
        self.is_training = True
        self.randomer = OUNoise((self.num_agents, self.action_size), seed)

        # Hard update the target networks to have the same parameters as the local networks
        for target_param, param in zip(self.actor_target.parameters(),
                                       self.actor.parameters()):
            target_param.data.copy_(param.data)
        for target_param, param in zip(self.critic_target.parameters(),
                                       self.critic.parameters()):
            target_param.data.copy_(param.data)

        # Initialize replay-buffer
        self.memory = ReplayBuffer(self.config.BUFFER_SIZE,
                                   self.config.BATCH_SIZE, seed,
                                   self.config.device)
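The ReplayBuffer class itself is not part of this snippet; a minimal sketch of a uniform replay buffer matching that constructor call (implementation details assumed) is:

import random
from collections import deque, namedtuple

class ReplayBuffer:
    # Fixed-size buffer of experience tuples with uniform random sampling.
    def __init__(self, buffer_size, batch_size, seed, device):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.device = device  # kept so samples can later be moved to the right device
        random.seed(seed)
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)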
Example #5
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 num_envs=1,
                 checkpt_folder="checkpt"):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            num_envs (int): number of parallel environments (sets the noise shape)
            checkpt_folder (str): folder used for actor/critic checkpoint files
        """
        self.state_size = state_size
        self.num_envs = num_envs
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.CHECKPOINT_FOLDER = checkpt_folder

        # Actor Network (w/ Target Network)
        self.actor_local = model.Actor(state_size, action_size,
                                       random_seed).to(DEVICE)
        self.actor_target = model.Actor(state_size, action_size,
                                        random_seed).to(DEVICE)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = model.Critic(state_size, action_size,
                                         random_seed).to(DEVICE)
        self.critic_target = model.Critic(state_size, action_size,
                                          random_seed).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)
        '''if os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth') and os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'):
            self.actor_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
            self.actor_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))

            self.critic_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))
            self.critic_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))'''

        # Noise process
        self.noise = OUNoise((num_envs, action_size), random_seed)

        # Replay memory
        self.memory = ReplayBuffer(DEVICE, action_size, BUFFER_SIZE,
                                   BATCH_SIZE, random_seed)
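The commented-out block above restores weights from checkpoint_actor.pth / checkpoint_critic.pth; a matching save method, as a sketch using the attribute names from this example, could look like:

    def checkpoint(self):
        # Persist the local networks so the commented-out block above can restore them.
        torch.save(self.actor_local.state_dict(),
                   self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth')
        torch.save(self.critic_local.state_dict(),
                   self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth')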
Example #6
    def __init__(self, model_source, img_size=299):
        model_source = torch.load(model_source)

        self.word2idx = model_source["dict"]
        self.idx2word = {v: k for k, v in self.word2idx.items()}

        args = model_source["settings"]
        actor = model.Actor(args.vocab_size,
                    args.dec_hsz,
                    args.rnn_layers,
                    2,
                    args.max_len,
                    args.dropout,
                    True)

        actor.load_state_dict(model_source["model"])
        actor = actor.cuda()

        self.actor = actor.eval()

        self._encode = transforms.Compose([
                            transforms.Resize(img_size),
                            transforms.CenterCrop(img_size),
                            transforms.ToTensor()
                        ])

        self.max_len = args.max_len
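A hypothetical usage note (the wrapper class name and input file are assumed): the _encode pipeline turns a PIL image into the batched tensor the actor expects:

from PIL import Image

generator = CaptionGenerator("actor_checkpoint.pt")      # assumed class and checkpoint names
img = Image.open("example.jpg").convert("RGB")
img_tensor = generator._encode(img).unsqueeze(0).cuda()  # shape: (1, 3, img_size, img_size)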
Example #7
    def __init__(self, state_dim, action_dim, action_lim, ram):
        """Special method for object initialisation.

		:param state_dim: Dimensions of state.
		:type state_dim: int.
		:param action_dim: Dimension of action.
		:type action_dim: int.
		:param action_lim: Used to limit action in [-action_lim, action_lim].
		:type action_lim: float.
		:param ram: replay memory buffer object.
		:type ram: buffer.
		"""

        # Set the parameters.
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0

        # Set the noise function.
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        # Set the actor.
        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        # Set the critic.
        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # Update the actor and critic networks
        self.hard_update(self.target_actor, self.actor)
        self.hard_update(self.target_critic, self.critic)

        return
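Several of these examples instantiate utils.OrnsteinUhlenbeckActionNoise(action_dim); a minimal sketch of how such temporally correlated exploration noise is commonly implemented (the exact utils version may differ) is:

import numpy as np

class OrnsteinUhlenbeckActionNoise:
    # OU process: dx = theta * (mu - x) + sigma * N(0, 1), giving smoothly varying noise
    # around mu instead of independent white noise at every step.
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.X = np.ones(self.action_dim) * self.mu

    def reset(self):
        self.X = np.ones(self.action_dim) * self.mu

    def sample(self):
        dx = self.theta * (self.mu - self.X) + self.sigma * np.random.randn(self.action_dim)
        self.X = self.X + dx
        return self.X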
Example #8
def rl_graph(sess, phrl):
  Actor = model.Actor()
  Y_score = Actor.build(phrl['states_rl'], N_ACTION, phrl['is_training_rl'])
  Y_prob = tf.nn.softmax(Y_score)

  neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=Y_score, labels=phrl['actions_rl'])
  loss_op = tf.reduce_mean(neg_log_prob * phrl['values_rl'])

  # update_op = tf.train.MomentumOptimizer(LR, MOMENTUM).minimize(loss_op, var_list=Actor.vars)
  update_op = tf.train.AdamOptimizer(1e-3).minimize(loss_op, var_list=Actor.vars)

  return loss_op, Y_prob, update_op, Actor.vars
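A hypothetical way to drive this graph in a TF1 session, assuming phrl is a dict of placeholders and states/actions/returns are batched numpy arrays:

loss_op, Y_prob, update_op, actor_vars = rl_graph(sess, phrl)

# One policy-gradient step: feed states, the sampled actions, and their returns/advantages.
_, loss = sess.run([update_op, loss_op],
                   feed_dict={phrl['states_rl']: states,
                              phrl['actions_rl']: actions,
                              phrl['values_rl']: returns,
                              phrl['is_training_rl']: True})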
Example #9
    def __init__(self, state_dim, action_dim, ram):
        """
		Initialize actor and critic networks
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # copy parameters to target networks
        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Example #10
    def __init__(self, env, state_vector_size, action_num, action_limit, ram):
        """
        :param env: Gym environment
        :param state_vector_size: Dimensions of state (int)
        :param action_num: Dimension of action (int)
        :param action_limit: Used to limit action in [-action_limit, action_limit]
        :param ram: replay memory buffer object
        :return:
        """
        self.env = env
        self.state_dim = state_vector_size
        self.action_dim = action_num
        self.action_lim = action_limit
        self.ram = ram
        self.iter = 0
        self.noise = OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)

        self.state_vector_size = state_vector_size
        self.action_num = action_num
        self.action_limit = action_limit
        self.controller = DQNAgent(env, state_vector_size, action_num,
                                   action_limit)
Example #11
    def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic, gamma,
                 tau, batchsize, expl_rate, version):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param ram: replay memory buffer object
		:param LR_actor: learning rate for the actor optimizer
		:param LR_critic: learning rate for the critic optimizer
		:param gamma: discount factor
		:param tau: soft-update rate for the target networks
		:param batchsize: minibatch size sampled from the replay buffer
		:param expl_rate: exploration noise scale (OU sigma)
		:param version: run/version identifier
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.LR_actor = LR_actor
        self.LR_critic = LR_critic
        self.gamma = gamma
        self.tau = tau
        self.ram = ram
        self.batchsize = batchsize
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(
            self.action_dim, 0, 0.15, expl_rate)
        self.action_lim = 1.0

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.LR_actor)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.LR_critic)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Example #12
    def build_net(self):
        '''build the actor, critic, target_actor and target_critic networks'''

        # actor
        self.actor = model.Actor(self.state_size, self.hidden_size, self.action_size, \
            self.n_layers, output_activation=self.output_activation)
        self.target_actor = deepcopy(self.actor)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.lr)

        # critic
        self.critic = model.QNet(self.state_size, self.hidden_size, 1, \
            self.n_layers, self.action_size, output_activation=None)
        self.target_critic = deepcopy(self.critic)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=self.lr)

        # replay memory
        self.replay_memory = util.ReplayMemory(1000000)
        self.transition = collections.namedtuple(
            "transition", ["state", "action", "next_state", "reward"])

        if self.ounoise:
            self.noise = OUNoise(mu=np.zeros(self.action_size), sigma=0.1)
        self.epsilon = 1.
Example #13
                .format(i_episode, np.mean(scores_deque)))
            agent.checkpoint(checkpt)
            break

    return scores, mean_scores_window


# In[19]:

config = Config(seed=6)

config.num_agents = len(env_info.agents)
config.state_size = state_size
config.action_size = action_size

config.actor_fn = lambda: model.Actor(config.state_size, config.action_size,
                                      128, 128)
config.actor_opt_fn = lambda params: optim.Adam(params, lr=1e-3)

config.critic_fn = lambda: model.Critic(config.state_size, config.action_size,
                                        1, 128, 128)
config.critic_opt_fn = lambda params: optim.Adam(params, lr=2e-3)

config.replay_fn = lambda: Replay(
    config.action_size, buffer_size=int(1e6), batch_size=128)
config.noise_fn = lambda: OUNoise(
    config.action_size, mu=0., theta=0.15, sigma=0.1, seed=config.seed)

config.discount = 0.99
config.target_mix = 3e-3

config.max_episodes = 3000
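A sketch of how an agent might consume these factory-style fields (names on the agent side are assumed; the agent class is not shown here):

# Hypothetical construction inside the agent, driven by the config above:
actor_local = config.actor_fn()
actor_target = config.actor_fn()
actor_opt = config.actor_opt_fn(actor_local.parameters())

critic_local = config.critic_fn()
critic_target = config.critic_fn()
critic_opt = config.critic_opt_fn(critic_local.parameters())

replay = config.replay_fn()
noise = config.noise_fn()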
Example #14
db.create_all()

with open('data/movies.csv', 'r', encoding='utf-8-sig') as movies_file:
    reader = DictReader(movies_file)
    for row in reader:
        new_movie = model.Movie(name=row['name'], year=row['year'])

        actors = row['actors'].split(';')
        for actor in actors:
            print(actor)
            existing_actor = model.Actor.query.filter_by(name=actor).first()
            if existing_actor:
                existing_actor.movies.append(new_movie)
                new_movie.actors.append(existing_actor)
            else:
                new_actor = model.Actor(name=actor)
                new_actor.movies.append(new_movie)
                new_movie.actors.append(new_actor)
                db.session.add(new_actor)

        db.session.add(new_movie)

with open('data/songs.csv', 'r', encoding='utf-8-sig') as songs_file:
    reader = DictReader(songs_file)
    for row in reader:
        new_song = model.Song(name=row['name'])

        # add artists
        artists = row['artists'].split(";")
        for artist_name in artists:
            print(artist_name)
Example #15
                              data['valid']['imgs'],
                              data['valid']['captions'],
                              args.max_len,
                              batch_size=args.batch_size,
                              is_cuda=use_cuda,
                              evaluation=True)

# ##############################################################################
# Build model
# ##############################################################################
import model
from const import PAD
from optim import Optim

encode = model.Encode(use_cuda)
actor = model.Actor(args.vocab_size, args.dec_hsz, args.rnn_layers,
                    args.batch_size, args.max_len, args.dropout, use_cuda)

critic = model.Critic(args.vocab_size, args.dec_hsz, args.rnn_layers,
                      args.batch_size, args.max_len, args.dropout, use_cuda)

optim_pre_A = Optim(actor.parameters(), args.pre_lr, True)
optim_pre_C = Optim(critic.parameters(), args.pre_lr, True)

optim_A = Optim(actor.parameters(), args.lr, False, args.new_lr)
optim_C = Optim(critic.parameters(), args.lr, False, args.new_lr)

criterion_A = torch.nn.CrossEntropyLoss(ignore_index=PAD)
criterion_C = torch.nn.MSELoss()

if use_cuda:
    actor = actor.cuda()
Example #16
    def update_target(self, source, target):
        new_target_param = parameters_to_vector(source.parameters()) * self.tau + \
          (1 - self.tau) * parameters_to_vector(target.parameters())
        vector_to_parameters(new_target_param, target.parameters())
        return target


if __name__ == '__main__':
    env = gym.make("CartPole-v0")

    global state_size, action_size
    state_size = int(np.product(env.observation_space.shape))
    action_size = int(env.action_space.n)
    num_episode = 800
    critic = model.Critic(state_size, action_size)
    actor = model.Actor(state_size, action_size)

    # actor.eval()
    # critic.eval()

    # target network
    target_critic = deepcopy(critic)
    target_actor = deepcopy(actor)

    ddpg = DDPG(env,
                actor,
                critic,
                target_actor,
                target_critic,
                num_episode,
                replay_memory,
Example #17
    def build_net(self):
        '''
        build the networks based on the input parameters

        Actor Network:
         * Use gaussian:
            the network outputs a mean and a log standard deviation,
            so the output size is the action size
         * Use deterministic:
            the network only outputs a mean of action size,
            so `discrete` is set to True and the log standard deviation is None

        Value Network:
         * outputs the estimated state value for states sampled from the replay buffer

        Q Network:
         * outputs the Q value

        Target Network:
         * used to update the Q network
        '''

        # build net
        if self.policy_type == "gaussian":
            self.actor = model.Net(self.state_size, self.hidden_size, self.action_size, \
                self.n_layers, output_activation=self.output_activation, discrete=self.discrete)
            # automated entropy adjustment for maximum entropy rl
            self.entropy_target = - self.action_size
            self.log_alpha = torch.zeros(1, requires_grad=True)
            self.alpha_optimizer = optim.Adam([self.log_alpha], lr=self.lr)
            # target q network
            self.target_q_net = model.QNet(self.state_size, self.hidden_size, 1, \
                self.n_layers, self.action_size, output_activation=None)

        elif self.policy_type == "deterministic":
            self.actor = model.Net(self.state_size, self.hidden_size, self.action_size, \
                self.n_layers, output_activation=self.output_activation, discrete=self.discrete)
            self.value_net = model.Actor(self.state_size, self.hidden_size, 1, \
                self.n_layers, output_activation=None)
            self.target_value_net = deepcopy(self.value_net)
            self.value_optimizer = optim.Adam(self.value_net.parameters(), lr=self.lr)
        
        if self.duel_q_net:
            # duel Qnet
            self.q1_net = model.QNet(self.state_size, self.hidden_size, 1, \
                self.n_layers, self.action_size, output_activation=None)
            self.q2_net = deepcopy(self.q1_net)
        else:
            self.q_net = model.QNet(self.state_size, self.hidden_size, 1, \
                self.n_layers, self.action_size, output_activation=None)

        # optimizer
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.lr)
        
        if self.duel_q_net:
            self.q1_optimizer = optim.Adam(self.q1_net.parameters(), lr=self.lr)
            self.q2_optimizer = optim.Adam(self.q2_net.parameters(), lr=self.lr)
        else:
            self.q_optimizer = optim.Adam(self.q_net.parameters(), lr=self.lr)
        
        # loss function
        self.value_loss_fn = nn.MSELoss()
        if self.duel_q_net:
            self.q1_loss_fn = nn.MSELoss()
            self.q2_loss_fn = nn.MSELoss()
        else:
            self.q_loss_fn = nn.MSELoss()

        # replay memory
        self.replay_memory = util.ReplayMemory(1000000)
        self.transition = collections.namedtuple("transition", ["state", 
                "action", "next_state", "reward"])
        
        # entropy target
        self.entropy_target = - self.action_size
        self.noise = OUNoise(mu=np.zeros(self.action_size), sigma=0.1)
        self.epsilon = 1.
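The automated entropy adjustment set up above (log_alpha, entropy_target, alpha_optimizer) is normally trained with the standard SAC temperature loss; a sketch of that update, assuming log_prob comes from the gaussian actor during a learning step, is:

        # alpha_loss = E[ -log_alpha * (log_prob + entropy_target) ]
        alpha_loss = -(self.log_alpha * (log_prob + self.entropy_target).detach()).mean()
        self.alpha_optimizer.zero_grad()
        alpha_loss.backward()
        self.alpha_optimizer.step()
        alpha = self.log_alpha.exp()  # temperature that weights the entropy bonus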
Example #18
# Main function
if __name__ == '__main__':
    # set unity environment path (file_name)
    env = UnityEnvironment(file_name=config.env_name)
    # env = UnityEnvironment(file_name=config.env_name, worker_id=np.random.randint(100000))

    # setting brain for unity
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    train_mode = config.train_mode

    device = config.device

    actor = model.Actor(config.action_size, "main").to(device)
    target_actor = model.Actor(config.action_size, "target").to(device)
    critic = model.Critic(config.action_size, "main").to(device)
    target_critic = model.Critic(config.action_size, "target").to(device)

    optimizer_actor = optim.Adam(actor.parameters(), lr=config.actor_lr)
    optimizer_critic = optim.Adam(critic.parameters(), lr=config.critic_lr)

    algorithm = "_DDPG"
    agent = agent.DDPGAgent(actor, critic, target_actor, target_critic,
                            optimizer_actor, optimizer_critic, device,
                            algorithm)

    # Initialize target networks
    agent.hard_update_target()