Example #1
    def __init__(self, input_space, action_space, args):

        self.use_expert = args.use_expert
        self.gamma = args.gamma
        self.tau = args.tau
        self.alpha = args.alpha
        self.action_range = [action_space.low, action_space.high]
        self.policy_type = args.policy

        self.target_update_interval = args.target_update_interval
        self.automatic_entropy_tuning = args.automatic_entropy_tuning

        # self.device = torch.device("cuda" if args.cuda else "cpu")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Optional CUDA diagnostics: torch.cuda.is_available(), current_device(),
        # device_count(), get_device_name(), and torch.backends.cudnn version/availability.

        self.critic = QNetwork(input_space, action_space.shape[0], args.hidden_size).to(device=self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)

        self.critic_target = QNetwork(input_space, action_space.shape[0], args.hidden_size).to(self.device)
        hard_update(self.critic_target, self.critic)

        if self.policy_type == "Gaussian":
            # Target Entropy = −dim(A) (e.g. , -6 for HalfCheetah-v2) as given in the paper
            if self.automatic_entropy_tuning is True:
                self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
                self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
                self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

            self.policy = GaussianPolicy(input_space, action_space.shape[0], args.hidden_size, action_space).to(self.device)
            self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)

        else:
            raise ValueError("Not supper another type yet.")
Example #2
    def __init__(self,
                 state_size,
                 action_size,
                 buffer_size=int(1e5),
                 batch_size=64,
                 learning_rate=5e-4,
                 gamma=0.99,
                 tau=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.tau = tau

        self.q_network = QNetwork(self.state_size, self.action_size)
        self.q_network_target = QNetwork(self.state_size, self.action_size)
        self.optimizer = optim.Adam(self.q_network.parameters(),
                                    lr=self.learning_rate)

        self.replay_buffer = ReplayBuffer(self.buffer_size)
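tau is stored here for soft target updates, but the update itself is outside this snippet. A hedged sketch of the Polyak step such an agent typically applies after each learning update (the helper name is an assumption):

import torch.nn as nn


def soft_update(target: nn.Module, source: nn.Module, tau: float) -> None:
    # Polyak averaging: theta_target <- tau * theta_source + (1 - tau) * theta_target
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * source_param.data + (1.0 - tau) * target_param.data)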
Example #3
    def __init__(self, state_size: int, action_size: int, seed: int):
        """
        Initialize Agent.

        :param state_size:
        :param action_size:
        :param seed: random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = seed

        # QNetwork
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplyBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        # initialize time step for updating
        self.t_step = 0
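ReplyBuffer (and the ReplayBuffer classes used by the other examples) is referenced but never defined here. A minimal, illustrative version with uniform sampling; each repository's actual class may store tensors or add prioritization:

import random
from collections import deque, namedtuple

Experience = namedtuple("Experience", ["state", "action", "reward", "next_state", "done"])


class SimpleReplayBuffer:
    """Fixed-size FIFO buffer of transitions with uniform random sampling."""

    def __init__(self, buffer_size: int, batch_size: int, seed: int = 0):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.rng = random.Random(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        # Uniformly sample a minibatch of stored transitions.
        return self.rng.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)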
Example #4
    def __init__(self, state_size, action_size, seed, lr_decay=0.9999):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            lr_decay (float): multiplicative factor of learning rate decay
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        print("Running on: "+str(device))
        
        # Q-Network
        hidden_layers = [128, 32]
        
        if USE_DUELING_NETWORK:
            hidden_state_value = [64, 32]
            
            self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed, hidden_layers, hidden_state_value).to(device)

            self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed, hidden_layers, hidden_state_value).to(device)
            self.qnetwork_target.eval()
            
        else:
            self.qnetwork_local = QNetwork(state_size, action_size, seed, hidden_layers).to(device)

            self.qnetwork_target = QNetwork(state_size, action_size, seed, hidden_layers).to(device)
            self.qnetwork_target.eval()
            
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        self.lr_scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, lr_decay)
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed, device)
        
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
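DuelingQNetwork is used above without its definition. A rough sketch of the dueling decomposition it presumably implements, where Q(s, a) is a state value plus a mean-centred advantage (the layer sizes here are placeholders, not the repository's):

import torch
import torch.nn as nn


class DuelingHeadSketch(nn.Module):
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
    def __init__(self, state_size: int, action_size: int, hidden: int = 128):
        super().__init__()
        self.feature = nn.Sequential(nn.Linear(state_size, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)                  # state-value stream V(s)
        self.advantage = nn.Linear(hidden, action_size)    # advantage stream A(s, a)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        x = self.feature(state)
        value = self.value(x)
        advantage = self.advantage(x)
        return value + advantage - advantage.mean(dim=1, keepdim=True)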
Example #5
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #6

    def __init__(self, num_inputs, action_space,
                 device, hidden_size, lr, gamma, tau, alpha):

        self.gamma = gamma
        self.tau = tau
        self.alpha = alpha

        self.device = device 

        self.critic = QNetwork(num_inputs, action_space.shape[0], hidden_size).to(device=self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=lr)

        self.critic_target = QNetwork(num_inputs, action_space.shape[0], hidden_size).to(self.device)
        hard_update(self.critic_target, self.critic)
        
        # Target Entropy = −dim(A) (e.g. , -6 for HalfCheetah-v2) as given in the paper
        self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = Adam([self.log_alpha], lr=lr)
        self.policy = GaussianPolicy(num_inputs, action_space.shape[0],
                                     hidden_size, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=lr)
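target_entropy and log_alpha set up SAC's automatic temperature tuning; the actual update happens later in the training loop. A hedged sketch of the usual temperature loss, assuming log_pi is the log-probability of the action sampled from the current policy:

import torch


def temperature_loss(log_alpha: torch.Tensor, log_pi: torch.Tensor, target_entropy: float) -> torch.Tensor:
    # J(alpha) = E[-log_alpha * (log_pi + target_entropy)]; gradients flow only through log_alpha.
    return -(log_alpha * (log_pi + target_entropy).detach()).mean()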
Example #7
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        self.noise = OUNoise(action_size, seed)
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
Example #8
    def __init__(self, state_size, action_size, buffer_size, batch_size, gamma,
                 tau, learning_rate, update_every, device, seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            buffer_size (int): replay buffer size
            batch_size (int): minibatch size
            gamma (float): discount factor
            tau (float): used for soft update of target parameters
            learning_rate (float): learning rate
            update_every (int): how many steps between network updates
            device (torch.Device): pytorch device
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.update_every = update_every
        self.device = device
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=learning_rate)

        # Replay memory
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size,
                                   device, seed)
        # Initialize time step (for updating every self.update_every steps)
        self.t_step = 0
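t_step exists so the agent learns only every update_every environment steps. A sketch of how that counter is typically consumed in a step() method; memory.add, memory.sample and learn are assumed interfaces, not shown in the snippet:

def step(self, state, action, reward, next_state, done):
    # Intended as a method of the agent above: store the transition, then learn
    # every `update_every` steps once the buffer holds at least one minibatch.
    self.memory.add(state, action, reward, next_state, done)
    self.t_step = (self.t_step + 1) % self.update_every
    if self.t_step == 0 and len(self.memory) >= self.batch_size:
        experiences = self.memory.sample()
        self.learn(experiences, self.gamma)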
Example #9
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 dqn_type="double",
                 dueling=True):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            dqn_type (str): one of 'simple', 'double'
            dueling (bool): if True, use the dueling network architecture
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.dqn_type = dqn_type

        # Q-Network
        if dueling:
            self.qnetwork_local = DuelQNetwork(state_size, action_size,
                                               seed).to(device)
            self.qnetwork_target = DuelQNetwork(state_size, action_size,
                                                seed).to(device)
        else:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        # self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, 10, 2)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
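dqn_type switches between the plain and double DQN updates, which differ only in how the bootstrap target is built. A minimal sketch of both target computations, assuming batched tensors of shape (batch, 1) for rewards and dones:

import torch


def td_targets(qnetwork_local, qnetwork_target, rewards, next_states, dones, gamma, double=True):
    with torch.no_grad():
        if double:
            # Double DQN: the online network picks argmax_a, the target network evaluates it.
            best_actions = qnetwork_local(next_states).argmax(dim=1, keepdim=True)
            next_q = qnetwork_target(next_states).gather(1, best_actions)
        else:
            # Plain DQN: the target network both picks and evaluates the best action.
            next_q = qnetwork_target(next_states).max(dim=1, keepdim=True)[0]
    return rewards + gamma * next_q * (1 - dones)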
Example #10
    def __init__(self,
                 n_state,
                 n_actions,
                 n_hidden=32,
                 n_layers=2,
                 seed=333,
                 snapshotfile="snapshot.pth"):
        """ Initialize the agent.

        Args:
            n_state     (int):  Number of features that represent the state
            n_actions   (int):  Number of actions available to agent
            n_hidden    (int):  Number of units in hidden neural net layers
            n_layers    (int):  Number of layers for neural network
            seed        (int):  Set the random seed (for reproducibility)
            snapshotfile (str): Filepath to use for saving weights
        """
        self.n_state = n_state
        self.n_actions = n_actions
        self.seed = random.seed(seed)
        self.snapshotfile = snapshotfile

        # Deep Q-Network
        self.qnetwork_local = QNetwork(n_state, n_actions, seed,
                                       n_hidden=64).to(device)
        self.qnetwork_target = QNetwork(n_state, n_actions, seed,
                                        n_hidden=64).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        self.loss_func = torch.nn.MSELoss(reduction='mean')

        # Experience Replay Memory
        self.memory = ReplayBuffer(n_actions, EXPERIENCE_MEMORY_SIZE,
                                   BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

        # TODO: have the is_training attribute control eval and train
        #       mode in the PyTorch network
        self.is_training = True
Example #11
    def __init__(self, num_inputs, action_space, args):

        self.gamma = args.gamma
        self.tau = args.tau
        self.alpha = args.alpha

        self.policy_type = args.policy
        self.target_update_interval = args.target_update_interval
        self.automatic_entropy_tuning = args.automatic_entropy_tuning

        self.device = torch.device("cuda" if args.cuda else "cpu")

        self.critic = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(device=self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)
        self.critic_target = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)

        self.safe_critic = SafeQNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)
        self.safe_critic_optim = Adam(self.safe_critic.parameters(), lr=args.lr)
        self.safe_critic_target = SafeQNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)

        hard_update(self.critic_target, self.critic)
        hard_update(self.safe_critic_target, self.safe_critic)

        if self.policy_type == "Gaussian":
            # Target Entropy = −dim(A) (e.g. , -6 for HalfCheetah-v2) as given in the paper
            if self.automatic_entropy_tuning is True:
                self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
                self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
                self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

            self.policy = GaussianPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device)
            self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)

        else:
            self.alpha = 0
            self.automatic_entropy_tuning = False
            self.policy = DeterministicPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device)
            self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
Example #12
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 use_dueling=False,
                 use_double=False):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            use_dueling (bool): if True, use the dueling network architecture
            use_double (bool): if True, use the double DQN update
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.use_dueling = use_dueling
        self.use_double = use_double

        # Q-Network
        self.qnetwork_local = QNetwork(state_size,
                                       action_size,
                                       seed,
                                       use_dueling=use_dueling).to(device)
        self.qnetwork_target = QNetwork(state_size,
                                        action_size,
                                        seed,
                                        use_dueling=use_dueling).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #13
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.q_optimizer = optim.RMSprop(self.qnetwork_local.parameters(),
                                         lr=LR,
                                         eps=1e-5)

        # Successor Representation Network
        self.snetwork = SRNetwork().to(device)
        self.snetwork_target = SRNetwork().to(device)
        self.s_optimizer = optim.RMSprop(self.snetwork.parameters(),
                                         lr=LR,
                                         eps=1e-5)

        self.pnetwork = PredNetwork(state_size, 1).to(device)
        self.p_optimizer = optim.RMSprop(self.pnetwork.parameters(),
                                         lr=LR,
                                         eps=1e-5)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #14
    def __init__(self,
                 state_size,
                 action_size,
                 seed=0,
                 use_dueling=False,
                 use_double=False,
                 fc1=64,
                 fc2=64):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        print('device is {}'.format(device))
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size,
                                       action_size,
                                       seed,
                                       fc1_units=fc1,
                                       fc2_units=fc2).to(device)
        self.qnetwork_target = QNetwork(state_size,
                                        action_size,
                                        seed,
                                        fc1_units=fc1,
                                        fc2_units=fc2).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #15
    def __init__(self, sess, state_size, action_size, seed, arguments):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.sess = sess
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        self.learning_rate = arguments['lr']
        self.gamma = arguments['gamma']
        self.update_every = arguments['update_every']
        self.tau = arguments['tau']
        self.history_size = arguments['history_size']

        self.buffer_size = arguments['buffer_size']
        self.batch_size = arguments['batch_size']

        # Q-Network
        self.qnetwork_local = QNetwork('local_q', state_size, action_size, self.history_size)
        self.qnetwork_target = QNetwork('target_q', state_size, action_size, self.history_size)

        copy_ops = []
        for local_w, target_w in zip(self.qnetwork_local.variables, self.qnetwork_target.variables):
            copy_op = tf.assign(target_w, local_w * self.tau + (1.0 - self.tau) * target_w)
            copy_ops.append(copy_op)

        self.copy_ops = tf.group(*copy_ops, name='copy_op')

        # Replay memory
        self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)
        # Initialize time step (for updating every self.update_every steps)
        self.t_step = 0
Example #16
    def __init__(self, state_size, action_size,
                 buffer_size=BUFFER_SIZE, batch_size=BATCH_SIZE,
                 gamma=GAMMA, tau=TAU, lr=LR, update_every=UPDATE_EVERY, seed=0):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            buffer_size (int): replay buffer size
            batch_size (int): minibatch size
            gamma (float): discount factor
            tau (float): target network soft update rate
            lr (float): learning rate
            update_every (int): number of steps between learning updates
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.lr = lr
        self.update_every = update_every
        self.seed = seed
        self.rng = np.random.RandomState(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)

        # Replay memory
        self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #17
    def __init__(self, n_states, n_actions, hidden_dim, lr, device):
        """Agent class that choose action and train

        Args:
            n_states (int): input dimension
            n_actions (int): output dimension
            hidden_dim (int): hidden dimension
        """

        self.device = device

        self.q_local = QNetwork(n_states, n_actions, hidden_dim=hidden_dim).to(self.device)
        self.q_target = QNetwork(n_states, n_actions, hidden_dim=hidden_dim).to(self.device)
        
        self.mse_loss = torch.nn.MSELoss()
        self.optim = optim.Adam(self.q_local.parameters(), lr=lr)
        
        self.n_states = n_states
        self.n_actions = n_actions
        

        #  ReplayMemory: trajectory is saved here
        self.replay_memory = ReplayMemory(10000)
Example #18
    def _create_nn(self, nn_type, state_size, action_size, seed, device):
        if nn_type == 'noisydueling':
            self._sample_noise = True
            return NoisyDuelingQNetwork(state_size,
                                        action_size,
                                        seed,
                                        device=device).to(device)
        elif nn_type == 'dueling':
            return DuelingQNetwork(state_size, action_size, seed).to(device)
        elif nn_type == 'q':
            return QNetwork(state_size, action_size, seed).to(device)
        else:
            raise Exception(
                "Unknown NN type - must be one of 'noisydueling', 'dueling' or 'q'")
Example #19

    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)  # Q(S,A;w)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(
            device)  # fixated target Q(S',a;w-)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   seed)  # Replay memory

        self.t_step = 0  # count time step
Example #20
    def __init__(self, state_size, action_size, double_dqn=True):
        self.state_size = state_size
        self.action_size = action_size
        self.double_dqn = double_dqn

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size).to(device)
        self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
        self.optimizer = torch.optim.Adam(self.qnetwork_local.parameters(),
                                          lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE)
        self.t_step = 0
Example #21
    def __init__(self, state_size, action_size, update_type='dqn', seed=0):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.update_type = update_type

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size)
        self.qnetwork_target = QNetwork(state_size, action_size)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
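No learn step is shown for this Keras-based agent. A hedged sketch of a standard DQN update with tf.GradientTape, assuming QNetwork is a tf.keras.Model and the sampled batches arrive as tensors:

import tensorflow as tf


def dqn_train_step(qnetwork_local, qnetwork_target, optimizer,
                   states, actions, rewards, next_states, dones,
                   gamma, action_size):
    # Fit Q(s, a) toward r + gamma * max_a' Q_target(s', a') on a sampled minibatch.
    next_q = tf.reduce_max(qnetwork_target(next_states), axis=1)
    targets = rewards + gamma * next_q * (1.0 - dones)
    with tf.GradientTape() as tape:
        q_values = qnetwork_local(states)                                   # (batch, action_size)
        q_sa = tf.reduce_sum(q_values * tf.one_hot(actions, action_size), axis=1)
        loss = tf.reduce_mean(tf.square(targets - q_sa))
    grads = tape.gradient(loss, qnetwork_local.trainable_variables)
    optimizer.apply_gradients(zip(grads, qnetwork_local.trainable_variables))
    return loss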
Example #22
File: dqn.py  Project: xuezzee/-
    def __init__(self, state_size, action_size, num_agents, double_dqn=False):
        self.action_size = action_size
        self.double_dqn = double_dqn

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size).to(device)
        self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
        self.optimizer = torch.optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=4000, gamma=0.98, last_epoch=-1)

        # Replay memory
        self.memory = ReplayBuffer(BUFFER_SIZE)
        self.num_agents = num_agents
        self.t_step = 0
Example #23
    def __init__(self, state_size, action_size, seed, enable_curiosity):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.enable_curiosity = enable_curiosity

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)

        # Curiosity Elements
        self.fwd_model = FwdModel(state_size, action_size, seed).to(device)
        self.inverse_model = InverseModel(state_size, action_size,
                                          seed).to(device)

        # Optimizer
        params_to_opt = list(self.qnetwork_local.parameters()) + list(
            self.fwd_model.parameters()) + list(
                self.inverse_model.parameters())
        self.optimizer = optim.Adam(params_to_opt, lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

        self.loss_list = []
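FwdModel and InverseModel provide the curiosity signal, but the intrinsic reward itself is computed elsewhere. A hedged sketch of the usual formulation, assuming the forward model predicts the next state (or its feature encoding) from the current state and action; eta is a hypothetical scaling factor:

import torch


def intrinsic_reward(fwd_model, state, action, next_state, eta: float = 0.01) -> torch.Tensor:
    # Curiosity bonus = scaled prediction error of the forward model.
    with torch.no_grad():
        predicted_next = fwd_model(state, action)
        return eta * 0.5 * (predicted_next - next_state).pow(2).sum(dim=-1)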
Example #24
    def __init__(self,
                 state_size,
                 action_size,
                 seed=0,
                 lr=1e-3,
                 update_every=4,
                 batch_size=4,
                 buffer_size=64,
                 gamma=0.0994,
                 tau=1e-3,
                 model_path="model.pth"):
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        print("=== AGENT ===")
        print(f"Created agent on device: {self.device}")

        self.model_path = model_path
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.update_every = update_every
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau

        # network variables
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(self.device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)
        self.load()

        # Control variables
        self.memory = ReplayBuffer(action_size, buffer_size, self.batch_size,
                                   seed, self.device)
        self.t_step = 0
Example #25

    def __init__(self, state_size, action_size, seed, network_type=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Specify network architecture

        # Vanilla Q-Network
        if network_type is None:
            print("Using double network")
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)

        # Dueling Q-Network
        elif network_type == 'Dueling':
            print("Using double Dueling network")
            self.qnetwork_local = Dueling_QNetwork(state_size, action_size,
                                                   seed).to(device)
            self.qnetwork_target = Dueling_QNetwork(state_size, action_size,
                                                    seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #26

    def __init__(self,
                 env_name="BreakoutDeterministic-v4",
                 gamma=0.99,
                 batch_size=32,
                 lr=0.00025,
                 update_period=4,
                 target_update_period=10000,
                 n_frames=4):

        self.env_name = env_name

        self.gamma = gamma

        self.batch_size = batch_size

        self.epsilon_scheduler = (
            lambda steps: max(1.0 - 0.9 * steps / 1000000, 0.1))

        self.update_period = update_period

        self.target_update_period = target_update_period

        env = gym.make(self.env_name)

        self.action_space = env.action_space.n

        self.qnet = QNetwork(self.action_space)

        self.target_qnet = QNetwork(self.action_space)

        self.optimizer = Adam(lr=lr, epsilon=0.01 / self.batch_size)

        self.n_frames = n_frames

        self.use_reward_clipping = True

        self.huber_loss = tf.keras.losses.Huber()
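epsilon_scheduler anneals exploration from 1.0 to 0.1 over the first million steps; the action-selection code is not part of the snippet. A hedged sketch of how it is typically used, assuming qnet accepts a batched, preprocessed frame stack:

import numpy as np


def select_action(qnet, state, total_steps, action_space, epsilon_scheduler):
    # Epsilon-greedy: explore with probability epsilon(total_steps), otherwise act greedily.
    epsilon = epsilon_scheduler(total_steps)
    if np.random.rand() < epsilon:
        return np.random.randint(action_space)
    q_values = qnet(state[np.newaxis, ...])    # add a batch dimension
    return int(np.argmax(q_values))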
Example #27
    def __init__(self, state_size, action_size, duel, fc1_units, fc2_units,
                 seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            duel (bool): if True, use the dueling network architecture
            fc1_units (int): number of nodes in the first hidden layer
            fc2_units (int): number of nodes in the second hidden layer
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Choose between the regular Q-Network and the dueling architecture
        #if(duel):
        #   self.qnetwork_local  = Duel_QNetwork(state_size, action_size,fc1_units,fc2_units, seed).to(device)
        #    self.qnetwork_target = Duel_QNetwork(state_size, action_size,fc1_units,fc2_units, seed).to(device)
        #else:
        #    self.qnetwork_local  = QNetwork(state_size, action_size,fc1_units,fc2_units, seed).to(device)
        #   self.qnetwork_target = QNetwork(state_size, action_size,fc1_units,fc2_units, seed).to(device)
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Visualize network
        print(self.qnetwork_local)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #28
    def __init__(self, num_inputs, action_space, args):
        #self.n_flow = args.n_flows
        #assert self.n_flow == 0
        self.num_inputs = num_inputs
        #self.flow_family = args.flow_family
        self.num_layers = args.num_layers
        self.args = args

        self.gamma = args.gamma
        self.tau = args.tau
        self.alpha = args.alpha

        self.target_update_interval = args.target_update_interval
        self.automatic_entropy_tuning = args.automatic_entropy_tuning

        self.device = torch.device("cuda" if args.cuda else "cpu")

        self.critic = QNetwork(num_inputs, action_space.shape[0],
                               args.hidden_size).to(device=self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)

        self.critic_target = QNetwork(num_inputs, action_space.shape[0],
                                      args.hidden_size).to(self.device)
        hard_update(self.critic_target, self.critic)

        if self.automatic_entropy_tuning:
            self.target_entropy = -torch.prod(
                torch.Tensor(action_space.shape).to(self.device)).item()
            self.log_alpha = torch.zeros(1,
                                         requires_grad=True,
                                         device=self.device)
            self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

        self.policy = GaussianPolicy(num_inputs, action_space.shape[0],
                                     args.hidden_size, self.num_layers,
                                     args).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
Example #29
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 lr,
                 buffer_size,
                 batch_size,
                 update_step,
                 gamma,
                 tau,
                 dual_network=False):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        if not dual_network:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)
        else:
            self.qnetwork_local = Dual_QNetwork(state_size, action_size,
                                                seed).to(device)
            self.qnetwork_target = Dual_QNetwork(state_size, action_size,
                                                 seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)

        # Replay memory
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

        self.update_step = update_step
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
    def __init__(self, action_size, frame_history=4, seed=42):
        """Initialize an Agent object.
        
        Params
        ======
            action_size (int): Dimension of each action
            frame_history (int): number of consecutive frames stacked into each state
            seed (int): Random seed
        """
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(action_size, frame_history,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(action_size, frame_history,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                   frame_history, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0