Example #1
    def __init__(self, state_size, action_size, seed, network="Dueling", stepkey="Double"):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            network (str): network architecture ("Dueling", "Convolutional", or a plain Q-network)
            stepkey (str): update rule, e.g. "Double"
        """
        print("Architecture: " + str(network) + " " + str(stepkey) + " QN")
        self.stepkey = stepkey
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        if (network=="Dueling"):
            self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed).to(device)
        elif (network=="Convolutional"):
            self.qnetwork_local = ConvolutionalDuelingQNetwork(state_size, action_size, seed).to(device)
            self.qnetwork_target = ConvolutionalDuelingQNetwork(state_size, action_size, seed).to(device)             
        else:
            self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device) 
        print(self.qnetwork_local)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
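All of these constructors rely on module-level names defined elsewhere in their repositories: the network classes (QNetwork, DuelingQNetwork, ...), a ReplayBuffer, hyperparameter constants such as BUFFER_SIZE, BATCH_SIZE, LR and UPDATE_EVERY, and a device object. Below is a minimal sketch of such a preamble; the concrete values are assumptions typical for this kind of DQN exercise, not the values used by any particular example.

import random

import numpy as np
import torch
import torch.optim as optim

# Assumed hyperparameter values; every repository defines its own.
BUFFER_SIZE = int(1e5)   # replay buffer size
BATCH_SIZE = 64          # minibatch size
GAMMA = 0.99             # discount factor
TAU = 1e-3               # soft-update interpolation factor
LR = 5e-4                # learning rate for the Adam optimizer
UPDATE_EVERY = 4         # environment steps between learning updates

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")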
Example #2
    def __init__(self, state_size, action_size, seed, use_double_dqn,
                 use_dueling_dqn):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            use_double_dqn (bool): use the Double DQN update rule
            use_dueling_dqn (bool): use the dueling network architecture
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.use_double_dqn = use_double_dqn

        if use_dueling_dqn:
            # Dueling Q-Network
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)
        else:
            # Q-Network
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        # Initialize time step (for updating every UPDATE_NETWORK_EVERY steps)
        self.t_step = 0
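The ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed) used by most of these agents is not shown on this page. Below is a minimal sketch of a uniform replay buffer with that constructor signature, assuming it returns batches of torch tensors as in the standard DQN exercise; the real implementations may differ.

import random
from collections import deque, namedtuple

import numpy as np
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples them uniformly."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to memory."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Randomly sample a batch of experiences and return them as torch tensors."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).long().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float().to(device)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)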
Example #3
    def __init__(self, state_size, action_size, num_episodes, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_episodes (int): number of training episodes
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = seed

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Linearly anneal the PER importance-sampling exponent beta towards 1 over training
        self.anneal_beta = (1. - BETA) / num_episodes

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed, ALPHA, BETA)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.t_learning_step = 0
Example #4
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        # Use the optim package to define an Optimizer that will update the weights of
        # the model for us. Here we will use Adam; the optim package contains many other
        # optimization algorithms.

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
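The DuelingQNetwork(state_size, action_size, seed) that almost every example instantiates splits the network into a state-value stream and an advantage stream and recombines them into Q-values. A minimal fully connected sketch follows, assuming 64-unit hidden layers; the layer sizes and extra constructor arguments (hidden_sizes, hidsize1/hidsize2, noisy, ...) vary between the repositories above.

import torch
import torch.nn as nn
import torch.nn.functional as F


class DuelingQNetwork(nn.Module):
    """Dueling architecture: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""

    def __init__(self, state_size, action_size, seed, fc_units=64):
        super().__init__()
        self.seed = torch.manual_seed(seed)
        # Shared feature layer
        self.fc1 = nn.Linear(state_size, fc_units)
        # State-value stream V(s)
        self.value_fc = nn.Linear(fc_units, fc_units)
        self.value = nn.Linear(fc_units, 1)
        # Advantage stream A(s, a)
        self.advantage_fc = nn.Linear(fc_units, fc_units)
        self.advantage = nn.Linear(fc_units, action_size)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        value = self.value(F.relu(self.value_fc(x)))
        advantage = self.advantage(F.relu(self.advantage_fc(x)))
        # Subtracting the mean advantage keeps V and A identifiable
        return value + advantage - advantage.mean(dim=1, keepdim=True)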
Example #5
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #6
    def __init__(self, env_name, gamma, nstep, target_update_period, n_frames):

        self.env_name = env_name

        self.gamma = gamma

        self.nstep = nstep

        self.action_space = gym.make(env_name).action_space.n

        self.qnet = DuelingQNetwork(action_space=self.action_space)

        self.target_qnet = DuelingQNetwork(action_space=self.action_space)

        self.target_update_period = target_update_period

        self.n_frames = n_frames

        #self.optimizer = tf.keras.optimizers.Adam(lr=0.0001)

        self.optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.00025 / 4,
                                                     rho=0.95,
                                                     momentum=0.0,
                                                     epsilon=1.5e-07,
                                                     centered=True)

        self.update_count = 0
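This example (and the actor/tester constructors further down) builds its DuelingQNetwork(action_space=...) with TensorFlow rather than PyTorch. A comparable Keras sketch with an Atari-style convolutional trunk is shown below; it is an assumption about the network's shape, not the definition used by this example.

import tensorflow as tf
import tensorflow.keras.layers as kl


class DuelingQNetwork(tf.keras.Model):
    """Dueling Q-network for stacked Atari frames (sketch)."""

    def __init__(self, action_space):
        super().__init__()
        self.action_space = action_space
        self.conv1 = kl.Conv2D(32, 8, strides=4, activation="relu")
        self.conv2 = kl.Conv2D(64, 4, strides=2, activation="relu")
        self.conv3 = kl.Conv2D(64, 3, strides=1, activation="relu")
        self.flatten = kl.Flatten()
        self.dense_value = kl.Dense(512, activation="relu")
        self.value = kl.Dense(1)
        self.dense_advantage = kl.Dense(512, activation="relu")
        self.advantage = kl.Dense(action_space)

    def call(self, x):
        x = self.flatten(self.conv3(self.conv2(self.conv1(x))))
        value = self.value(self.dense_value(x))
        advantage = self.advantage(self.dense_advantage(x))
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
        return value + advantage - tf.reduce_mean(advantage, axis=1, keepdims=True)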
Example #7
    def __init__(self, state_size, action_size, seed, use_is=True):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            use_is (bool): whether to apply importance-sampling weights when learning from prioritized samples
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = PrioritizedExperienceReplayBuffer(
            action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.use_is = use_is
Example #8
    def __init__(self, state_size, action_size, params, seed=None, model='dqn'):
        self.seed = seed
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        self.params = params
        self.state_size = state_size
        self.action_size = action_size
        self.eps = self.params['EPS']
        
        # Memory to learn from.
        self.memory = ReplayBuffer(memory_size=self.params['BUFFER_SIZE'], sample_size=self.params['BATCH_SIZE'])

        # Network
        if model == 'dqn':
            # Vanilla DQN
            self.target = QNetwork(state_size=state_size, action_size=action_size, seed=seed).to(device)
            self.local = QNetwork(state_size=state_size, action_size=action_size, seed=seed).to(device)

        elif model == 'ddqn':
            # Dueling DQN
            self.target = DuelingQNetwork(state_size=state_size, action_size=action_size, seed=seed).to(device)
            self.local = DuelingQNetwork(state_size=state_size, action_size=action_size, seed=seed).to(device)

        self.optimizer = torch.optim.Adam(self.local.parameters(), lr=self.params['LR'])
       
        self.t_step = 0
Example #9
    def __init__(self, state_size, action_size, seed):  #, writer):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # TODO: Swap ReplayBuffer for PER buffer
        # Replay memory
        #         self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        self.memory = PrioritisedReplayBuffer(action_size, BUFFER_SIZE,
                                              BATCH_SIZE, ALPHA, EPSILON)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.beta = BETA_START
Example #10
    def __init__(self, state_size, action_size, double_dqn, dueling, seed):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            double_dqn (bool): use the Double DQN update rule
            dueling (bool): use the dueling network architecture
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.double_dqn = double_dqn
        self.seed = random.seed(seed)
        self.dueling = dueling

        # Q-Network
        if dueling:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)

        else:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #11
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 ddqn=False,
                 dueling=False,
                 init_td=1e-5,
                 prioritize_weight=0.0,
                 beta_scheduler=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            ddqn (bool): use the Double DQN update rule
            dueling (bool): use the dueling network architecture
            init_td (float): initial TD error assigned to new experiences
            prioritize_weight (float): prioritization strength; 0.0 disables prioritized replay
            beta_scheduler: schedule for the importance-sampling exponent beta
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network

        if not dueling:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)

        else:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        if (prioritize_weight != 0.0):
            self.memory = PrioritizedReplayBuffer(action_size, BUFFER_SIZE,
                                                  BATCH_SIZE, seed,
                                                  prioritize_weight,
                                                  beta_scheduler)

            self.init_td = init_td
            self.prioritize_weight = prioritize_weight

        else:
            self.prioritize_weight = 0.0

            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       seed)

        self.init_td = init_td

        # Initialize time step (for updating every  steps)
        self.t_step = 0

        self.ddqn = ddqn
Example #12
    def __init__(self, state_size, action_size, seed, prioritized=False):
        """Dueling Q network agent."""
        super().__init__(state_size, action_size, seed)

        # Dueling Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed).to(device)  # use GPU or not
        self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
Example #13
    def __init__(self, seed, **kwargs):
        """Initialize an Agent object.
        
        Params
        ======
            seed (int): random seed
            **kwargs: optional overrides (network_args, buffer_size, batch_size,
                gamma, tau, update_every, lr, double_q, dueling, ray_layer)
        """
        self.seed = random.seed(seed)

        # Hyper-parameters
        self.network_args = kwargs.get('network_args', {})
        self.buffer_size = kwargs.get('buffer_size', BUFFER_SIZE)
        self.batch_size = kwargs.get('batch_size', BATCH_SIZE)
        self.gamma = kwargs.get('gamma', GAMMA)
        self.tau = kwargs.get('tau', TAU)
        self.update_every = kwargs.get('update_every', UPDATE_EVERY)
        self.lr = kwargs.get('lr', LR)
        self.double_q = kwargs.get('double_q', False)
        self.dueling = kwargs.get('dueling', False)
        self.ray_layer = kwargs.get('ray_layer', False)

        # Q-Network
        if self.dueling:
            if self.ray_layer:
                self.qnetwork_local = DuelingQNetworkWithRayLayer(
                    seed, **self.network_args).to(device)
                self.qnetwork_target = DuelingQNetworkWithRayLayer(
                    seed, **self.network_args).to(device)
            else:
                self.qnetwork_local = DuelingQNetwork(
                    seed, **self.network_args).to(device)
                self.qnetwork_target = DuelingQNetwork(
                    seed, **self.network_args).to(device)
        else:
            if self.ray_layer:
                self.qnetwork_local = QNetworkWithRayLayer(
                    seed, **self.network_args).to(device)
                self.qnetwork_target = QNetworkWithRayLayer(
                    seed, **self.network_args).to(device)
            else:
                self.qnetwork_local = QNetwork(seed,
                                               **self.network_args).to(device)
                self.qnetwork_target = QNetwork(seed,
                                                **self.network_args).to(device)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=self.lr)

        # Replay memory
        self.memory = ReplayBuffer(ACTION_SIZE, self.buffer_size,
                                   self.batch_size, seed)
        # Initialize time step (for updating every update_every steps)
        self.t_step = 0
Example #14
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 double_dqn=True,
                 priority_replay=True,
                 dueling_network=True):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            double_dqn (bool): use the Double DQN update rule
            priority_replay (bool): use prioritized experience replay
            dueling_network (bool): use the dueling network architecture
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.B = B_START

        self.double_dqn = double_dqn
        self.priority_replay = priority_replay
        self.dueling_network = dueling_network

        # Q-Network
        if self.dueling_network:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)
        else:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        if self.priority_replay:
            self.memory = PrioritizedReplayBuffer(state_size,
                                                  BUFFER_SIZE,
                                                  BATCH_SIZE,
                                                  seed,
                                                  use_rank=False)
        else:
            self.memory = ReplayBuffer(state_size, BUFFER_SIZE, BATCH_SIZE,
                                       seed)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #15
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 DDQN=False,
                 PRB=False,
                 Dueling=False):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            DDQN (bool): apply the Double DQN algorithm
            PRB (bool): use a Prioritized ReplayBuffer
            Dueling (bool): use a Dueling NN-architecture
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.DDQN = DDQN
        self.PRB = PRB

        # Q-Network
        if Dueling:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)
        else:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        if self.PRB:
            self.memory = PrioritizedReplayBuffer(action_size, BUFFER_SIZE,
                                                  BATCH_SIZE, seed, ALPHA,
                                                  BETA_START, BETA_INCREASE)
        else:
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #16
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        super(DuelingAgent, self).__init__(state_size, action_size, seed)

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
Example #17
    def __init__(self, state_size, action_size, seed, args):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            args: configuration namespace (expects double_dqn, dueling_dqn, lr, buffer_size, batch_size)
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.double_dqn = args.double_dqn
        self.dueling_dqn = args.dueling_dqn
        self.args = args
        # At most one of Double DQN and Dueling DQN may be enabled
        assert self.double_dqn * self.dueling_dqn == 0
        if self.double_dqn:
            print("Implementing Double DQN!")
        elif self.dueling_dqn:
            print("Implementing Dueling DQN!")
        else:
            print("Implementing DQN")

        # Q-Network
        if self.dueling_dqn:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)
        else:
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=self.args.lr)

        # Replay memory
        self.memory = ReplayBuffer(action_size, args.buffer_size,
                                   args.batch_size, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #18
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 hidden_sizes=[64, 64],
                 flavor='plain'):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            hidden_sizes (list): list of neurons in each layer
            flavor (str): flavor of the network - plain, double, dueling, double-dueling
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.hidden_sizes = hidden_sizes
        self.flavor = flavor

        # Q-Network
        if self.flavor == 'plain' or self.flavor == 'double':
            self.qnetwork_local = QNetwork(state_size, action_size, seed,
                                           hidden_sizes).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size, seed,
                                            hidden_sizes).to(device)
        # Dueling Q-Network
        if self.flavor == 'dueling' or self.flavor == 'double-dueling':
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed,
                                                  hidden_sizes).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed,
                                                   hidden_sizes).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #19
    def __init__(self, id, state_size, action_size, seed, use_double=False, use_prio=False, use_dueling=False):
        """Initialize an Agent object.
        
        Params
        ======
            id (int): id used to identify the agent
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            use_double (boolean): Use Double DQN algorithm
            use_prio (boolean): Use Prioritized Experience Replay
            use_dueling (boolean): Use Dueling DQN algorithm
        """
        self.state_size = state_size
        self.action_size = action_size
        self.id = id

        self.use_double = use_double
        self.use_prio = use_prio
        self.use_dueling = use_dueling
        self.seed = random.seed(seed)

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Q-Network
        if use_dueling:
            self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed).to(self.device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed).to(self.device)
        else:
            self.qnetwork_local = QNetwork(state_size, action_size, seed).to(self.device)
            self.qnetwork_target = QNetwork(state_size, action_size, seed).to(self.device)
            
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        
        # Replay memory
        if use_prio:
            self.memory = NaivePrioritizedReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed, PRIO_ALPHA, PRIO_EPSILON)
        else:
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
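The NaivePrioritizedReplayBuffer above is the simplest form of prioritized experience replay: priorities live in a flat array instead of a sum tree, and sampling probabilities are recomputed on demand. The sketch below assumes the constructor signature used in this example, with PRIO_ALPHA as the priority exponent and PRIO_EPSILON as the small constant added to each priority; the actual implementation may differ.

import random
from collections import namedtuple

import numpy as np


class NaivePrioritizedReplayBuffer:
    """Proportional prioritized replay without a sum tree (O(n) sampling)."""

    def __init__(self, action_size, buffer_size, batch_size, seed, alpha, epsilon):
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.alpha = alpha        # how strongly priorities skew sampling (0 = uniform)
        self.epsilon = epsilon    # keeps every priority strictly positive
        self.memory = []
        self.priorities = np.zeros(buffer_size, dtype=np.float32)
        self.pos = 0
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)
        np.random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Store an experience with the current maximum priority so it is replayed soon."""
        max_prio = self.priorities.max() if self.memory else 1.0
        e = self.experience(state, action, reward, next_state, done)
        if len(self.memory) < self.buffer_size:
            self.memory.append(e)
        else:
            self.memory[self.pos] = e
        self.priorities[self.pos] = max_prio
        self.pos = (self.pos + 1) % self.buffer_size

    def sample(self, beta=0.4):
        """Sample proportionally to priority**alpha and return importance-sampling weights."""
        prios = self.priorities[:len(self.memory)] ** self.alpha
        probs = prios / prios.sum()
        indices = np.random.choice(len(self.memory), self.batch_size, p=probs)
        experiences = [self.memory[i] for i in indices]
        # Importance-sampling weights correct the bias introduced by prioritized sampling.
        weights = (len(self.memory) * probs[indices]) ** (-beta)
        weights /= weights.max()
        return experiences, indices, weights

    def update_priorities(self, indices, td_errors):
        """Refresh priorities from the latest TD errors."""
        for i, td in zip(indices, td_errors):
            self.priorities[i] = abs(td) + self.epsilon

    def __len__(self):
        return len(self.memory)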
Example #20
    def __init__(self, state_size, action_size, mem_length=100000, ddqn=True):
        self.gamma = 0.99
        self.batch_size = 64
        self.action_size = action_size
        self.ddqn = ddqn

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        if ddqn:
            self.model = DuelingQNetwork(state_size,
                                         action_size).to(self.device)
            self.target_model = DuelingQNetwork(state_size,
                                                action_size).to(self.device)
            self.optimizer = optim.Adam(self.model.parameters(), lr=5e-4)
            self.experience = self.ddqn_experience
        else:
            self.model = QNetwork(state_size, action_size).to(self.device)
            self.optimizer = optim.Adam(self.model.parameters(), lr=5e-4)
            self.experience = self.dqn_experience

        # replay memory
        self.memory = deque(maxlen=mem_length)
Example #21
    def _create_nn(self, nn_type, state_size, action_size, seed, device):
        if nn_type == 'noisydueling':
            self._sample_noise = True
            return NoisyDuelingQNetwork(state_size,
                                        action_size,
                                        seed,
                                        device=device).to(device)
        elif nn_type == 'dueling':
            return DuelingQNetwork(state_size, action_size, seed).to(device)
        elif nn_type == 'q':
            return QNetwork(state_size, action_size, seed).to(device)
        else:
            raise Exception(
                'Unknown NN type - must be one of NoisyDueling, Dueling or Q')
Example #22
    def __init__(self,
                 state_size,
                 action_size,
                 parameters,
                 evaluation_mode=False):
        self.evaluation_mode = evaluation_mode

        self.state_size = state_size
        self.action_size = action_size
        self.double_dqn = True
        self.hidsize = 1

        if not evaluation_mode:
            self.hidsize = parameters.hidden_size
            self.buffer_size = parameters.buffer_size
            self.batch_size = parameters.batch_size
            self.update_every = parameters.update_every
            self.learning_rate = parameters.learning_rate
            self.tau = parameters.tau
            self.gamma = parameters.gamma
            self.buffer_min_size = parameters.buffer_min_size

        # Device
        if parameters.use_gpu and torch.cuda.is_available():
            self.device = torch.device("cuda:0")
            print(" Using GPU")
            print(" GPU")

        else:
            self.device = torch.device("cpu")
            print(" Using CPU")

        # Q-Network
        self.qnetwork_local = DuelingQNetwork(state_size,
                                              action_size,
                                              hidsize1=self.hidsize,
                                              hidsize2=self.hidsize).to(
                                                  self.device)

        if not evaluation_mode:
            self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                        lr=self.learning_rate)
            self.memory = ReplayBuffer(action_size, self.buffer_size,
                                       self.batch_size, self.device)

            self.t_step = 0
            self.loss = 0.0
Example #23
    def __init__(self, state_size, action_size, seed):
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.model = DuelingQNetwork(state_size, action_size, seed).to(device)
#         self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed).to(device)
#         for target_param, param in zip(self.qnetwork_local.parameters(),self.qnetwork_target.parameters()):
#             target_param.data.copy_(param)
            
        self.optimizer = optim.Adam(self.model.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #24
    def __init__(self, env_name, epsilon=0.05, n_frames=4):

        self.env_name = env_name

        self.env = gym.make(env_name)

        self.action_space = self.env.action_space.n

        self.epsilon = epsilon

        self.n_frames = n_frames

        self.frames = collections.deque(maxlen=n_frames)

        self.qnet = DuelingQNetwork(action_space=self.action_space)

        self.define_network()
Example #25
    def __init__(self, pid, env_name, epsilon, alpha, buffer_size, n_frames,
                 gamma, nstep, reward_clip):

        self.pid = pid

        self.env = gym.make(env_name)

        self.epsilon = epsilon

        self.gamma = gamma

        self.alpha = alpha

        self.n_frames = n_frames

        self.action_space = self.env.action_space.n

        self.frames = collections.deque(maxlen=n_frames)

        self.nstep = nstep

        self.buffer_size = buffer_size

        self.local_buffer = LocalReplayBuffer(reward_clip=reward_clip,
                                              gamma=gamma,
                                              nstep=nstep)

        self.local_qnet = DuelingQNetwork(action_space=self.action_space)

        self.episode_steps = 0

        self.episode_rewards = 0

        self.lives = 5  #: Breakout only

        self.define_network()
Example #26
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 double_DQN=False,
                 prioritized_replay=False,
                 dueling_networks=False):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            double_DQN (bool): use Double DQN
            prioritized_replay (bool): use prioritized experience replay
            dueling_networks (bool): use the dueling network architecture
        """

        self.state_size = state_size
        self.action_size = action_size
        self.seed = seed
        self.tau = TAU
        self.double_DQN = double_DQN
        self.prioritized_replay = prioritized_replay
        self.dueling_networks = dueling_networks

        if self.dueling_networks:
            # Q-Networks - Local, Target Neural Nets
            self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                                  seed).to(device)
            self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                                   seed).to(device)
            self.qnetwork_target.eval()

        else:
            # Q-Networks - Local, Target Neural Nets
            self.qnetwork_local = QNetwork(state_size, action_size,
                                           seed).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size,
                                            seed).to(device)
            self.qnetwork_target.eval()

        # Use optimizer to update the "local" neural net
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        if self.prioritized_replay:
            prioritized_params = {
                'a': 0.6,
                'b': 0.4,
                'b_inc_rate': 1.001,
                'e': 0.01
            }
            self.memory = PrioritizedReplayBuffer(action_size, BUFFER_SIZE,
                                                  BATCH_SIZE, seed, device,
                                                  prioritized_params)
        else:
            # Replay memory
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       seed, device)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
Example #27
    def __init__(self,
                 state_size,
                 action_size,
                 buffer_size=int(1e5),
                 batch_size=64,
                 gamma=.99,
                 tau=1e-3,
                 lr=5e-4,
                 update_every=4,
                 use_double=False,
                 use_dueling=False,
                 use_priority=False,
                 use_noise=False,
                 seed=42):
        """Deep Q-Network Agent
        
        Args:
            state_size (int)
            action_size (int)
            buffer_size (int): Experience Replay buffer size
            batch_size (int)
            gamma (float): 
                discount factor, used to balance immediate and future reward
            tau (float): interpolation parameter for soft update target network
            lr (float): neural network learning rate
            update_every (int): how often the networks are updated (in steps)
            use_double (bool): whether or not to use double networks improvement
            use_dueling (bool): whether or not to use dueling network improvement
            use_priority (bool): whether or not to use priority experience replay
            use_noise (bool): whether or not to use noisy nets for exploration
            seed (int)
        """

        self.state_size = state_size
        self.action_size = action_size
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.lr = lr
        self.update_every = update_every
        self.use_double = use_double
        self.use_dueling = use_dueling
        self.use_priority = use_priority
        self.use_noise = use_noise

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        # Q-Network
        if use_dueling:
            self.qn_local = DuelingQNetwork(state_size,
                                            action_size,
                                            noisy=use_noise).to(device)
        else:
            self.qn_local = QNetwork(state_size, action_size,
                                     noisy=use_noise).to(device)

        if use_dueling:
            self.qn_target = DuelingQNetwork(state_size,
                                             action_size,
                                             noisy=use_noise).to(device)
        else:
            self.qn_target = QNetwork(state_size, action_size,
                                      noisy=use_noise).to(device)

        # Initialize target model parameters with local model parameters
        self.soft_update(1.0)

        # TODO: make the optimizer configurable
        self.optimizer = optim.Adam(self.qn_local.parameters(), lr=lr)

        if use_priority:
            self.memory = PrioritizedReplayBuffer(buffer_size, batch_size)
        else:
            self.memory = ReplayBuffer(buffer_size, batch_size)

        # Initialize time step (for updating every update_every steps)
        self.t_step = 0
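The constructor above copies the local weights into the target network by calling self.soft_update(1.0); the same method is applied with a small tau after every learning step. A sketch of the usual soft update follows, assuming the rule θ_target ← τ·θ_local + (1-τ)·θ_target and the qn_local/qn_target attribute names from this example.

    def soft_update(self, tau):
        """Soft-update the target network: θ_target ← τ·θ_local + (1-τ)·θ_target."""
        for target_param, local_param in zip(self.qn_target.parameters(),
                                             self.qn_local.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)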
Example #28
    def __init__(self,
                 state_size,
                 action_size,
                 buffer_size=int(1e5),
                 batch_size=64,
                 gamma=0.99,
                 tau=1e-3,
                 learn_rate=5e-4,
                 update_every=4,
                 per_epsilon=1e-5,
                 per_alpha=0.6,
                 per_beta=0.9,
                 device=DEFAULT_DEVICE,
                 seed=0):
        """ Initialize an object.

        :param state_size:  (int) Dimension of each state
        :param action_size:  (int) Dimension of each action
        :param buffer_size:  (int) Replay buffer size
        :param batch_size:  (int) Minibatch size used during learning
        :param gamma:  (float) Discount factor
        :param tau:  (float) Scaling parameter for soft update
        :param learn_rate:  (float) Learning rate used by optimizer
        :param update_every:  (int) Steps between updates of target network
        :param per_epsilon:  (float) PER hyperparameter, constant added to each error
        :param per_alpha:  (float) PER hyperparameter, exponent applied to each probability
        :param per_beta:  (float) PER hyperparameter, bias correction exponent for probability weight
        :param device:  (torch.device)  Object representing the device where to allocate tensors
        :param seed:  (int) Seed used for PRNG
        """
        # Save copy of model parameters
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.device = device

        # Save copy of hyperparameters
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.learn_rate = learn_rate
        self.update_every = update_every
        self.per_epsilon = per_epsilon
        self.per_alpha = per_alpha
        self.per_beta = per_beta

        # Q networks
        self.qnetwork_local = DuelingQNetwork(state_size, action_size,
                                              seed).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size,
                                               seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=learn_rate)

        # Replay memory
        self.memory = PrioritizedReplayBuffer(memory_size=buffer_size,
                                              device=device,
                                              update_every=update_every,
                                              seed=seed)

        # Initialize time step (for updating every self.update_every steps)
        self.t_step = 0
        self.episode = 0
Example #29
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 lr_decay=9999e-4,
                 double_dqn=False,
                 dueling_network=False,
                 prioritized_replay=False):
        """ Initialize an Agent instance.
        
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            seed (int): Random seed
            lr_decay (float): Multiplicative factor of learning rate decay
            double_dqn (bool): Toggle for using the Double-DQN method
            dueling_network (bool): Toggle for using the Dueling Network (DN) method
            prioritized_replay (bool): Toggle for using the Prioritized Replay method
        """

        # Set the parameters.
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)
        self.double_dqn = double_dqn
        self.dueling_network = dueling_network
        self.prioritized_replay = prioritized_replay

        # Q-Network hidden layers.
        hidden_layers = [128, 32]

        # Use the Dueling Network (DN) method.
        if self.dueling_network:

            # DN requires a hidden state value.
            hidden_state_value = [64, 32]

            self.qnetwork_local = DuelingQNetwork(
                state_size, action_size, seed, hidden_layers,
                hidden_state_value).to(device)
            self.qnetwork_target = DuelingQNetwork(
                state_size, action_size, seed, hidden_layers,
                hidden_state_value).to(device)
            self.qnetwork_target.eval()

        else:  # Use the Deep Q-Network (DQN) method.

            self.qnetwork_local = QNetwork(state_size, action_size, seed,
                                           hidden_layers).to(device)
            self.qnetwork_target = QNetwork(state_size, action_size, seed,
                                            hidden_layers).to(device)
            self.qnetwork_target.eval()

        # Optimize using Adam.
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=LEARNING_RATE)
        self.lr_scheduler = optim.lr_scheduler.ExponentialLR(
            self.optimizer, lr_decay)

        # Use the Prioritized Replay memory buffer if enabled.
        if self.prioritized_replay:

            self.memory = PrioritizedReplayBuffer(action_size,
                                                  BUFFER_SIZE,
                                                  BATCH_SIZE,
                                                  seed,
                                                  device,
                                                  alpha=0.6,
                                                  beta=0.4,
                                                  beta_scheduler=1.0)

        else:  # Use the Replay memory buffer instead.
            self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                       seed, device)

        # Initialize the time step (until the THRESHOLD is reached).
        self.t_step = 0