Example #1
def test_set_weights(self):
    model = lambda: create_test_model(10, 10)
    model_1 = FFNetAsync(model)
    model_2 = FFNetAsync(model)
    # get_weights() only dispatches the request; collect() retrieves it,
    # matching how FFNetAsync is used in the other examples on this page.
    model_1.get_weights()
    model_2.get_weights()
    weights_1 = model_1.collect()
    weights_2 = model_2.collect()
    # Rebinding the loop variable inside a for loop would not modify the
    # list, so build the averaged weights explicitly.
    weights_avg = [(w_1 + w_2) / 2 for w_1, w_2 in zip(weights_1, weights_2)]
    model_1.set_weights(weights_avg)
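
FFNetAsync itself is not shown on this page. Judging from how the examples call it (each method dispatches work, then collect() fetches the result), a minimal thread-based sketch could look like the following. This is a hypothetical stand-in, not the project's implementation; the name FFNetAsyncSketch and all internals are assumptions.

import queue
import threading


class FFNetAsyncSketch:
    '''Hypothetical FFNetAsync-style wrapper: the model lives on a worker
    thread, method calls only enqueue work, and collect() blocks until the
    oldest outstanding result is ready.'''

    def __init__(self, model_fn):
        self._tasks = queue.Queue()
        self._results = queue.Queue()
        threading.Thread(target=self._run, args=(model_fn,),
                         daemon=True).start()

    def _run(self, model_fn):
        model = model_fn()  # build the network on the worker thread
        while True:
            method, args, wants_result = self._tasks.get()
            result = getattr(model, method)(*args)
            if wants_result:
                self._results.put(result)

    def predict_on_batch(self, x):
        self._tasks.put(('predict_on_batch', (x,), True))

    def train_on_batch(self, x, y):
        self._tasks.put(('train_on_batch', (x, y), False))

    def get_weights(self):
        self._tasks.put(('get_weights', (), True))

    def set_weights(self, weights):
        self._tasks.put(('set_weights', (weights,), False))

    def collect(self):
        return self._results.get()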
Example #2
import random

import numpy as np


class DDQNAgent(object):
    '''An agent class used for testing reinforcement learning algorithms.

    This class is designed so that multiple agents can be trained
    concurrently in a single game; the bulk of their work is hidden
    behind this interface.
    '''
    def __init__(self,
                 model,
                 memory_size=1024,
                 Batch_size=32,
                 Gamma=0.99,
                 Epsilon=None,
                 K=1,
                 name='Agent'):
        '''Create Agent from model description file.'''
        # A generator default argument would be created once and shared
        # across all instances, so the default annealing schedule is built
        # here instead.
        if Epsilon is None:
            Epsilon = rangefloat(1.0, 0.1, 1e6)
        self.Memory = PrioritizedReplay(memory_size)
        self.Batch_size = Batch_size
        self.Gamma = Gamma
        if isinstance(Epsilon, (float, int)):
            # A fixed exploration rate.
            self.Epsilon = Epsilon
            self.Epsilon_gen = None
        else:
            # An annealing schedule: draw the first value now.
            self.Epsilon_gen = Epsilon
            self.Epsilon = next(self.Epsilon_gen)
        self.K = K
        self.current_state = None
        self.current_action = None
        self.model = FFNetAsync(model)
        self.target_model = FFNetAsync(model)
        self.terminal = False

    def initialize(self, current_state, current_action):
        '''Reset the agent with an initial state and action.'''
        self.current_state = current_state
        self.current_action = current_action
        self.terminal = False

    def chooseAction(self, time_step):
        '''Choose an action based on the current state.'''
        action = np.zeros(self.current_action.shape)
        # Only pick a new action every K time steps (frame skipping).
        if time_step % self.K == 0:
            if random.random() <= self.Epsilon:
                # Explore: pick a random action index (a tuple so that
                # multi-dimensional action arrays are indexed correctly).
                index = tuple(random.randint(0, i - 1) for i in action.shape)
                action[index] = 1
            else:
                # Exploit: take the greedy action from the online network.
                self.model.predict_on_batch(self.current_state)
                x = self.model.collect()
                index = np.argmax(x)
                action[index] = 1
            self.current_action = action.astype(np.uint8)
        return self.current_action

    def chooseOptimal(self):
        '''Choose the greedy action according to the target network.'''
        action = np.zeros(self.current_action.shape)
        self.target_model.predict_on_batch(self.current_state)
        x = self.target_model.collect()
        index = np.argmax(x)
        action[index] = 1
        return action

    def feedback(self, frame, reward, terminal):
        '''Receive feedback from Game.'''
        self.model.predict_on_batch(self.current_state)
        # Slide the frame window: the new frame is prepended and the oldest
        # frame is dropped.
        new_state = np.append(frame, self.current_state[..., 0:-1], axis=3)
        self.target_model.predict_on_batch(new_state)

        # The absolute TD error |Q - T| becomes the transition's priority.
        Q = np.max(self.model.collect().flatten() * self.current_action)
        out = self.target_model.collect().flatten()
        T = reward + self.Gamma * np.max(out)

        self.Memory.insert((self.current_state, self.current_action, reward,
                            new_state, terminal), abs(Q - T))
        self.current_state = new_state
        self.terminal = terminal

    def isTerminal(self):
        return self.terminal

    def save(self, name):
        '''Saving is not implemented for this test agent.'''
        #self.model.save(name)
        pass

    def train(self):
        '''Train the Agent.'''
        # Anneal epsilon one step per training call.
        if self.Epsilon_gen is not None:
            self.Epsilon = next(self.Epsilon_gen)
        batch = self.Memory.batch()

        pseq_batch = np.concatenate([b[0] for b in batch], axis=0)
        action_batch = np.stack([b[1] for b in batch])
        reward_batch = np.array([b[2] for b in batch])
        seq_batch = np.concatenate([b[3] for b in batch], axis=0)
        term_batch = np.array([b[4] for b in batch])

        # Dispatch both forward passes before collecting either result.
        self.target_model.predict_on_batch(seq_batch)
        #self.model.predict_on_batch(seq_batch)
        self.model.predict_on_batch(pseq_batch)

        out = self.target_model.collect()
        # The commented lines implement the Double DQN target: select the
        # action with the online network, evaluate it with the target network.
        #actions = self.model.collect()
        #out = out[np.arange(len(out)), np.argmax(actions, axis=1)].reshape(-1, 1)
        y_batch = self.model.collect()

        # For the taken actions, replace the predicted Q values with the
        # bootstrapped targets; terminal transitions keep only the reward.
        y_batch[action_batch == 1] = reward_batch + self.Gamma * np.max(
            out, axis=1) * np.invert(term_batch)
        self.model.train_on_batch(pseq_batch, y_batch)

        # Soft update of the target network with a fixed mixing rate of 0.2.
        self.model.get_weights()
        self.target_model.get_weights()

        weights = self.model.collect()
        target_weights = self.target_model.collect()
        for i in range(len(target_weights)):
            target_weights[i] = target_weights[i] * 0.8 + weights[i] * 0.2
        self.target_model.set_weights(target_weights)

    def get_epsilon(self):
        return self.Epsilon
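
The rangefloat(1.0, 0.1, 1e6) schedule above is consumed with next(), so it is presumably a generator that anneals epsilon from 1.0 down to 0.1 over one million steps. A minimal linear-annealing sketch (hypothetical; the real helper may differ):

def rangefloat(start, stop, steps):
    '''Yield values moving linearly from start to stop over steps calls,
    then yield stop forever.'''
    delta = (stop - start) / float(steps)
    value = start
    while True:
        yield value
        value += delta
        # Clamp once the target is passed (handles both directions).
        if (delta < 0 and value < stop) or (delta > 0 and value > stop):
            value = stop
            delta = 0.0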
Example #3
import numpy as np


class DQNAgent(BaseDQNet):
    '''An agent class used for testing reinforcement learning algorithms.

    This class is designed so that multiple agents can be trained
    concurrently in a single game; the bulk of their work is hidden
    behind this interface.
    '''
    def __init__(self,
                 model,
                 replay_size=1024,
                 batch_size=32,
                 gamma=0.99,
                 epsilon=0.1,
                 tau=0.0001,
                 name='Agent',
                 replay_type='simple',
                 with_target=True,
                 previous_frames=4):
        '''Create Agent from model description file.'''
        super().__init__(epsilon, gamma, replay_type, replay_size)
        self.tau = tau
        self.batch_size = batch_size
        self.with_target = with_target
        self.previous_frames = previous_frames

        self.model = FFNetAsync(model)
        # If using a target network, create a duplicate network from the same
        # network architecture file
        if with_target is True:
            self.target_model = FFNetAsync(model)

    def save(self, name):
        '''Save the networks using the keras save function.'''
        self.model.save('{}.h5'.format(name))
        if self.with_target is True:
            self.target_model.save('{}_target.h5'.format(name))

    def train(self):
        '''Train the Agent.'''
        prev_state, actions, rewards, next_state, terms = self.batch(
            self.batch_size)

        # If not using a target network, use the online network instead.
        if self.with_target is True:
            target_model = self.target_model
        else:
            target_model = self.model

        # Handles the predictions asynchronously
        target_model.predict_on_batch(next_state)
        self.model.predict_on_batch(prev_state)

        next_Qvalues = target_model.collect()
        current_Qvalues = self.model.collect()

        # Updates the Q value based on current state given the actions
        # Q_online(s_t, a) = r + gamma*Q_target(s_t+1, a)
        # or if terminal is True
        # Q_online(s_t, a) = r
        new_Qvalues = Q_update(current_Qvalues, actions, rewards, self.gamma,
                               next_Qvalues, terms)

        self.model.train_on_batch(prev_state, new_Qvalues)

        # If using a target network, update the weights of the target using the
        # online network's weights.
        if self.with_target is True:
            self.update_weights()

    def update_weights(self):
        '''Update the target network using the online network's weights.'''
        self.model.get_weights()
        self.target_model.get_weights()

        weights = self.model.collect()
        target_weights = self.target_model.collect()
        for i in range(len(target_weights)):
            target_weights[i] = target_weights[i] * (
                1 - self.tau) + weights[i] * self.tau
        self.target_model.set_weights(target_weights)

    def Qvalues(self, state, target=False):
        '''Return the Q values for the given state.

        If target is True then use the target network to predict the Q values.
        '''
        if target is True and self.with_target is True:
            model = self.target_model
        else:
            model = self.model
        model.predict_on_batch(state)
        x = model.collect()
        return x

    def update_state(self, state):
        '''Create a new state given input.

        First reshapes the state so that it may be appended.

        The shape is (1, width, height, number of states).

        The new state is currently cast to int8 to save space while
        maintaining compatibility with different feature formats
        (categorical, RGB).
        '''
        reshaped_state = state.reshape((1, ) + state.shape + (1, ))
        return np.append(reshaped_state,
                         self.current_state[..., 0:-1],
                         axis=-1).astype(np.int8)

    def initialize(self, state, action):
        '''Initialize the agent with state and action.'''

        # Stack copies of the initial frame along a new axis.
        stacked_states = [state for _ in range(self.previous_frames)]
        new_state = np.stack(stacked_states, axis=-1)

        # Reshaped state shape is (1, width, height, number of prior states)
        single_state = new_state.reshape((1, ) + new_state.shape)
        self.current_state = single_state
        self.current_action = action
        self.terminal = False
        return True
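
The Q_update helper called in DQNAgent.train is not defined on this page. Matching its call signature and the commented update rule, a minimal sketch could look like the following (hypothetical; it simply mirrors the inline target computation in DDQNAgent.train):

import numpy as np


def Q_update(current_q, actions, rewards, gamma, next_q, terminals):
    '''For each one-hot action, set Q(s_t, a) = r + gamma * max Q(s_t+1, .),
    or just r when the transition is terminal.'''
    targets = np.array(current_q, copy=True)
    # Inverting the boolean terminal flags zeroes the bootstrap term for
    # terminal transitions.
    not_terminal = np.invert(np.asarray(terminals, dtype=bool))
    targets[actions == 1] = rewards + gamma * np.max(next_q, axis=1) * not_terminal
    return targets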