Example #1
0
File: RL.py Project: Tarkof/DRL
class RL_Agent():
    """Episode-based agent that picks one of ``NB_ACTIONS`` actions per frame.

    Each call to :meth:`update` runs the model on the preprocessed frame,
    records the hidden activations and the (possibly randomised) decision,
    and returns the index of the chosen action.  When the episode ends
    (the agent died or ``episode_size`` steps elapsed) every recorded step
    is replayed through ``model.gradients`` / ``model.backward``:

    * on a win, the target is the one-hot of the decision that was taken;
    * on a loss, the target is ``wrong_move`` applied to that one-hot.

    ``MLP``, ``preprocessing``, ``one_hot`` and ``wrong_move`` are defined
    elsewhere in the project; their exact semantics are not visible here.
    """

    # Size of the action space.  Must agree with the model's output size
    # (presumably 8 -- TODO confirm against MLP).
    NB_ACTIONS = 8

    # Probability of keeping the model's prediction instead of exploring.
    EXPLOIT_PROBABILITY = .8

    def __init__(self, episode_size=150):
        """Create the model and the empty per-episode buffers.

        Args:
            episode_size: number of steps after which an episode is
                considered won if the agent has not died.
        """
        self.model = MLP((SCREEN_HEIGHT_g, SCREEN_WIDTH_g), 300)
        # To resume from a checkpoint: self.load("models/model_1185.npz")
        self.activations = []
        self.frames = []
        self.states_alive = []

        self.episode_size = episode_size
        self.episode_decisions = np.zeros(self.NB_ACTIONS)

        self.episodes_wins = 0
        self.episodes_nb = 0
        self.iter = 0

    def explore_exploit(self, prediction):
        """Return ``prediction`` most of the time, else a random action.

        With probability ``EXPLOIT_PROBABILITY`` the model's prediction is
        returned unchanged; otherwise the one-hot encoding of an action
        drawn uniformly from ``[0, NB_ACTIONS)`` is returned.
        """
        choice = np.random.rand()
        if choice <= self.EXPLOIT_PROBABILITY:
            return prediction
        # randint gives every action the same probability; rounding a
        # scaled uniform sample would make the first and last actions half
        # as likely as the others.
        return one_hot(int(np.random.randint(self.NB_ACTIONS)))

    def update(self, frame, is_dead):
        """Process one frame, train at episode end, and return the action.

        Args:
            frame: raw frame, passed through ``preprocessing`` before use.
            is_dead: truthy when the agent died on this step, which ends
                the episode as a loss.

        Returns:
            ``np.argmax`` of the decision vector chosen for this frame.
        """
        frame = preprocessing(frame)
        act_h, y_pred = self.model.forward_keep_activations(frame)
        y_pred = self.explore_exploit(y_pred)
        self.activations.append((act_h, y_pred))
        self.frames.append(frame)

        self.episode_decisions += one_hot(y_pred)

        self.iter += 1
        if is_dead or self.iter == self.episode_size:
            self._train_on_episode(is_dead)

            print(
                f"Episode #{self.episodes_nb} : {'LOSE' if is_dead else 'WIN'}   Decisions : {self.episode_decisions}"
            )

            self.reset()
            self.episodes_nb += 1
            self.episode_decisions.fill(0)

            if not is_dead:
                self.episodes_wins += 1

            # The counter covers exactly 100 episodes, so the raw win count
            # is already a percentage.
            if self.episodes_nb % 100 == 0:
                print(f"Last 100 episodes : {self.episodes_wins}%")
                self.episodes_wins = 0

        return np.argmax(y_pred)

    def _train_on_episode(self, is_dead):
        """Replay every buffered step of the episode through the model."""
        for (act_h, decision), past_frame in zip(self.activations,
                                                 self.frames):
            y_true = one_hot(decision)
            if is_dead:
                # Lost episode: train against the target wrong_move derives
                # from the decision that was taken.
                y_true = wrong_move(y_true)
            grads = self.model.gradients(past_frame, act_h, decision, y_true)
            self.model.backward(grads)

    def reset(self):
        """Clear the per-episode buffers and the step counter."""
        self.activations = []
        # Frames must be dropped together with the activations; otherwise
        # the next episode's activations get zipped with stale frames and
        # the buffer grows without bound.
        self.frames = []
        self.iter = 0

    def load(self, path):
        """Load the model parameters ``Wh``, ``Bh``, ``Wo``, ``Bo`` from ``path``.

        Args:
            path: path of an ``.npz`` archive as written by :meth:`save`.
        """
        # The context manager closes the underlying archive file once the
        # arrays have been read.
        with np.load(path) as loaded:
            self.model.Wh = loaded['Wh']
            self.model.Bh = loaded['Bh']
            self.model.Wo = loaded['Wo']
            self.model.Bo = loaded['Bo']

    def save(self):
        """Write the model parameters to ``models/model_<episodes_nb>.npz``."""
        import os

        # savez_compressed does not create missing parent directories.
        os.makedirs("models", exist_ok=True)
        np.savez_compressed("models/model_" + str(self.episodes_nb),
                            Wh=self.model.Wh,
                            Bh=self.model.Bh,
                            Wo=self.model.Wo,
                            Bo=self.model.Bo)