Example #1
import numpy as np

class DeepQLearner(BaseLearner):
    def __init__(self):
        super(DeepQLearner, self).__init__()
        self.discount_rate = 0.95                   # gamma in the Q-learning target
        self.network = QNetwork()                   # function approximator for Q(state, action)
        self.database = ExperienceDatabase(100000)  # replay buffer capped at 100,000 experiences
        self.weights = []                           # periodic snapshots of the network weights

    def decide_action(self, new_state, possible_moves):
        # score each candidate move with a forward pass through the Q-network
        scores = np.zeros(len(possible_moves))
        for i, move in enumerate(possible_moves):
            scores[i] = self.network.use_model(new_state, move)

        # record the completed transition and train on one experience replayed from the buffer
        if self.last_state is not None:
            self.database.add(self.last_state, self.last_action, self.last_reward, new_state, possible_moves)

            past_last_state, past_last_action, past_last_reward, past_new_state, past_possible_moves = self.database.sample(1)[0]

            # max Q-value over the moves available in the sampled next state;
            # start from -inf so max() works on the first move (max(None, x)
            # raises a TypeError in Python 3)
            best_score = float('-inf')
            for move in past_possible_moves:
                best_score = max(best_score, self.network.use_model(past_new_state, move))

            # back-propagate toward the Q-learning target: reward + discount * max_a' Q(s', a')
            self.network.update_model(past_last_state, past_last_action, float(past_last_reward + self.discount_rate * best_score))

        # the exploration policy (not defined in this snippet) chooses a move index from the Q-scores
        return possible_moves[self.explorer.decide_action(self.epoch, scores)]

    def end_epoch(self, score):
        super(DeepQLearner, self).end_epoch(score)

        # snapshot the network weights every 1000 epochs
        if self.epoch % 1000 == 0:
            self.weights.append(self.network.get_all_weights())
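
The example relies on an ExperienceDatabase whose implementation is not shown. Judging from the calls above (add takes a five-element transition; sample(n) returns a list of transitions), it behaves like a fixed-capacity experience-replay buffer. A minimal sketch under that assumption follows; the eviction policy and uniform sampling are guesses, not the original code:

import random
from collections import deque

class ExperienceDatabase:
    def __init__(self, capacity):
        # assumption: the oldest experiences are discarded once capacity is reached
        self.buffer = deque(maxlen=capacity)

    def add(self, last_state, last_action, reward, new_state, possible_moves):
        # one transition, matching the five arguments passed in decide_action
        self.buffer.append((last_state, last_action, reward, new_state, possible_moves))

    def sample(self, n):
        # assumption: uniform random sampling without replacement
        return random.sample(self.buffer, n)

decide_action above replays a single experience per step with sample(1); the same interface supports larger mini-batches by increasing n.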