Ejemplo n.º 1
0
    def get_move(self, state, moves, side):
        """Pick the next action and, unless learning is disabled, train on it.

        Every call increments ``self.epochs`` and recomputes ``self.mepsilon``,
        which is the probability of taking the action proposed by
        ``self.ms2m`` instead of a uniformly random valid column.

        Args:
            state: Current position; only passed to ``valid_columns``.
            moves: List of the moves played so far. The chosen action is
                appended to a copy of it to build the next state.
            side: Not used by this method.

        Returns:
            The chosen action: either a random element of
            ``valid_columns(state)`` or the result of ``self.ms2m(moves)``.
        """
        self.epochs += 1

        # With learning disabled the threshold is 1.0, and random.random()
        # is always < 1.0, so the random branch below is never taken.
        # Otherwise the threshold grows linearly with the call count from
        # 0.5 and is capped at 0.99.
        if self.no_learn:
            self.mepsilon = 1.
        else:
            self.mepsilon = min(0.99, 0.5 + self.epochs / 1e5)

        # Every 1000th call, a "foil" instance reloads its network.
        if self.foil and self.epochs % 1000 == 0:
            # Function-call form prints the same text on Python 2 and is
            # valid syntax on Python 3 (the statement form is not).
            print("Reloading network")
            self.load_network()

        if random.random() > self.mepsilon:
            # Explore: uniformly random choice among the valid columns.
            action = random.choice(list(valid_columns(state)))
        else:
            # Exploit: defer to the move-sequence-to-move policy.
            action = self.ms2m(moves)

        # State reached after playing the chosen action.
        state1 = moves_to_state(moves + [action])
        if not self.no_learn:
            self.learner(self.state0, state1=state1)
        # Remember the new state as the starting point for the next call.
        self.state0 = state1

        return action
Ejemplo n.º 2
0
 def moves_to_move(moves0):
     """Return the index of the largest non-NaN value ``Q_fn`` gives for ``moves0``.

     The move list is converted with ``moves_to_state``, wrapped in a
     one-element array, and passed to ``Q_fn``; the first entry of the
     result is then searched with ``np.nanargmax``, which ignores NaNs.
     """
     # Q_fn is called on a batch, so wrap the single state in an array.
     batch = np.array([moves_to_state(moves0)])
     q_values = Q_fn(batch)[0]
     best_index = np.nanargmax(q_values)
     return best_index