def get_move(self, state, moves, side):
    """Choose the next action and, unless learning is disabled, run a learner step.

    Each call:

    * increments ``self.epochs``;
    * sets ``self.mepsilon`` to ``1.0`` when ``self.no_learn`` is truthy,
      otherwise to ``min(0.99, 0.5 + self.epochs / 1e5)``;
    * when ``self.foil`` is truthy and ``self.epochs`` is a multiple of 1000,
      prints a notice and calls ``self.load_network()``;
    * picks a random element of ``valid_columns(state)`` when
      ``random.random() > self.mepsilon``, otherwise uses
      ``self.ms2m(moves)``;
    * builds the successor state with ``moves_to_state(moves + [action])``,
      passes it to ``self.learner`` together with ``self.state0`` (skipped
      when ``self.no_learn``), and stores it in ``self.state0``.

    Args:
        state: Passed to ``valid_columns`` to enumerate the random choices.
        moves: List of moves so far; ``[action]`` is appended to a copy of it
            via ``moves + [action]``, so the caller's list is not mutated here.
        side: Not used by this method.

    Returns:
        The chosen action.
    """
    self.epochs += 1

    # ``mepsilon`` is the probability threshold for using ``self.ms2m``:
    # ``random.random()`` is always < 1.0, so a value of 1.0 means the random
    # branch below is never taken when learning is disabled.
    if self.no_learn:
        self.mepsilon = 1.
    else:
        self.mepsilon = min(0.99, 0.5 + self.epochs / 1e5)

    if self.foil and self.epochs % 1000 == 0:
        # Parenthesized single-argument form prints the same text on
        # Python 2 and Python 3 (the bare print statement is Python 2 only).
        print("Reloading network")
        self.load_network()

    if random.random() > self.mepsilon:
        action = random.choice(list(valid_columns(state)))
    else:
        action = self.ms2m(moves)

    state1 = moves_to_state(moves + [action])
    if not self.no_learn:
        self.learner(self.state0, state1=state1)
    # NOTE(review): the original source was whitespace-collapsed, so it is
    # ambiguous whether this assignment sat inside the ``if`` above; it is
    # kept unconditional here -- confirm against the upstream file.
    self.state0 = state1
    return action
def moves_to_move(moves0):
    """Return the argmax (ignoring NaNs) of ``Q_fn``'s output for ``moves0``.

    The move list is converted with ``moves_to_state``, wrapped in a
    one-element array, and passed to ``Q_fn``; the first row of the result is
    reduced with ``np.nanargmax``.

    Args:
        moves0: Move list accepted by ``moves_to_state``.

    Returns:
        The index returned by ``np.nanargmax`` over the first row of
        ``Q_fn``'s output (presumably the chosen move/column -- confirm
        against ``Q_fn``'s definition).
    """
    # Q_fn is fed a batch containing a single state; unwrap its only row.
    single_state_batch = np.array([moves_to_state(moves0)])
    q_values = Q_fn(single_state_batch)[0]
    return np.nanargmax(q_values)